xref: /titanic_50/usr/src/uts/common/os/sunpm.c (revision 46a2abf27af40eda17a3f97e79eda1aef4e3c3c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sunpm.c builds sunpm.o	"power management framework"
31  *	kernel-resident power management code.  Implements power management
32  *	policy
33  *	Assumes: all backwards compat. device components wake up on &
34  *		 the pm_info pointer in dev_info is initially NULL
35  *
36  * PM - (device) Power Management
37  *
38  * Each device may have 0 or more components.  If a device has no components,
39  * then it can't be power managed.  Each component has 2 or more
40  * power states.
41  *
42  * "Backwards Compatible" (bc) devices:
43  * There are two different types of devices from the point of view of this
44  * code.  The original type, left over from the original PM implementation on
45  * the voyager platform are known in this code as "backwards compatible"
46  * devices (PM_ISBC(dip) returns true).
47  * They are recognized by the pm code by the lack of a pm-components property
48  * and a call made by the driver to pm_create_components(9F).
49  * For these devices, component 0 is special, and represents the power state
50  * of the device.  If component 0 is to be set to power level 0 (off), then
51  * the framework must first call into the driver's detach(9E) routine with
52  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
53  * After setting component 0 from 0 to a non-zero power level, a call must be
54  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
55  *
56  * Currently, the only way to get a bc device power managed is via a set of
57  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
58  *
59  * For non-bc devices, the driver describes the components by exporting a
60  * pm-components(9P) property that tells how many components there are,
61  * tells what each component's power state values are, and provides human
62  * readable strings (currently unused) for each component name and power state.
63  * Devices which export pm-components(9P) are automatically power managed
64  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
65  * after parsing power.conf(4)).
66  * For these devices, all components are considered independent of each other,
67  * and it is up to the driver to decide when a transition requires saving or
68  * restoring hardware state.
69  *
70  * Each device component also has a threshold time associated with each power
71  * transition (see power.conf(4)), and a busy/idle state maintained by the
72  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
73  * Components are created idle.
74  *
75  * The PM framework provides several functions:
76  * -implement PM policy as described in power.conf(4)
77  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
78  *  Policies consist of:
79  *    -set threshold values (defaults if none provided by pmconfig)
80  *    -set dependencies among devices
81  *    -enable/disable autopm
82  *    -turn down idle components based on thresholds (if autopm is enabled)
83  *     (aka scanning)
84  *    -maintain power states based on dependencies among devices
85  *    -upon request, or when the frame buffer powers off, attempt to turn off
86  *     all components that are idle or become idle over the next (10 sec)
87  *     period in an attempt to get down to an EnergyStar compliant state
88  *    -prevent powering off of a device which exported the
89  *     pm-no-involuntary-power-cycles property without active involvement of
90  *     the device's driver (so no removing power when the device driver is
91  *     not attached)
92  * -provide a mechanism for a device driver to request that a device's component
93  *  be brought back to the power level necessary for the use of the device
94  * -allow a process to directly control the power levels of device components
95  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
96  * -ensure that the console frame buffer is powered up before being referenced
97  *  via prom_printf() or other prom calls that might generate console output
98  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
99  * -provide "backwards compatible" behavior for devices without pm-components
100  *  property
101  *
102  * Scanning:
103  * Whenever autopm is enabled, the framework attempts to bring each component
104  * of each device to its lowest power based on the threshold of idleness
105  * associated with each transition and the busy/idle state of the component.
106  *
107  * The actual work of this is done by pm_scan_dev(), which cycles through each
108  * component of a device, checking its idleness against its current threshold,
109  * and calling pm_set_power() as appropriate to change the power level.
110  * This function also indicates when it would next be profitable to scan the
111  * device again, and a new scan is scheduled after that time.
112  *
113  * Dependencies:
114  * It is possible to establish a dependency between the power states of two
115  * otherwise unrelated devices.  This is currently done to ensure that the
116  * cdrom is always up whenever the console framebuffer is up, so that the user
117  * can insert a cdrom and see a popup as a result.
118  *
119  * The dependency terminology used in power.conf(4) is not easy to understand,
120  * so we've adopted a different terminology in the implementation.  We write
121  * of a "keeps up" and a "kept up" device.  A relationship can be established
122  * where one device keeps up another.  That means that if the keepsup device
123  * has any component that is at a non-zero power level, all components of the
124  * "kept up" device must be brought to full power.  This relationship is
125  * asynchronous.  When the keeping device is powered up, a request is queued
126  * to a worker thread to bring up the kept device.  The caller does not wait.
127  * Scan will not turn down a kept up device.
128  *
129  * Direct PM:
130  * A device may be directly power managed by a process.  If a device is
131  * directly pm'd, then it will not be scanned, and dependencies will not be
132  * enforced.  If a directly pm'd device's driver requests a power change (via
133  * pm_raise_power(9F)), then the request is blocked and notification is sent
134  * to the controlling process, which must issue the requested power change for
135  * the driver to proceed.
136  *
137  */
138 
139 #include <sys/types.h>
140 #include <sys/errno.h>
141 #include <sys/callb.h>		/* callback registration during CPR */
142 #include <sys/conf.h>		/* driver flags and functions */
143 #include <sys/open.h>		/* OTYP_CHR definition */
144 #include <sys/stat.h>		/* S_IFCHR definition */
145 #include <sys/pathname.h>	/* name -> dev_info xlation */
146 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
147 #include <sys/kmem.h>		/* memory alloc stuff */
148 #include <sys/debug.h>
149 #include <sys/archsystm.h>
150 #include <sys/pm.h>
151 #include <sys/ddi.h>
152 #include <sys/sunddi.h>
153 #include <sys/sunndi.h>
154 #include <sys/sunpm.h>
155 #include <sys/epm.h>
156 #include <sys/vfs.h>
157 #include <sys/mode.h>
158 #include <sys/mkdev.h>
159 #include <sys/promif.h>
160 #include <sys/consdev.h>
161 #include <sys/esunddi.h>
162 #include <sys/modctl.h>
163 #include <sys/fs/ufs_fs.h>
164 #include <sys/note.h>
165 #include <sys/taskq.h>
166 #include <sys/bootconf.h>
167 #include <sys/reboot.h>
168 #include <sys/spl.h>
169 #include <sys/disp.h>
170 #include <sys/sobject.h>
171 #include <sys/sunmdi.h>
172 
173 
174 /*
175  * PM LOCKING
176  *	The list of locks:
177  * Global pm mutex locks.
178  *
179  * pm_scan_lock:
180  *		It protects the timeout id of the scan thread, and the value
181  *		of autopm_enabled.  This lock is not held concurrently with
182  *		any other PM locks.
183  *
184  * pm_clone_lock:	Protects the clone list and count of poll events
185  *		pending for the pm driver.
186  *		Lock ordering:
187  *			pm_clone_lock -> pm_pscc_interest_rwlock,
188  *			pm_clone_lock -> pm_pscc_direct_rwlock.
189  *
190  * pm_rsvp_lock:
191  *		Used to synchronize the data structures used for processes
192  *		to rendezvous with state change information when doing
193  *		direct PM.
194  *		Lock ordering:
195  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
196  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
197  *			pm_rsvp_lock -> pm_clone_lock.
198  *
199  * ppm_lock:	protects the list of registered ppm drivers
200  *		Lock ordering:
201  *			ppm_lock -> ppm driver unit_lock
202  *
203  * pm_compcnt_lock:
204  *		Protects count of components that are not at their lowest
205  *		power level.
206  *		Lock ordering:
207  *			pm_compcnt_lock -> ppm_lock.
208  *
209  * pm_dep_thread_lock:
210  *		Protects work list for pm_dep_thread.  Not taken concurrently
211  *		with any other pm lock.
212  *
213  * pm_remdrv_lock:
214  *		Serializes the operation of removing noinvol data structure
215  *		entries for a branch of the tree when a driver has been
216  *		removed from the system (modctl_rem_major).
217  *		Lock ordering:
218  *			pm_remdrv_lock -> pm_noinvol_rwlock.
219  *
220  * pm_cfb_lock: (High level spin lock)
221  *		Protects the count of how many components of the console
222  *		frame buffer are off (so we know if we have to bring up the
223  *		console as a result of a prom_printf, etc.)
224  *		No other locks are taken while holding this lock.
225  *
226  * pm_loan_lock:
227  *		Protects the lock_loan list.  List is used to record that one
228  *		thread has acquired a power lock but has launched another thread
229  *		to complete its processing.  An entry in the list indicates that
230  *		the worker thread can borrow the lock held by the other thread,
231  *		which must block on the completion of the worker.  Use is
232  *		specific to module loading.
233  *		No other locks are taken while holding this lock.
234  *
235  * Global PM rwlocks
236  *
237  * pm_thresh_rwlock:
238  *		Protects the list of thresholds recorded for future use (when
239  *		devices attach).
240  *		Lock ordering:
241  *			pm_thresh_rwlock -> devi_pm_lock
242  *
243  * pm_noinvol_rwlock:
244  *		Protects list of detached nodes that had noinvol registered.
245  *		No other PM locks are taken while holding pm_noinvol_rwlock.
246  *
247  * pm_pscc_direct_rwlock:
248  *		Protects the list that maps devices being directly power
249  *		managed to the processes that manage them.
250  *		Lock ordering:
251  *			pm_pscc_direct_rwlock -> psce_lock
252  *
253  * pm_pscc_interest_rwlock:
254  *		Protects the list that maps state change events to processes
255  *		that want to know about them.
256  *		Lock ordering:
257  *			pm_pscc_interest_rwlock -> psce_lock
258  *
259  * per-dip locks:
260  *
261  * Each node has these per-dip locks, which are only used if the device is
262  * a candidate for power management (e.g. has pm components)
263  *
264  * devi_pm_lock:
265  *		Protects all power management state of the node except for
266  *		power level, which is protected by ndi_devi_enter().
267  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
268  *		Lock ordering:
269  *			devi_pm_lock -> pm_rsvp_lock,
270  *			devi_pm_lock -> pm_dep_thread_lock,
271  *			devi_pm_lock -> pm_noinvol_rwlock,
272  *			devi_pm_lock -> power lock
273  *
274  * power lock (ndi_devi_enter()):
275  *		Since changing power level is possibly a slow operation (30
276  *		seconds to spin up a disk drive), this is locked separately.
277  *		Since a call into the driver to change the power level of one
278  *		component may result in a call back into the framework to change
279  *		the power level of another, this lock allows re-entrancy by
280  *		the same thread (ndi_devi_enter is used for this because
281  *		the USB framework uses ndi_devi_enter in its power entry point,
282  *		and use of any other lock would produce a deadlock).
283  *
284  * devi_pm_busy_lock:
285  *		This lock protects the integrity of the busy count.  It is
286  *		only taken by pm_busy_component() and pm_idle_component and
287  *		some code that adjust the busy time after the timer gets set
288  *		up or after a CPR operation.  It is per-dip to keep from
289  *		single-threading all the disk drivers on a system.
290  *		It could be per component instead, but most devices have
291  *		only one component.
292  *		No other PM locks are taken while holding this lock.
293  *
294  */
295 
296 static int stdout_is_framebuffer; /* NOTE(review): presumably set when stdout is a frame buffer -- confirm */
297 static kmutex_t	e_pm_power_lock; /* NOTE(review): use is not visible in this part of the file */
298 static kmutex_t pm_loan_lock;	/* protects the lock_loan list */
299 kmutex_t	pm_scan_lock;	/* protects scan timeout id and autopm_enabled */
300 callb_id_t	pm_cpr_cb_id;	/* id from callb_add() of pm_cpr_callb */
301 callb_id_t	pm_panic_cb_id;	/* id from callb_add() of pm_panic_callb */
302 callb_id_t	pm_halt_cb_id;	/* id from callb_add() of pm_halt_callb */
303 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
304 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
305 
306 clock_t pm_min_scan = PM_MIN_SCAN; /* pm_rescan() delay; multiplied by hz there */
307 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
308 
/*
 * Forward declarations of file-local functions; their definitions are
 * outside the part of the file these declarations sit in.
 */
309 static int pm_busop_set_power(dev_info_t *,
310     void *, pm_bus_power_op_t, void *, void *);
311 static int pm_busop_match_request(dev_info_t *, void *);
312 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
312 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
313 
314 /*
315  * Dependency Processing is done thru a separate thread.
316  */
317 kmutex_t	pm_dep_thread_lock;	/* protects pm_dep_thread work list */
318 kcondvar_t	pm_dep_thread_cv; /* NOTE(review): presumably signalled when work is queued -- confirm */
319 pm_dep_wk_t	*pm_dep_thread_workq = NULL; /* presumably head of the work list -- confirm */
320 pm_dep_wk_t	*pm_dep_thread_tail = NULL; /* presumably tail of the work list -- confirm */
321 
322 /*
323  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
324  * power managing things in single user mode that have been suppressed via
325  * power.conf entries.  Protected by pm_scan_lock.
326  */
327 int		autopm_enabled;
328 
329 /*
330  * This flag is true while processes are stopped for a checkpoint/resume.
331  * Controlling processes of direct pm'd devices are not available to
332  * participate in power level changes, so we bypass them when this is set.
333  */
334 static int	pm_processes_stopped;
335 
336 #ifdef	DEBUG
337 
338 /*
339  * see common/sys/epm.h for PMD_* values
340  */
341 uint_t		pm_debug = 0;	/* mask of PMD_* categories to log; 0 logs nothing */
342 
343 /*
344  * If pm_divertdebug is set, then no prom_printf calls will be made by
345  * PMD(), which will prevent debug output from bringing up the console
346  * frame buffer.  Clearing this variable before setting pm_debug will result
347  * in PMD output going to the console.
348  *
349  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
350  * deadlocks and decremented at the end of pm_set_power()
351  */
352 uint_t		pm_divertdebug = 1;
353 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
354 
355 void prdeps(char *);	/* declared for DEBUG builds only */
356 #endif	/* DEBUG */
357 
358 /* Globals */
359 
360 /*
361  * List of recorded thresholds and dependencies
362  */
363 pm_thresh_rec_t *pm_thresh_head;
364 krwlock_t pm_thresh_rwlock;	/* protects the recorded thresholds list */
365 
366 pm_pdr_t *pm_dep_head;
367 static int pm_unresolved_deps = 0; /* NOTE(review): presumably a count of not-yet-resolved dependencies -- confirm */
368 static int pm_prop_deps = 0; /* NOTE(review): meaning not visible here -- confirm against users */
369 
370 /*
371  * List of devices that exported no-involuntary-power-cycles property
372  */
373 pm_noinvol_t *pm_noinvol_head;
374 
375 /*
376  * Locks used in noinvol processing
377  */
378 krwlock_t pm_noinvol_rwlock;	/* protects list of detached noinvol nodes */
379 kmutex_t pm_remdrv_lock;	/* serializes noinvol cleanup on driver removal */
380 
381 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
382 int pm_system_idle_threshold;	/* set from the default in pm_init() */
383 /*
384  * By default nexus has 0 threshold, and depends on its children to keep it up
385  */
386 int pm_default_nexus_threshold = 0;
387 
388 /*
389  * Data structures shared with common/io/pm.c
390  */
391 kmutex_t	pm_clone_lock;	/* protects clone list and poll event counts */
392 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
393 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
394 unsigned char	pm_interest[PM_MAX_CLONE];
395 struct pollhead	pm_pollhead;
396 
397 extern int	hz;	/* scales pm_min_scan into a timeout in pm_rescan() */
398 extern char	*platform_module_list[]; /* NULL-terminated; walked by pm_init() */
399 
400 /*
401  * Wrappers for use in ddi_walk_devs
402  * (each takes the node being visited and an opaque argument)
403  */
403 
404 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
405 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
406 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
407 static int		pm_discard_dep_walk(dev_info_t *, void *);
408 #ifdef DEBUG
409 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
410 #endif	/* DEBUG */
411 
412 /*
413  * Routines for managing noinvol devices
414  */
415 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
416 void			pm_noinvol_update_node(dev_info_t *,
417 			    pm_bp_noinvol_t *req);
418 
419 kmutex_t pm_rsvp_lock;	/* synchronizes direct-PM rendezvous data structures */
420 kmutex_t pm_compcnt_lock;	/* protects pm_comps_notlowest */
421 krwlock_t pm_pscc_direct_rwlock; /* protects list of directly managed devices */
422 krwlock_t pm_pscc_interest_rwlock; /* protects list of state change listeners */
423 
424 #define	PSC_INTEREST	0	/* belongs to interest psc list */
425 #define	PSC_DIRECT	1	/* belongs to direct psc list */
426 
427 pscc_t *pm_pscc_interest; /* presumably the list guarded by pm_pscc_interest_rwlock -- confirm */
428 pscc_t *pm_pscc_direct; /* presumably the list guarded by pm_pscc_direct_rwlock -- confirm */
429 
/*
 * Small helpers:
 *   PM_MAJOR(dip)	major number looked up from the node's binding name
 *   PM_IS_NEXUS(dip)	NEXUS_DRV() applied to the node's devopsp[] entry
 *   POWERING_ON()	true for a transition from level 0 to a non-zero level
 *   POWERING_OFF()	true for a transition from a non-zero level to level 0
 *   PPM(dip)		the node's devi_pm_ppm field, cast to a dev_info_t *
 */
430 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
431 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
432 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
433 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
434 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
435 
/*
 * PM_INCR_NOTLOWEST(dip): under pm_compcnt_lock, count one more component
 * that is not at its lowest power.  Only nodes that are not nexus drivers,
 * or that have PMC_DEV_THRESH or PMC_COMP_THRESH set, are counted.  When the
 * count leaves zero, pm_ppm_notify_all_lowest() is called with
 * PM_NOT_ALL_LOWEST before the increment.
 *
 * The PMD() call references "pmf", so a user of this macro needs that name
 * in scope in DEBUG builds (see PMD_FUNC).  The macro expands to a brace
 * block rather than a do/while (0) statement.
 */
436 #define	PM_INCR_NOTLOWEST(dip) {					\
437 	mutex_enter(&pm_compcnt_lock);					\
438 	if (!PM_IS_NEXUS(dip) ||					\
439 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
440 		if (pm_comps_notlowest == 0)				\
441 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
442 		pm_comps_notlowest++;					\
443 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
444 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
445 	}								\
446 	mutex_exit(&pm_compcnt_lock);					\
447 }
/*
 * PM_DECR_NOTLOWEST(dip): counterpart of PM_INCR_NOTLOWEST.  Under
 * pm_compcnt_lock, and for the same set of nodes, asserts that the count is
 * non-zero, decrements it, and calls pm_ppm_notify_all_lowest() with
 * PM_ALL_LOWEST when the count reaches zero.
 */
448 #define	PM_DECR_NOTLOWEST(dip) {					\
449 	mutex_enter(&pm_compcnt_lock);					\
450 	if (!PM_IS_NEXUS(dip) ||					\
451 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
452 		ASSERT(pm_comps_notlowest);				\
453 		pm_comps_notlowest--;					\
454 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
455 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
456 		if (pm_comps_notlowest == 0)				\
457 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
458 	}								\
459 	mutex_exit(&pm_compcnt_lock);					\
460 }
461 
462 /*
463  * Console frame-buffer power management is not enabled when
464  * debugging services are present.  To override, set pm_cfb_override
465  * to non-zero (it defaults to 1 in DEBUG builds and 0 otherwise).
466  */
467 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
468 kmutex_t pm_cfb_lock;	/* high level spin lock; protects count of cfb comps off */
469 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
470 #ifdef DEBUG
471 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
472 #else
473 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
474 #endif	/* DEBUG */
475 
476 static dev_info_t *cfb_dip = 0;	/* presumably the console frame buffer node -- confirm */
477 static dev_info_t *cfb_dip_detaching = 0; /* NOTE(review): use not visible here */
478 uint_t cfb_inuse = 0;	/* NOTE(review): use not visible here */
479 static ddi_softintr_t pm_soft_id;	/* NOTE(review): use not visible here */
480 static clock_t pm_soft_pending;	/* NOTE(review): use not visible here */
481 int	pm_scans_disabled = 0;	/* set at CPR checkpoint, cleared at resume */
482 
483 /*
484  * A structure to record the fact that one thread has borrowed a lock held
485  * by another thread.  The context requires that the lender block on the
486  * completion of the borrower.  Entries are kept on a singly linked list
487  * headed by lock_loan_head; the list is protected by pm_loan_lock.
487  */
488 typedef struct lock_loan {
489 	struct lock_loan	*pmlk_next;	/* next entry on the list */
490 	kthread_t		*pmlk_borrower;	/* thread borrowing the lock */
491 	kthread_t		*pmlk_lender;	/* thread that holds the lock */
492 	dev_info_t		*pmlk_dip; /* presumably the node whose lock is loaned -- confirm */
493 } lock_loan_t;
494 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
495 
/*
 * PMD_FUNC(func, name): in DEBUG builds, declares a local "char *" called
 * func that points at the string name; functions in this file use it to
 * supply the "pmf" argument of their PMD() output.  In non-DEBUG builds it
 * expands to nothing.  The terminating semicolon is part of the expansion,
 * so call sites do not add one.
 */
496 #ifdef	DEBUG
497 #define	PMD_FUNC(func, name)	char *(func) = (name);
498 #else
499 #define	PMD_FUNC(func, name)
500 #endif	/* DEBUG */
501 
502 
503 /*
504  * Must be called before first device (including pseudo) attach
505  */
506 void
507 pm_init_locks(void)
508 {
509 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
510 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
511 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
512 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
513 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
514 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
515 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
516 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
517 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
518 }
519 
520 static boolean_t
521 pm_cpr_callb(void *arg, int code)
522 {
523 	_NOTE(ARGUNUSED(arg))
524 	static int auto_save;
525 	static int pm_reset_timestamps(dev_info_t *, void *);
526 
527 	switch (code) {
528 	case CB_CODE_CPR_CHKPT:
529 		/*
530 		 * Cancel scan or wait for scan in progress to finish
531 		 * Other threads may be trying to restart the scan, so we
532 		 * have to keep at it unil it sticks
533 		 */
534 		mutex_enter(&pm_scan_lock);
535 		ASSERT(!pm_scans_disabled);
536 		pm_scans_disabled = 1;
537 		auto_save = autopm_enabled;
538 		autopm_enabled = 0;
539 		mutex_exit(&pm_scan_lock);
540 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
541 		break;
542 
543 	case CB_CODE_CPR_RESUME:
544 		ASSERT(!autopm_enabled);
545 		ASSERT(pm_scans_disabled);
546 		pm_scans_disabled = 0;
547 		/*
548 		 * Call pm_reset_timestamps to reset timestamps of each
549 		 * device to the time when the system is resumed so that their
550 		 * idleness can be re-calculated. That's to avoid devices from
551 		 * being powered down right after resume if the system was in
552 		 * suspended mode long enough.
553 		 */
554 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
555 
556 		autopm_enabled = auto_save;
557 		/*
558 		 * If there is any auto-pm device, get the scanning
559 		 * going. Otherwise don't bother.
560 		 */
561 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
562 		break;
563 	}
564 	return (B_TRUE);
565 }
566 
567 /*
568  * This callback routine is called when there is a system panic.  This function
569  * exists for prototype matching.
570  */
571 static boolean_t
572 pm_panic_callb(void *arg, int code)
573 {
574 	_NOTE(ARGUNUSED(arg, code))
575 	void pm_cfb_check_and_powerup(void);
576 	PMD(PMD_CFB, ("pm_panic_callb\n"))
577 	pm_cfb_check_and_powerup();
578 	return (B_TRUE);
579 }
580 
581 static boolean_t
582 pm_halt_callb(void *arg, int code)
583 {
584 	_NOTE(ARGUNUSED(arg, code))
585 	return (B_TRUE);	/* XXX for now */
586 }
587 
588 /*
589  * This needs to be called after the root and platform drivers are loaded
590  * and be single-threaded with respect to driver attach/detach
591  */
592 void
593 pm_init(void)
594 {
595 	PMD_FUNC(pmf, "pm_init")
596 	char **mod;
597 	extern pri_t minclsyspri;
598 	static void pm_dep_thread(void);
599 
600 	pm_comps_notlowest = 0;
601 	pm_system_idle_threshold = pm_default_idle_threshold;
602 
603 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
604 	    CB_CL_CPR_PM, "pm_cpr");
605 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
606 		    CB_CL_PANIC, "pm_panic");
607 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
608 		    CB_CL_HALT, "pm_halt");
609 
610 	/*
611 	 * Create a thread to do dependency processing.
612 	 */
613 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
614 	    TS_RUN, minclsyspri);
615 
616 	/*
617 	 * loadrootmodules already loaded these ppm drivers, now get them
618 	 * attached so they can claim the root drivers as they attach
619 	 */
620 	for (mod = platform_module_list; *mod; mod++) {
621 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
622 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
623 			    *mod);
624 		} else {
625 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
626 			    ddi_major_to_name(ddi_name_to_major(*mod))))
627 		}
628 	}
629 }
630 
631 /*
632  * pm_scan_init - create pm scan data structure.  Called (if autopm enabled)
633  * when device becomes power managed or after a failed detach and when autopm
634  * is started via PM_START_PM ioctl, and after a CPR resume to get all the
635  * devices scanning again.
636  */
637 void
638 pm_scan_init(dev_info_t *dip)
639 {
640 	PMD_FUNC(pmf, "scan_init")
641 	pm_scan_t	*scanp;
642 
643 	ASSERT(!PM_ISBC(dip));
644 
645 	PM_LOCK_DIP(dip);
646 	scanp = PM_GET_PM_SCAN(dip);
647 	if (!scanp) {
648 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
649 		    pmf, PM_DEVICE(dip)))
650 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
651 		DEVI(dip)->devi_pm_scan = scanp;
652 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
653 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
654 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
655 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
656 	}
657 	PM_UNLOCK_DIP(dip);
658 }
659 
660 /*
661  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
662  */
663 void
664 pm_scan_fini(dev_info_t *dip)
665 {
666 	PMD_FUNC(pmf, "scan_fini")
667 	pm_scan_t	*scanp;
668 
669 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
670 	ASSERT(!PM_ISBC(dip));
671 	PM_LOCK_DIP(dip);
672 	scanp = PM_GET_PM_SCAN(dip);
673 	if (!scanp) {
674 		PM_UNLOCK_DIP(dip);
675 		return;
676 	}
677 
678 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
679 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
680 
681 	kmem_free(scanp, sizeof (pm_scan_t));
682 	DEVI(dip)->devi_pm_scan = NULL;
683 	PM_UNLOCK_DIP(dip);
684 }
685 
686 /*
687  * Given a pointer to a component struct, return the current power level
688  * (struct contains index unless it is a continuous level).
689  * Located here in hopes of getting both this and dev_is_needed into the
690  * cache together
691  */
692 static int
693 cur_power(pm_component_t *cp)
694 {
695 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
696 		return (cp->pmc_cur_pwr);
697 
698 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
699 }
700 
701 static char *
702 pm_decode_direction(int direction)
703 {
704 	switch (direction) {
705 	case PM_LEVEL_UPONLY:
706 		return ("up");
707 
708 	case PM_LEVEL_EXACT:
709 		return ("exact");
710 
711 	case PM_LEVEL_DOWNONLY:
712 		return ("down");
713 
714 	default:
715 		return ("INVALID DIRECTION");
716 	}
717 }
718 
719 char *
720 pm_decode_op(pm_bus_power_op_t op)
721 {
722 	switch (op) {
723 	case BUS_POWER_CHILD_PWRCHG:
724 		return ("CHILD_PWRCHG");
725 	case BUS_POWER_NEXUS_PWRUP:
726 		return ("NEXUS_PWRUP");
727 	case BUS_POWER_PRE_NOTIFICATION:
728 		return ("PRE_NOTIFICATION");
729 	case BUS_POWER_POST_NOTIFICATION:
730 		return ("POST_NOTIFICATION");
731 	case BUS_POWER_HAS_CHANGED:
732 		return ("HAS_CHANGED");
733 	case BUS_POWER_NOINVOL:
734 		return ("NOINVOL");
735 	default:
736 		return ("UNKNOWN OP");
737 	}
738 }
739 
740 /*
741  * Returns true if level is a possible (valid) power level for component
742  */
743 int
744 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
745 {
746 	PMD_FUNC(pmf, "e_pm_valid_power")
747 	pm_component_t *cp = PM_CP(dip, cmpt);
748 	int i;
749 	int *ip = cp->pmc_comp.pmc_lvals;
750 	int limit = cp->pmc_comp.pmc_numlevels;
751 
752 	if (level < 0)
753 		return (0);
754 	for (i = 0; i < limit; i++) {
755 		if (level == *ip++)
756 			return (1);
757 	}
758 #ifdef DEBUG
759 	if (pm_debug & PMD_FAIL) {
760 		ip = cp->pmc_comp.pmc_lvals;
761 
762 		for (i = 0; i < limit; i++)
763 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
764 			    pmf, i, *ip++))
765 	}
766 #endif
767 	return (0);
768 }
769 
770 /*
771  * Returns true if device is pm'd (after calling pm_start if need be)
772  */
773 int
774 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
775 {
776 	pm_info_t *info;
777 	static int pm_start(dev_info_t *dip);
778 
779 	/*
780 	 * Check if the device is power managed if not.
781 	 * To make the common case (device is power managed already)
782 	 * fast, we check without the lock.  If device is not already
783 	 * power managed, then we take the lock and the long route through
784 	 * go get it managed.  Devices never go unmanaged until they
785 	 * detach.
786 	 */
787 	info = PM_GET_PM_INFO(dip);
788 	if (!info) {
789 		if (!DEVI_IS_ATTACHING(dip)) {
790 			return (0);
791 		}
792 		if (pm_start(dip) != DDI_SUCCESS) {
793 			return (0);
794 		}
795 		info = PM_GET_PM_INFO(dip);
796 	}
797 	ASSERT(info);
798 	if (infop != NULL)
799 		*infop = info;
800 	return (1);
801 }
802 
803 int
804 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
805 {
806 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
807 		if (cpp != NULL)
808 			*cpp = PM_CP(dip, cmpt);
809 		return (1);
810 	} else {
811 		return (0);
812 	}
813 }
814 
815 /*
816  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
817  */
818 static int
819 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
820 {
821 	PMD_FUNC(pmf, "din")
822 	pm_component_t *cp;
823 	char *pathbuf;
824 	int result;
825 
826 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
827 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
828 	    !e_pm_valid_power(dip, cmpt, level))
829 		return (DDI_FAILURE);
830 
831 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
832 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
833 	    level, cur_power(cp)))
834 
835 	if (pm_set_power(dip, cmpt, level,  direction,
836 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
837 		if (direction == PM_LEVEL_UPONLY) {
838 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
839 			(void) ddi_pathname(dip, pathbuf);
840 			cmn_err(CE_WARN, "Device %s failed to power up.",
841 			    pathbuf);
842 			kmem_free(pathbuf, MAXPATHLEN);
843 		}
844 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
845 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
846 		    pm_decode_direction(direction), level, result))
847 		return (DDI_FAILURE);
848 	}
849 
850 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
851 	    PM_DEVICE(dip)))
852 	pm_rescan(dip);
853 	return (DDI_SUCCESS);
854 }
855 
856 /*
857  * We can get multiple pm_rescan() threads, if one of them discovers
858  * that no scan is running at the moment, it kicks it into action.
859  * Otherwise, it tells the current scanning thread to scan again when
860  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
861  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
862  * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info pointer of the device to scan, passed as void * so
 * that pm_rescan() can be handed directly to timeout() as its callback.
 * The call is a no-op when scans are disabled, autopm is off, the node has
 * no pm_info or pm_scan structure, or PM_SCAN_STOP has been set.
863  */
864 void
865 pm_rescan(void *arg)
866 {
867 	PMD_FUNC(pmf, "rescan")
868 	dev_info_t	*dip = (dev_info_t *)arg;
869 	pm_info_t	*info;
870 	pm_scan_t	*scanp;
871 	timeout_id_t	scanid;
872 
873 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
874 	PM_LOCK_DIP(dip);
875 	info = PM_GET_PM_INFO(dip);
876 	scanp = PM_GET_PM_SCAN(dip);
	/* nothing to do unless scanning is enabled and set up for this node */
877 	if (pm_scans_disabled || !autopm_enabled || !info || !scanp ||
878 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
879 		PM_UNLOCK_DIP(dip);
880 		return;
881 	}
882 	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is running; ask it to go around once more */
883 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
884 		PM_UNLOCK_DIP(dip);
885 		return;
886 	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending.  Claim its id while holding
		 * the lock, then drop the lock around untimeout() and retake
		 * it afterwards.
		 */
887 		scanid = scanp->ps_scan_id;
888 		scanp->ps_scan_id = 0;
889 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
890 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
891 		PM_UNLOCK_DIP(dip);
892 		(void) untimeout(scanid);
893 		PM_LOCK_DIP(dip);
894 	}
895 
896 	/*
897 	 * Dispatching pm_scan during attach time is risky due to the fact that
898 	 * attach might soon fail and dip dissolved, and panic may happen while
899 	 * attempting to stop scan. So schedule a pm_rescan instead.
900 	 * (Note that if either of the first two terms are true, taskq_dispatch
901 	 * will not be invoked).
902 	 *
903 	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
904 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
905 	 * to regulate the dispatching.
906 	 *
907 	 * Scan is stopped before the device is detached (in pm_detaching())
908 	 * but it may get re-started during the post_detach processing if the
909 	 * driver fails to detach.
910 	 */
911 	if (DEVI_IS_ATTACHING(dip) ||
912 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
913 	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
914 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
915 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		/*
		 * The lock was dropped above, so another thread may have
		 * scheduled a timeout in the meantime; cancel that one too.
		 */
916 		if (scanp->ps_scan_id) {
917 			scanid = scanp->ps_scan_id;
918 			scanp->ps_scan_id = 0;
919 			PM_UNLOCK_DIP(dip);
920 			(void) untimeout(scanid);
921 			PM_LOCK_DIP(dip);
			/*
			 * If yet another timeout appeared while the lock was
			 * dropped, leave it in place and do not add our own.
			 */
922 			if (scanp->ps_scan_id) {
923 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
924 				    "thread scheduled pm_rescan, scanid %lx\n",
925 				    pmf, PM_DEVICE(dip),
926 				    (ulong_t)scanp->ps_scan_id))
927 				PM_UNLOCK_DIP(dip);
928 				return;
929 			}
930 		}
		/*
		 * Try again later from a timeout: after pm_id_ticks when
		 * ps_idle_down is set, otherwise after pm_min_scan * hz.
		 */
931 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
932 		    (scanp->ps_idle_down ? pm_id_ticks :
933 		    (pm_min_scan * hz)));
934 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
935 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
936 		    (ulong_t)scanp->ps_scan_id))
937 	} else {
938 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
939 		    pmf, PM_DEVICE(dip)))
		/* pm_scan() clears this flag once it starts running */
940 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
941 	}
942 	PM_UNLOCK_DIP(dip);
943 }
944 
/*
 * Scan worker for one device; arg is its dev_info pointer.  pm_rescan()
 * dispatches this function on system_taskq and sets PM_SCAN_DISPATCHED;
 * that flag is cleared here on every path.  While PM_SCANNING is set this
 * thread repeatedly calls pm_scan_dev() until nobody has asked for another
 * pass (PM_SCAN_AGAIN), then schedules the next pm_rescan() timeout from
 * the value pm_scan_dev() returned, unless that value is LONG_MAX.
 */
945 void
946 pm_scan(void *arg)
947 {
948 	PMD_FUNC(pmf, "scan")
949 	dev_info_t	*dip = (dev_info_t *)arg;
950 	pm_scan_t	*scanp;
951 	time_t		nextscan;
952 
953 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
954 
955 	PM_LOCK_DIP(dip);
956 	scanp = PM_GET_PM_SCAN(dip);
957 	ASSERT(scanp && PM_GET_PM_INFO(dip));
958 
	/* scanning has been turned off or stopped: just drop our requests */
959 	if (pm_scans_disabled || !autopm_enabled ||
960 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
961 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
962 		PM_UNLOCK_DIP(dip);
963 		return;
964 	}
965 
966 	if (scanp->ps_idle_down) {
967 		/*
968 		 * make sure we remember idledown was in effect until
969 		 * we've completed the scan
970 		 */
971 		PMID_SET_SCANS(scanp->ps_idle_down)
972 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
973 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
974 	}
975 
976 	/* another thread may already be running pm_scan(); let it rescan */
977 	if (scanp->ps_scan_flags & PM_SCANNING) {
978 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
979 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
980 		    pmf, PM_DEVICE(dip)))
981 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
982 		PM_UNLOCK_DIP(dip);
983 		return;
984 	}
985 
	/* we are now the only scanner; the lock is dropped for each pass */
986 	scanp->ps_scan_flags |= PM_SCANNING;
987 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
988 	do {
989 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
990 		PM_UNLOCK_DIP(dip);
991 		nextscan = pm_scan_dev(dip);
992 		PM_LOCK_DIP(dip);
993 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
994 
995 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
996 	scanp->ps_scan_flags &= ~PM_SCANNING;
997 
998 	if (scanp->ps_idle_down) {
999 		scanp->ps_idle_down &= ~PMID_SCANS;
1000 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1001 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1002 	}
1003 
1004 	/* schedule for next idle check */
1005 	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot overflow a long */
1006 		if (nextscan > (LONG_MAX / hz))
1007 			nextscan = (LONG_MAX - 1) / hz;
1008 		if (scanp->ps_scan_id) {
			/* a timeout is already pending; do not add another */
1009 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1010 			    "another rescan scheduled scanid(%lx)\n", pmf,
1011 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1012 			PM_UNLOCK_DIP(dip);
1013 			return;
1014 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
1015 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1016 			    (clock_t)(nextscan * hz));
1017 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1018 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1019 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1020 		}
1021 	}
1022 	PM_UNLOCK_DIP(dip);
1023 }
1024 
1025 void
1026 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1027 {
1028 	int components = PM_NUMCMPTS(dip);
1029 	int i;
1030 
1031 	ASSERT(components > 0);
1032 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1033 	for (i = 0; i < components; i++) {
1034 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1035 	}
1036 	PM_UNLOCK_BUSY(dip);
1037 }
1038 
1039 /*
1040  * Returns true if device needs to be kept up because it exported the
1041  * "no-involuntary-power-cycles" property or we're pretending it did (console
1042  * fb case) or it is an ancestor of such a device and has used up the "one
1043  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1044  * upon detach
1045  */
1046 int
1047 pm_noinvol(dev_info_t *dip)
1048 {
1049 	PMD_FUNC(pmf, "noinvol")
1050 
1051 	/*
1052 	 * This doesn't change over the life of a driver, so no locking needed
1053 	 */
1054 	if (PM_IS_CFB(dip)) {
1055 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1056 		    pmf, PM_DEVICE(dip)))
1057 		return (1);
1058 	}
1059 	/*
1060 	 * Not an issue if no such kids
1061 	 */
1062 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1063 #ifdef DEBUG
1064 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1065 			dev_info_t *pdip = dip;
1066 			do {
1067 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1068 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1069 				    DEVI(pdip)->devi_pm_noinvolpm,
1070 				    DEVI(pdip)->devi_pm_volpmd))
1071 				pdip = ddi_get_parent(pdip);
1072 			} while (pdip);
1073 		}
1074 #endif
1075 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1076 		return (0);
1077 	}
1078 
1079 	/*
1080 	 * Since we now maintain the counts correct at every node, we no longer
1081 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1082 	 * without the children getting their counts adjusted.
1083 	 */
1084 
1085 #ifdef	DEBUG
1086 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1087 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1088 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1089 		    PM_DEVICE(dip)))
1090 #endif
1091 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1092 }
1093 
1094 /*
1095  * This function performs the actual scanning of the device.
1096  * It attempts to power off the indicated device's components if they have
1097  * been idle and other restrictions are met.
1098  * pm_scan_dev calculates and returns when the next scan should happen for
1099  * this device.
1100  */
1101 time_t
1102 pm_scan_dev(dev_info_t *dip)
1103 {
1104 	PMD_FUNC(pmf, "scan_dev")
1105 	pm_scan_t	*scanp;
1106 	time_t		*timestamp, idletime, now, thresh;
1107 	time_t		timeleft = 0;
1108 	int		i, nxtpwr, curpwr, pwrndx, unused;
1109 	size_t		size;
1110 	pm_component_t	 *cp;
1111 	dev_info_t	*pdip = ddi_get_parent(dip);
1112 	int		circ;
1113 	static int	cur_threshold(dev_info_t *, int);
1114 	static int	pm_next_lower_power(pm_component_t *, int);
1115 
1116 	/*
1117 	 * skip attaching device
1118 	 */
1119 	if (DEVI_IS_ATTACHING(dip)) {
1120 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1121 		    pmf, PM_DEVICE(dip), pm_min_scan))
1122 		return (pm_min_scan);
1123 	}
1124 
1125 	PM_LOCK_DIP(dip);
1126 	scanp = PM_GET_PM_SCAN(dip);
1127 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1128 
1129 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1130 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1131 	    PM_KUC(dip)))
1132 
1133 	/* no scan under the following conditions */
1134 	if (pm_scans_disabled || !autopm_enabled ||
1135 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1136 	    (PM_KUC(dip) != 0) ||
1137 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1138 		PM_UNLOCK_DIP(dip);
1139 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1140 		    "scan_disabled(%d), apm_enabled(%d), kuc(%d), "
1141 		    "%s directpm, %s pm_noinvol\n", pmf, PM_DEVICE(dip),
1142 		    pm_scans_disabled, autopm_enabled, PM_KUC(dip),
1143 		    PM_ISDIRECT(dip) ? "is" : "is not",
1144 		    pm_noinvol(dip) ? "is" : "is not"))
1145 		return (LONG_MAX);
1146 	}
1147 	PM_UNLOCK_DIP(dip);
1148 
1149 	if (!ndi_devi_tryenter(pdip, &circ)) {
1150 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1151 		    pmf, PM_DEVICE(pdip)))
1152 		return ((time_t)1);
1153 	}
1154 	now = gethrestime_sec();
1155 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1156 	timestamp = kmem_alloc(size, KM_SLEEP);
1157 	pm_get_timestamps(dip, timestamp);
1158 
1159 	/*
1160 	 * Since we removed support for backwards compatible devices,
1161 	 * (see big comment at top of file)
1162 	 * it is no longer required to deal with component 0 last.
1163 	 */
1164 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1165 		/*
1166 		 * If already off (an optimization, perhaps)
1167 		 */
1168 		cp = PM_CP(dip, i);
1169 		pwrndx = cp->pmc_cur_pwr;
1170 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1171 		    PM_LEVEL_UNKNOWN :
1172 		    cp->pmc_comp.pmc_lvals[pwrndx];
1173 
1174 		if (pwrndx == 0) {
1175 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1176 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1177 			/* skip device if off or at its lowest */
1178 			continue;
1179 		}
1180 
1181 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1182 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1183 			/* were busy or newly became busy by another thread */
1184 			if (timeleft == 0)
1185 				timeleft = max(thresh, pm_min_scan);
1186 			else
1187 				timeleft = min(
1188 				    timeleft, max(thresh, pm_min_scan));
1189 			continue;
1190 		}
1191 
1192 		idletime = now - timestamp[i];		/* idle time */
1193 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1194 		    pmf, PM_DEVICE(dip), i, idletime))
1195 		if (idletime >= thresh || PM_IS_PID(dip)) {
1196 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1197 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1198 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1199 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1200 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1201 			    PM_CURPOWER(dip, i) != nxtpwr) {
1202 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1203 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1204 				    i, curpwr, nxtpwr))
1205 				timeleft = pm_min_scan;
1206 				continue;
1207 			} else {
1208 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1209 				    "%d->%d, GOOD curpwr %d\n", pmf,
1210 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1211 				    cur_power(cp)))
1212 
1213 				if (nxtpwr == 0)	/* component went off */
1214 					continue;
1215 
1216 				/*
1217 				 * scan to next lower level
1218 				 */
1219 				if (timeleft == 0)
1220 					timeleft = max(
1221 					    1, cur_threshold(dip, i));
1222 				else
1223 					timeleft = min(timeleft,
1224 					    max(1, cur_threshold(dip, i)));
1225 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1226 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1227 				    i, timeleft))
1228 			}
1229 		} else {	/* comp not idle long enough */
1230 			if (timeleft == 0)
1231 				timeleft = thresh - idletime;
1232 			else
1233 				timeleft = min(timeleft, (thresh - idletime));
1234 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1235 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1236 		}
1237 	}
1238 	ndi_devi_exit(pdip, circ);
1239 	kmem_free(timestamp, size);
1240 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1241 	    PM_DEVICE(dip), timeleft))
1242 
1243 	/*
1244 	 * if components are already at lowest level, timeleft is left 0
1245 	 */
1246 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1247 }
1248 
1249 /*
1250  * pm_scan_stop - cancel scheduled pm_rescan,
1251  *                wait for termination of dispatched pm_scan thread
1252  *                     and active pm_scan_dev thread.
1253  */
1254 void
1255 pm_scan_stop(dev_info_t *dip)
1256 {
1257 	PMD_FUNC(pmf, "scan_stop")
1258 	pm_scan_t	*scanp;
1259 	timeout_id_t	scanid;
1260 
1261 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1262 	PM_LOCK_DIP(dip);
1263 	scanp = PM_GET_PM_SCAN(dip);
1264 	if (!scanp) {
1265 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1266 		    pmf, PM_DEVICE(dip)))
1267 		PM_UNLOCK_DIP(dip);
1268 		return;
1269 	}
1270 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1271 
1272 	/* cancel scheduled scan taskq */
1273 	while (scanp->ps_scan_id) {
1274 		scanid = scanp->ps_scan_id;
1275 		scanp->ps_scan_id = 0;
1276 		PM_UNLOCK_DIP(dip);
1277 		(void) untimeout(scanid);
1278 		PM_LOCK_DIP(dip);
1279 	}
1280 
1281 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1282 		PM_UNLOCK_DIP(dip);
1283 		delay(1);
1284 		PM_LOCK_DIP(dip);
1285 	}
1286 	PM_UNLOCK_DIP(dip);
1287 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1288 }
1289 
1290 int
1291 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1292 {
1293 	_NOTE(ARGUNUSED(arg))
1294 
1295 	if (!PM_GET_PM_SCAN(dip))
1296 		return (DDI_WALK_CONTINUE);
1297 	ASSERT(!PM_ISBC(dip));
1298 	pm_scan_stop(dip);
1299 	return (DDI_WALK_CONTINUE);
1300 }
1301 
1302 /*
1303  * Converts a power level value to its index
1304  */
1305 static int
1306 power_val_to_index(pm_component_t *cp, int val)
1307 {
1308 	int limit, i, *ip;
1309 
1310 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1311 	    val != PM_LEVEL_EXACT);
1312 	/*  convert power value into index (i) */
1313 	limit = cp->pmc_comp.pmc_numlevels;
1314 	ip = cp->pmc_comp.pmc_lvals;
1315 	for (i = 0; i < limit; i++)
1316 		if (val == *ip++)
1317 			return (i);
1318 	return (-1);
1319 }
1320 
1321 /*
1322  * Converts a numeric power level to a printable string
1323  */
1324 static char *
1325 power_val_to_string(pm_component_t *cp, int val)
1326 {
1327 	int index;
1328 
1329 	if (val == PM_LEVEL_UPONLY)
1330 		return ("<UPONLY>");
1331 
1332 	if (val == PM_LEVEL_UNKNOWN ||
1333 	    (index = power_val_to_index(cp, val)) == -1)
1334 		return ("<LEVEL_UNKNOWN>");
1335 
1336 	return (cp->pmc_comp.pmc_lnames[index]);
1337 }
1338 
1339 /*
1340  * Return true if this node has been claimed by a ppm.
1341  */
1342 static int
1343 pm_ppm_claimed(dev_info_t *dip)
1344 {
1345 	return (PPM(dip) != NULL);
1346 }
1347 
1348 /*
1349  * A node which was voluntarily power managed has just used up its "free cycle"
1350  * and need is volpmd field cleared, and the same done to all its descendents
1351  */
1352 static void
1353 pm_clear_volpm_dip(dev_info_t *dip)
1354 {
1355 	PMD_FUNC(pmf, "clear_volpm_dip")
1356 
1357 	if (dip == NULL)
1358 		return;
1359 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1360 	    PM_DEVICE(dip)))
1361 	DEVI(dip)->devi_pm_volpmd = 0;
1362 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1363 		pm_clear_volpm_dip(dip);
1364 	}
1365 }
1366 
1367 /*
1368  * A node which was voluntarily power managed has used up the "free cycles"
1369  * for the subtree that it is the root of.  Scan through the list of detached
1370  * nodes and adjust the counts of any that are descendents of the node.
1371  */
1372 static void
1373 pm_clear_volpm_list(dev_info_t *dip)
1374 {
1375 	PMD_FUNC(pmf, "clear_volpm_list")
1376 	char	*pathbuf;
1377 	size_t	len;
1378 	pm_noinvol_t *ip;
1379 
1380 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1381 	(void) ddi_pathname(dip, pathbuf);
1382 	len = strlen(pathbuf);
1383 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1384 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1385 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1386 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1387 		    ip->ni_path))
1388 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1389 		    ip->ni_path[len] == '/') {
1390 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1391 			    ip->ni_path))
1392 			ip->ni_volpmd = 0;
1393 			ip->ni_wasvolpmd = 0;
1394 		}
1395 	}
1396 	kmem_free(pathbuf, MAXPATHLEN);
1397 	rw_exit(&pm_noinvol_rwlock);
1398 }
1399 
1400 /*
1401  * Powers a device, suspending or resuming the driver if it is a backward
1402  * compatible device, calling into ppm to change power level.
1403  * Called with the component's power lock held.
1404  */
1405 static int
1406 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1407     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1408 {
1409 	PMD_FUNC(pmf, "power_dev")
1410 	power_req_t power_req;
1411 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1412 	int		resume_needed = 0;
1413 	int		suspended = 0;
1414 	int		result;
1415 	struct pm_component *cp = PM_CP(dip, comp);
1416 	int		bc = PM_ISBC(dip);
1417 	int pm_all_components_off(dev_info_t *);
1418 	int		clearvolpmd = 0;
1419 	char		pathbuf[MAXNAMELEN];
1420 #ifdef DEBUG
1421 	char *ppmname, *ppmaddr;
1422 #endif
1423 	/*
1424 	 * If this is comp 0 of a backwards compat device and we are
1425 	 * going to take the power away, we need to detach it with
1426 	 * DDI_PM_SUSPEND command.
1427 	 */
1428 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1429 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1430 			/* We could not suspend before turning cmpt zero off */
1431 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1432 			    pmf, PM_DEVICE(dip)))
1433 			return (DDI_FAILURE);
1434 		} else {
1435 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1436 			suspended++;
1437 		}
1438 	}
1439 	power_req.request_type = PMR_PPM_SET_POWER;
1440 	power_req.req.ppm_set_power_req.who = dip;
1441 	power_req.req.ppm_set_power_req.cmpt = comp;
1442 	power_req.req.ppm_set_power_req.old_level = old_level;
1443 	power_req.req.ppm_set_power_req.new_level = level;
1444 	power_req.req.ppm_set_power_req.canblock = canblock;
1445 	power_req.req.ppm_set_power_req.cookie = NULL;
1446 #ifdef DEBUG
1447 	if (pm_ppm_claimed(dip)) {
1448 		ppmname = PM_NAME(PPM(dip));
1449 		ppmaddr = PM_ADDR(PPM(dip));
1450 
1451 	} else {
1452 		ppmname = "noppm";
1453 		ppmaddr = "0";
1454 	}
1455 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1456 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1457 	    power_val_to_string(cp, old_level), old_level,
1458 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1459 #endif
1460 	/*
1461 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1462 	 * bc device comp 0 is powering on, then we count it as a power cycle
1463 	 * against its voluntary count.
1464 	 */
1465 	if (DEVI(dip)->devi_pm_volpmd &&
1466 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1467 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1468 		clearvolpmd = 1;
1469 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1470 	    &power_req, &result)) == DDI_SUCCESS) {
1471 		/*
1472 		 * Now do involuntary pm accounting;  If we've just cycled power
1473 		 * on a voluntarily pm'd node, and by inference on its entire
1474 		 * subtree, we need to set the subtree (including those nodes
1475 		 * already detached) volpmd counts to 0, and subtract out the
1476 		 * value of the current node's volpmd count from the ancestors
1477 		 */
1478 		if (clearvolpmd) {
1479 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1480 			pm_clear_volpm_dip(dip);
1481 			pm_clear_volpm_list(dip);
1482 			if (volpmd) {
1483 				(void) ddi_pathname(dip, pathbuf);
1484 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1485 				    volpmd, 0, pathbuf, dip);
1486 			}
1487 		}
1488 	} else {
1489 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1490 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1491 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1492 	}
1493 	/*
1494 	 * If some other devices were also powered up (e.g. other cpus in
1495 	 * the same domain) return a pointer to that list
1496 	 */
1497 	if (devlist) {
1498 		*devlist = (pm_ppm_devlist_t *)
1499 		    power_req.req.ppm_set_power_req.cookie;
1500 	}
1501 	/*
1502 	 * We will have to resume the device if the device is backwards compat
1503 	 * device and either of the following is true:
1504 	 * -This is comp 0 and we have successfully powered it up
1505 	 * -This is comp 0 and we have failed to power it down. Resume is
1506 	 *  needed because we have suspended it above
1507 	 */
1508 
1509 	if (bc && comp == 0) {
1510 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1511 		if (power_op_ret == DDI_SUCCESS) {
1512 			if (POWERING_ON(old_level, level)) {
1513 				/*
1514 				 * It must be either suspended or resumed
1515 				 * via pm_power_has_changed path
1516 				 */
1517 				ASSERT((DEVI(dip)->devi_pm_flags &
1518 				    PMC_SUSPENDED) ||
1519 				    (PM_CP(dip, comp)->pmc_flags &
1520 				    PM_PHC_WHILE_SET_POWER));
1521 
1522 					resume_needed = suspended;
1523 			}
1524 		} else {
1525 			if (POWERING_OFF(old_level, level)) {
1526 				/*
1527 				 * It must be either suspended or resumed
1528 				 * via pm_power_has_changed path
1529 				 */
1530 				ASSERT((DEVI(dip)->devi_pm_flags &
1531 				    PMC_SUSPENDED) ||
1532 				    (PM_CP(dip, comp)->pmc_flags &
1533 				    PM_PHC_WHILE_SET_POWER));
1534 
1535 					resume_needed = suspended;
1536 			}
1537 		}
1538 	}
1539 	if (resume_needed) {
1540 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1541 		/* ppm is not interested in DDI_PM_RESUME */
1542 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1543 		    DDI_SUCCESS) {
1544 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1545 		} else
1546 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1547 			    PM_DEVICE(dip));
1548 	}
1549 	return (power_op_ret);
1550 }
1551 
1552 /*
1553  * Return true if we are the owner or a borrower of the devi lock.  See
1554  * pm_lock_power_single() about borrowing the lock.
1555  */
1556 static int
1557 pm_devi_lock_held(dev_info_t *dip)
1558 {
1559 	lock_loan_t *cur;
1560 
1561 	if (DEVI_BUSY_OWNED(dip))
1562 	    return (1);
1563 
1564 	/* return false if no locks borrowed */
1565 	if (lock_loan_head.pmlk_next == NULL)
1566 		return (0);
1567 
1568 	mutex_enter(&pm_loan_lock);
1569 	/* see if our thread is registered as a lock borrower. */
1570 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1571 		if (cur->pmlk_borrower == curthread)
1572 			break;
1573 	mutex_exit(&pm_loan_lock);
1574 
1575 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1576 }
1577 
1578 /*
1579  * pm_set_power: adjusts power level of device.	 Assumes device is power
1580  * manageable & component exists.
1581  *
1582  * Cases which require us to bring up devices we keep up ("wekeepups") for
1583  * backwards compatible devices:
1584  *	component 0 is off and we're bringing it up from 0
1585  *		bring up wekeepup first
1586  *	and recursively when component 0 is off and we bring some other
1587  *	component up from 0
1588  * For devices which are not backward compatible, our dependency notion is much
1589  * simpler.  Unless all components are off, then wekeeps must be on.
1590  * We don't treat component 0 differently.
1591  * Canblock tells how to deal with a direct pm'd device.
1592  * Scan arg tells us if we were called from scan, in which case we don't need
1593  * to go back to the root node and walk down to change power.
1594  */
1595 int
1596 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1597     pm_canblock_t canblock, int scan, int *retp)
1598 {
1599 	PMD_FUNC(pmf, "set_power")
1600 	char		*pathbuf;
1601 	pm_bp_child_pwrchg_t bpc;
1602 	pm_sp_misc_t	pspm;
1603 	int		ret = DDI_SUCCESS;
1604 	int		unused = DDI_SUCCESS;
1605 	dev_info_t	*pdip = ddi_get_parent(dip);
1606 
1607 #ifdef DEBUG
1608 	int		diverted = 0;
1609 
1610 	/*
1611 	 * This prevents operations on the console from calling prom_printf and
1612 	 * either deadlocking or bringing up the console because of debug
1613 	 * output
1614 	 */
1615 	if (dip == cfb_dip) {
1616 		diverted++;
1617 		mutex_enter(&pm_debug_lock);
1618 		pm_divertdebug++;
1619 		mutex_exit(&pm_debug_lock);
1620 	}
1621 #endif
1622 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1623 	    direction == PM_LEVEL_EXACT);
1624 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1625 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1626 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1627 	(void) ddi_pathname(dip, pathbuf);
1628 	bpc.bpc_dip = dip;
1629 	bpc.bpc_path = pathbuf;
1630 	bpc.bpc_comp = comp;
1631 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1632 	bpc.bpc_nlevel = level;
1633 	pspm.pspm_direction = direction;
1634 	pspm.pspm_errnop = retp;
1635 	pspm.pspm_canblock = canblock;
1636 	pspm.pspm_scan = scan;
1637 	bpc.bpc_private = &pspm;
1638 
1639 	/*
1640 	 * If a config operation is being done (we've locked the parent) or
1641 	 * we already hold the power lock (we've locked the node)
1642 	 * then we can operate directly on the node because we have already
1643 	 * brought up all the ancestors, otherwise, we have to go back to the
1644 	 * top of the tree.
1645 	 */
1646 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1647 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1648 		    (void *)&bpc, (void *)&unused);
1649 	else
1650 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1651 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1652 #ifdef DEBUG
1653 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1654 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1655 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1656 	}
1657 	if (diverted) {
1658 		mutex_enter(&pm_debug_lock);
1659 		pm_divertdebug--;
1660 		mutex_exit(&pm_debug_lock);
1661 	}
1662 #endif
1663 	kmem_free(pathbuf, MAXPATHLEN);
1664 	return (ret);
1665 }
1666 
1667 
/*
 * Look up a device node by path.  dip is the first node of the sibling
 * list to search and dev_name is the path relative to that list, of the
 * form "name[@addr][/rest]".  A sibling matches when its node name and its
 * unit address both match and it is attached; if "rest" is present the
 * search continues recursively among that node's children.
 *
 * Returns the node found, or NULL.  When holddip is non-zero the node that
 * is finally returned is held with e_ddi_hold_devi(); intermediate nodes
 * are only entered with ndi_devi_enter() while their children are searched.
 * dev_name itself is not modified; a private copy is taken apart instead.
 */
1668 static dev_info_t *
1669 find_dip(dev_info_t *dip, char *dev_name, int holddip)
1670 {
1671 	PMD_FUNC(pmf, "find_dip")
1672 	dev_info_t	*cdip;
1673 	char		*child_dev, *addr;
1674 	char		*device;	/* writeable copy of path */
1675 	int		dev_len = strlen(dev_name)+1;
1676 	int		circ;
1677 
1678 	device = kmem_zalloc(dev_len, KM_SLEEP);
1679 	(void) strcpy(device, dev_name);
	/* split the copy in place into name, address and remaining path */
1680 	addr = strchr(device, '@');
1681 	child_dev = strchr(device, '/');
1682 	if ((addr != NULL) && (child_dev == NULL || addr < child_dev)) {
1683 		/*
1684 		 * We have device = "name@addr..." form
1685 		 */
1686 		*addr++ = '\0';			/* for strcmp (and skip '@') */
1687 		if (child_dev != NULL)
1688 			*child_dev++ = '\0';	/* for strcmp (and skip '/') */
1689 	} else {
1690 		/*
1691 		 * We have device = "name/..." or "name"
1692 		 */
1693 		addr = "";
1694 		if (child_dev != NULL)
1695 			*child_dev++ = '\0';	/* for strcmp (and skip '/') */
1696 	}
1697 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
1698 		if (strcmp(ddi_node_name(dip), device) == 0) {
1699 			/* If the driver isn't loaded, we prune the search */
1700 			if (!i_ddi_devi_attached(dip)) {
1701 				continue;
1702 			}
1703 			if (strcmp(ddi_get_name_addr(dip), addr) == 0) {
1704 				PMD(PMD_NAMETODIP, ("%s: matched %s@%s"
1705 				    "(%s#%d)\n", pmf, PM_DEVICE(dip)))
1706 				if (child_dev != NULL) {
					/*
					 * More path left: search the children
					 * with this node entered.  Whatever
					 * the recursion returns (possibly
					 * NULL) is the final answer; later
					 * siblings are not tried.
					 */
1707 					PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): "
1708 					    "held, call find_dip %s\n", pmf,
1709 					    PM_DEVICE(dip), child_dev))
1710 					ndi_devi_enter(dip, &circ);
1711 					cdip = dip;
1712 					dip = find_dip(ddi_get_child(dip),
1713 					    child_dev, holddip);
1714 					ndi_devi_exit(cdip, circ);
1715 					PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): "
1716 					    "release, find_dip rets %s\n", pmf,
1717 					    PM_DEVICE(cdip), child_dev))
1718 				} else {
					/* last path component: this is it */
1719 					if (holddip) {
1720 						e_ddi_hold_devi(dip);
1721 						PMD(PMD_DHR | PMD_NAMETODIP,
1722 						    ("%s: held %s@%s(%s#%d), "
1723 						    "refcnt=%d\n", pmf,
1724 						    PM_DEVICE(dip),
1725 						    e_ddi_devi_holdcnt(dip)))
1726 					}
1727 				}
1728 				kmem_free(device, dev_len);
1729 				return (dip);
1730 			}
1731 		}
1732 	}
	/* ran off the end of the sibling list, so dip is NULL here */
1733 	kmem_free(device, dev_len);
1734 	return (dip);
1735 }
1736 
1737 /*
1738  * If holddip is set, then if a dip is found we return with the node held
1739  */
1740 dev_info_t *
1741 pm_name_to_dip(char *pathname, int holddip)
1742 {
1743 	PMD_FUNC(pmf, "name_to_dip")
1744 	dev_info_t	*dip = NULL;
1745 	char		dev_name[MAXNAMELEN];
1746 	dev_info_t	*first_child;
1747 	int		circular;
1748 
1749 	if (!pathname)
1750 		return (NULL);
1751 
1752 	(void) strncpy(dev_name, pathname, MAXNAMELEN);
1753 
1754 	PMD(PMD_NAMETODIP, ("%s: devname: %s\n", pmf, dev_name))
1755 
1756 	/*
1757 	 * First we attempt to match the node in the tree.  If we succeed
1758 	 * we hold the driver and look up the dip again.
1759 	 * No need to hold the root as that node is always held.
1760 	 */
1761 	if (dev_name[0] == '/') {
1762 		ndi_devi_enter(ddi_root_node(), &circular);
1763 		first_child = ddi_get_child(ddi_root_node());
1764 		dip = find_dip(first_child, dev_name + 1, holddip);
1765 		ndi_devi_exit(ddi_root_node(), circular);
1766 
1767 	} else {
1768 		PMD(PMD_NAMETODIP, ("%s: physpath with unrooted "
1769 		    "search\n", pmf))
1770 		return (NULL);
1771 	}
1772 
1773 	ASSERT(!dip ||
1774 	    (ddi_name_to_major(ddi_binding_name(dip)) != (major_t)-1));
1775 
1776 	return (dip);
1777 }
1778 
1779 /*
1780  * Search for a dependency and mark it unsatisfied
1781  */
1782 static void
1783 pm_unsatisfy(char *keeper, char *kept)
1784 {
1785 	PMD_FUNC(pmf, "unsatisfy")
1786 	pm_pdr_t *dp;
1787 
1788 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1789 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1790 		if (!dp->pdr_isprop) {
1791 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1792 			    (dp->pdr_kept_count > 0) &&
1793 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1794 				if (dp->pdr_satisfied) {
1795 					dp->pdr_satisfied = 0;
1796 					pm_unresolved_deps++;
1797 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1798 					    "pm_unresolved_deps now %d\n", pmf,
1799 					    pm_unresolved_deps))
1800 				}
1801 			}
1802 		}
1803 	}
1804 }
1805 
1806 /*
1807  * Device dip is being un power managed, it keeps up count other devices.
1808  * We need to release any hold we have on the kept devices, and also
1809  * mark the dependency no longer satisfied.
1810  */
1811 static void
1812 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1813 {
1814 	PMD_FUNC(pmf, "unkeeps")
1815 	int i, j;
1816 	dev_info_t *kept;
1817 	dev_info_t *dip;
1818 	struct pm_component *cp;
1819 	int keeper_on = 0, circ;
1820 
1821 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1822 	    keeper, (void *)keptpaths))
1823 	/*
1824 	 * Try to grab keeper. Keeper may have gone away by now,
1825 	 * in this case, used the passed in value pwr
1826 	 */
1827 	dip = pm_name_to_dip(keeper, 1);
1828 	for (i = 0; i < count; i++) {
1829 		/* Release power hold */
1830 		kept = pm_name_to_dip(keptpaths[i], 1);
1831 		if (kept) {
1832 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1833 			    PM_DEVICE(kept), i))
1834 			/*
1835 			 * We need to check if we skipped a bringup here
1836 			 * because we could have failed the bringup
1837 			 * (ie DIRECT PM device) and have
1838 			 * not increment the count.
1839 			 */
1840 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1841 				keeper_on = 0;
1842 				PM_LOCK_POWER(dip, &circ);
1843 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1844 				    cp = &DEVI(dip)->devi_pm_components[j];
1845 					if (cur_power(cp)) {
1846 						keeper_on++;
1847 						break;
1848 					}
1849 				}
1850 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1851 					pm_rele_power(kept);
1852 					DEVI(kept)->devi_pm_flags
1853 						&= ~PMC_SKIP_BRINGUP;
1854 				}
1855 				PM_UNLOCK_POWER(dip, circ);
1856 			} else if (pwr) {
1857 				if (PM_SKBU(kept) == 0) {
1858 					pm_rele_power(kept);
1859 					DEVI(kept)->devi_pm_flags
1860 					    &= ~PMC_SKIP_BRINGUP;
1861 				}
1862 			}
1863 			ddi_release_devi(kept);
1864 		}
1865 		/*
1866 		 * mark this dependency not satisfied
1867 		 */
1868 		pm_unsatisfy(keeper, keptpaths[i]);
1869 	}
1870 	if (dip)
1871 		ddi_release_devi(dip);
1872 }
1873 
1874 /*
1875  * Device kept is being un power managed, it is kept up by keeper.
1876  * We need to mark the dependency no longer satisfied.
1877  */
1878 static void
1879 pm_unkepts(char *kept, char *keeper)
1880 {
1881 	PMD_FUNC(pmf, "unkepts")
1882 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1883 	ASSERT(keeper != NULL);
1884 	/*
1885 	 * mark this dependency not satisfied
1886 	 */
1887 	pm_unsatisfy(keeper, kept);
1888 }
1889 
1890 /*
1891  * Removes dependency information and hold on the kepts, if the path is a
1892  * path of a keeper.
1893  */
1894 static void
1895 pm_free_keeper(char *path, int pwr)
1896 {
1897 	pm_pdr_t *dp;
1898 	int i;
1899 	size_t length;
1900 
1901 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1902 		if (strcmp(dp->pdr_keeper, path) != 0)
1903 			continue;
1904 		/*
1905 		 * Remove all our kept holds and the dependency records,
1906 		 * then free up the kept lists.
1907 		 */
1908 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1909 		if (dp->pdr_kept_count)  {
1910 			for (i = 0; i < dp->pdr_kept_count; i++) {
1911 				length = strlen(dp->pdr_kept_paths[i]);
1912 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1913 			}
1914 			kmem_free(dp->pdr_kept_paths,
1915 			    dp->pdr_kept_count * sizeof (char **));
1916 			dp->pdr_kept_paths = NULL;
1917 			dp->pdr_kept_count = 0;
1918 		}
1919 	}
1920 }
1921 
1922 /*
1923  * Removes the device represented by path from the list of kepts, if the
1924  * path is a path of a kept
1925  */
1926 static void
1927 pm_free_kept(char *path)
1928 {
1929 	pm_pdr_t *dp;
1930 	int i;
1931 	int j, count;
1932 	size_t length;
1933 	char **paths;
1934 
1935 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1936 		if (dp->pdr_kept_count == 0)
1937 			continue;
1938 		count = dp->pdr_kept_count;
1939 		/* Remove this device from the kept path lists */
1940 		for (i = 0; i < count; i++) {
1941 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1942 				pm_unkepts(path, dp->pdr_keeper);
1943 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1944 				kmem_free(dp->pdr_kept_paths[i], length);
1945 				dp->pdr_kept_paths[i] = NULL;
1946 				dp->pdr_kept_count--;
1947 			}
1948 		}
1949 		/* Compact the kept paths array */
1950 		if (dp->pdr_kept_count) {
1951 			length = dp->pdr_kept_count * sizeof (char **);
1952 			paths = kmem_zalloc(length, KM_SLEEP);
1953 			j = 0;
1954 			for (i = 0; i < count; i++) {
1955 				if (dp->pdr_kept_paths[i] != NULL) {
1956 					paths[j] = dp->pdr_kept_paths[i];
1957 					j++;
1958 				}
1959 			}
1960 			ASSERT(j == dp->pdr_kept_count);
1961 		}
1962 		/* Now free the old array and point to the new one */
1963 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1964 		if (dp->pdr_kept_count)
1965 			dp->pdr_kept_paths = paths;
1966 		else
1967 			dp->pdr_kept_paths = NULL;
1968 	}
1969 }
1970 
1971 /*
1972  * Free the dependency information for a device.
1973  */
1974 void
1975 pm_free_keeps(char *path, int pwr)
1976 {
1977 	PMD_FUNC(pmf, "free_keeps")
1978 
1979 #ifdef DEBUG
1980 	int doprdeps = 0;
1981 	void prdeps(char *);
1982 
1983 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
1984 	if (pm_debug & PMD_KEEPS) {
1985 		doprdeps = 1;
1986 		prdeps("pm_free_keeps before");
1987 	}
1988 #endif
1989 	/*
1990 	 * First assume we are a keeper and remove all our kepts.
1991 	 */
1992 	pm_free_keeper(path, pwr);
1993 	/*
1994 	 * Now assume we a kept device, and remove all our records.
1995 	 */
1996 	pm_free_kept(path);
1997 #ifdef	DEBUG
1998 	if (doprdeps) {
1999 		prdeps("pm_free_keeps after");
2000 	}
2001 #endif
2002 }
2003 
2004 static int
2005 pm_is_kept(char *path)
2006 {
2007 	pm_pdr_t *dp;
2008 	int i;
2009 
2010 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2011 		if (dp->pdr_kept_count == 0)
2012 			continue;
2013 		for (i = 0; i < dp->pdr_kept_count; i++) {
2014 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2015 				return (1);
2016 		}
2017 	}
2018 	return (0);
2019 }
2020 
2021 static void
2022 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2023 {
2024 	PMD_FUNC(pmf, "hold_rele_power")
2025 	int circ;
2026 
2027 	if ((dip == NULL) ||
2028 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2029 		return;
2030 	PM_LOCK_POWER(dip, &circ);
2031 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2032 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2033 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2034 	PM_KUC(dip) += cnt;
2035 	ASSERT(PM_KUC(dip) >= 0);
2036 	PM_UNLOCK_POWER(dip, circ);
2037 	if (cnt < 0 && PM_KUC(dip) == 0)
2038 		pm_rescan(dip);
2039 }
2040 
2041 #define	MAX_PPM_HANDLERS	4
2042 
2043 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2044 
2045 struct	ppm_callbacks {
2046 	int (*ppmc_func)(dev_info_t *);
2047 	dev_info_t	*ppmc_dip;
2048 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2049 
2050 
2051 /*
2052  * This routine calls into all the registered ppms to notify them
2053  * that either all components of power-managed devices are at their
2054  * lowest levels or no longer all are at their lowest levels.
2055  */
2056 static void
2057 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2058 {
2059 	struct ppm_callbacks *ppmcp;
2060 	power_req_t power_req;
2061 	int result = 0;
2062 
2063 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2064 	power_req.req.ppm_all_lowest_req.mode = mode;
2065 	mutex_enter(&ppm_lock);
2066 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2067 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2068 		    DDI_CTLOPS_POWER, &power_req, &result);
2069 	mutex_exit(&ppm_lock);
2070 }
2071 
2072 static void
2073 pm_set_pm_info(dev_info_t *dip, void *value)
2074 {
2075 	DEVI(dip)->devi_pm_info = value;
2076 }
2077 
2078 pm_rsvp_t *pm_blocked_list;
2079 
2080 /*
2081  * Look up an entry in the blocked list by dip and component
2082  */
2083 static pm_rsvp_t *
2084 pm_rsvp_lookup(dev_info_t *dip, int comp)
2085 {
2086 	pm_rsvp_t *p;
2087 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2088 	for (p = pm_blocked_list; p; p = p->pr_next)
2089 		if (p->pr_dip == dip && p->pr_comp == comp) {
2090 			return (p);
2091 		}
2092 	return (NULL);
2093 }
2094 
2095 /*
2096  * Called when a device which is direct power managed (or the parent or
2097  * dependent of such a device) changes power, or when a pm clone is closed
2098  * that was direct power managing a device.  This call results in pm_blocked()
2099  * (below) returning.
2100  */
2101 void
2102 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2103 {
2104 	PMD_FUNC(pmf, "proceed")
2105 	pm_rsvp_t *found = NULL;
2106 	pm_rsvp_t *p;
2107 
2108 	mutex_enter(&pm_rsvp_lock);
2109 	switch (cmd) {
2110 	/*
2111 	 * we're giving up control, let any pending op continue
2112 	 */
2113 	case PMP_RELEASE:
2114 		for (p = pm_blocked_list; p; p = p->pr_next) {
2115 			if (dip == p->pr_dip) {
2116 				p->pr_retval = PMP_RELEASE;
2117 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2118 				    pmf, PM_DEVICE(dip)))
2119 				cv_signal(&p->pr_cv);
2120 			}
2121 		}
2122 		break;
2123 
2124 	/*
2125 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2126 	 * succeed and a non-matching request for the same device fail
2127 	 */
2128 	case PMP_SETPOWER:
2129 		found = pm_rsvp_lookup(dip, comp);
2130 		if (!found)	/* if driver not waiting */
2131 			break;
2132 		/*
2133 		 * This cannot be pm_lower_power, since that can only happen
2134 		 * during detach or probe
2135 		 */
2136 		if (found->pr_newlevel <= newlevel) {
2137 			found->pr_retval = PMP_SUCCEED;
2138 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2139 			    PM_DEVICE(dip)))
2140 		} else {
2141 			found->pr_retval = PMP_FAIL;
2142 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2143 			    PM_DEVICE(dip)))
2144 		}
2145 		cv_signal(&found->pr_cv);
2146 		break;
2147 
2148 	default:
2149 		panic("pm_proceed unknown cmd %d", cmd);
2150 	}
2151 	mutex_exit(&pm_rsvp_lock);
2152 }
2153 
2154 /*
2155  * This routine dispatches new work to the dependency thread. Caller must
2156  * be prepared to block for memory if necessary.
2157  */
2158 void
2159 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2160     int *res, int cached_pwr)
2161 {
2162 	pm_dep_wk_t	*new_work;
2163 
2164 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2165 	new_work->pdw_type = cmd;
2166 	new_work->pdw_wait = wait;
2167 	new_work->pdw_done = 0;
2168 	new_work->pdw_ret = 0;
2169 	new_work->pdw_pwr = cached_pwr;
2170 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2171 	if (keeper != NULL) {
2172 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2173 		    KM_SLEEP);
2174 		(void) strcpy(new_work->pdw_keeper, keeper);
2175 	}
2176 	if (kept != NULL) {
2177 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2178 		(void) strcpy(new_work->pdw_kept, kept);
2179 	}
2180 	mutex_enter(&pm_dep_thread_lock);
2181 	if (pm_dep_thread_workq == NULL) {
2182 		pm_dep_thread_workq = new_work;
2183 		pm_dep_thread_tail = new_work;
2184 		new_work->pdw_next = NULL;
2185 	} else {
2186 		pm_dep_thread_tail->pdw_next = new_work;
2187 		pm_dep_thread_tail = new_work;
2188 		new_work->pdw_next = NULL;
2189 	}
2190 	cv_signal(&pm_dep_thread_cv);
2191 	/* If caller asked for it, wait till it is done. */
2192 	if (wait)  {
2193 		while (!new_work->pdw_done)
2194 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2195 		/*
2196 		 * Pass return status, if any, back.
2197 		 */
2198 		if (res != NULL)
2199 			*res = new_work->pdw_ret;
2200 		/*
2201 		 * If we asked to wait, it is our job to free the request
2202 		 * structure.
2203 		 */
2204 		if (new_work->pdw_keeper)
2205 			kmem_free(new_work->pdw_keeper,
2206 			    strlen(new_work->pdw_keeper) + 1);
2207 		if (new_work->pdw_kept)
2208 			kmem_free(new_work->pdw_kept,
2209 			    strlen(new_work->pdw_kept) + 1);
2210 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2211 	}
2212 	mutex_exit(&pm_dep_thread_lock);
2213 }
2214 
2215 /*
2216  * Release the pm resource for this device.
2217  */
2218 void
2219 pm_rem_info(dev_info_t *dip)
2220 {
2221 	PMD_FUNC(pmf, "rem_info")
2222 	int		i, count = 0;
2223 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2224 	dev_info_t	*pdip = ddi_get_parent(dip);
2225 	char		*pathbuf;
2226 	int		work_type = PM_DEP_WK_DETACH;
2227 
2228 	ASSERT(info);
2229 
2230 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2231 	if (PM_ISDIRECT(dip)) {
2232 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2233 		ASSERT(info->pmi_clone);
2234 		info->pmi_clone = 0;
2235 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2236 	}
2237 	ASSERT(!PM_GET_PM_SCAN(dip));
2238 
2239 	/*
2240 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2241 	 * Others we check all components.  BC node that has already
2242 	 * called pm_destroy_components() has zero component count.
2243 	 * Parents that get notification are not adjusted because their
2244 	 * kidsupcnt is always 0 (or 1 during configuration).
2245 	 */
2246 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2247 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2248 
2249 	/* node is detached, so we can examine power without locking */
2250 	if (PM_ISBC(dip)) {
2251 		count = (PM_CURPOWER(dip, 0) != 0);
2252 	} else {
2253 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2254 			count += (PM_CURPOWER(dip, i) != 0);
2255 	}
2256 
2257 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2258 		e_pm_hold_rele_power(pdip, -count);
2259 
2260 	/* Schedule a request to clean up dependency records */
2261 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2262 	(void) ddi_pathname(dip, pathbuf);
2263 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2264 	    PM_DEP_NOWAIT, NULL, (count > 0));
2265 	kmem_free(pathbuf, MAXPATHLEN);
2266 
2267 	/*
2268 	 * Adjust the pm_comps_notlowest count since this device is
2269 	 * not being power-managed anymore.
2270 	 */
2271 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2272 		if (PM_CURPOWER(dip, i) != 0)
2273 			PM_DECR_NOTLOWEST(dip);
2274 	}
2275 	/*
2276 	 * Once we clear the info pointer, it looks like it is not power
2277 	 * managed to everybody else.
2278 	 */
2279 	pm_set_pm_info(dip, NULL);
2280 	kmem_free(info, sizeof (pm_info_t));
2281 }
2282 
2283 int
2284 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2285 {
2286 	int components = PM_NUMCMPTS(dip);
2287 	int *bufp;
2288 	size_t size;
2289 	int i;
2290 
2291 	if (components <= 0) {
2292 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2293 		    "can't get normal power values\n", PM_DEVICE(dip));
2294 		return (DDI_FAILURE);
2295 	} else {
2296 		size = components * sizeof (int);
2297 		bufp = kmem_alloc(size, KM_SLEEP);
2298 		for (i = 0; i < components; i++) {
2299 			bufp[i] = pm_get_normal_power(dip, i);
2300 		}
2301 	}
2302 	*length = size;
2303 	*valuep = bufp;
2304 	return (DDI_SUCCESS);
2305 }
2306 
2307 static int
2308 pm_reset_timestamps(dev_info_t *dip, void *arg)
2309 {
2310 	_NOTE(ARGUNUSED(arg))
2311 
2312 	int components;
2313 	int	i;
2314 
2315 	if (!PM_GET_PM_INFO(dip))
2316 		return (DDI_WALK_CONTINUE);
2317 	components = PM_NUMCMPTS(dip);
2318 	ASSERT(components > 0);
2319 	PM_LOCK_BUSY(dip);
2320 	for (i = 0; i < components; i++) {
2321 		struct pm_component *cp;
2322 		/*
2323 		 * If the component was not marked as busy,
2324 		 * reset its timestamp to now.
2325 		 */
2326 		cp = PM_CP(dip, i);
2327 		if (cp->pmc_timestamp)
2328 			cp->pmc_timestamp = gethrestime_sec();
2329 	}
2330 	PM_UNLOCK_BUSY(dip);
2331 	return (DDI_WALK_CONTINUE);
2332 }
2333 
2334 /*
2335  * Convert a power level to an index into the levels array (or
2336  * just PM_LEVEL_UNKNOWN in that special case).
2337  */
2338 static int
2339 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2340 {
2341 	PMD_FUNC(pmf, "level_to_index")
2342 	int i;
2343 	int limit = cp->pmc_comp.pmc_numlevels;
2344 	int *ip = cp->pmc_comp.pmc_lvals;
2345 
2346 	if (level == PM_LEVEL_UNKNOWN)
2347 		return (level);
2348 
2349 	for (i = 0; i < limit; i++) {
2350 		if (level == *ip++) {
2351 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2352 			    pmf, PM_DEVICE(dip),
2353 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2354 			return (i);
2355 		}
2356 	}
2357 	panic("pm_level_to_index: level %d not found for device "
2358 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2359 	/*NOTREACHED*/
2360 }
2361 
2362 /*
2363  * Internal function to set current power level
2364  */
2365 static void
2366 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2367 {
2368 	PMD_FUNC(pmf, "set_cur_pwr")
2369 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2370 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2371 
2372 	/*
2373 	 * Nothing to adjust if current & new levels are the same.
2374 	 */
2375 	if (curpwr != PM_LEVEL_UNKNOWN &&
2376 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2377 		return;
2378 
2379 	/*
2380 	 * Keep the count for comps doing transition to/from lowest
2381 	 * level.
2382 	 */
2383 	if (curpwr == 0) {
2384 		PM_INCR_NOTLOWEST(dip);
2385 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2386 		PM_DECR_NOTLOWEST(dip);
2387 	}
2388 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2389 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2390 }
2391 
2392 /*
2393  * This is the default method of setting the power of a device if no ppm
2394  * driver has claimed it.
2395  */
2396 int
2397 pm_power(dev_info_t *dip, int comp, int level)
2398 {
2399 	PMD_FUNC(pmf, "power")
2400 	struct dev_ops	*ops;
2401 	int		(*fn)(dev_info_t *, int, int);
2402 	struct pm_component *cp = PM_CP(dip, comp);
2403 	int retval;
2404 	pm_info_t *info = PM_GET_PM_INFO(dip);
2405 	static int pm_phc_impl(dev_info_t *, int, int, int);
2406 
2407 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2408 	    PM_DEVICE(dip), comp, level))
2409 	if (!(ops = ddi_get_driver(dip))) {
2410 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2411 		    PM_DEVICE(dip)))
2412 		return (DDI_FAILURE);
2413 	}
2414 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2415 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2416 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2417 		    (!fn ? " devo_power NULL" : "")))
2418 		return (DDI_FAILURE);
2419 	}
2420 	cp->pmc_flags |= PM_POWER_OP;
2421 	retval = (*fn)(dip, comp, level);
2422 	cp->pmc_flags &= ~PM_POWER_OP;
2423 	if (retval == DDI_SUCCESS) {
2424 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2425 		return (DDI_SUCCESS);
2426 	}
2427 
2428 	/*
2429 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2430 	 * updated only the power level of the component.  If our attempt to
2431 	 * set the device new to a power level above has failed we sync the
2432 	 * total power state via phc code now.
2433 	 */
2434 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2435 		int phc_lvl =
2436 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2437 
2438 		ASSERT(info);
2439 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2440 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2441 			pmf, PM_DEVICE(dip), comp, phc_lvl))
2442 	}
2443 
2444 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2445 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2446 	    level, power_val_to_string(cp, level)));
2447 	return (DDI_FAILURE);
2448 }
2449 
2450 int
2451 pm_unmanage(dev_info_t *dip)
2452 {
2453 	PMD_FUNC(pmf, "unmanage")
2454 	power_req_t power_req;
2455 	int result, retval = 0;
2456 
2457 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2458 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2459 	    PM_DEVICE(dip)))
2460 	power_req.request_type = PMR_PPM_UNMANAGE;
2461 	power_req.req.ppm_config_req.who = dip;
2462 	if (pm_ppm_claimed(dip))
2463 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2464 		    &power_req, &result);
2465 #ifdef DEBUG
2466 	else
2467 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2468 		    &power_req, &result);
2469 #endif
2470 	ASSERT(retval == DDI_SUCCESS);
2471 	pm_rem_info(dip);
2472 	return (retval);
2473 }
2474 
2475 int
2476 pm_raise_power(dev_info_t *dip, int comp, int level)
2477 {
2478 	if (level < 0)
2479 		return (DDI_FAILURE);
2480 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2481 	    !e_pm_valid_power(dip, comp, level))
2482 		return (DDI_FAILURE);
2483 
2484 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2485 }
2486 
2487 int
2488 pm_lower_power(dev_info_t *dip, int comp, int level)
2489 {
2490 	PMD_FUNC(pmf, "pm_lower_power")
2491 
2492 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2493 	    !e_pm_valid_power(dip, comp, level)) {
2494 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2495 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2496 		return (DDI_FAILURE);
2497 	}
2498 
2499 	if (!DEVI_IS_DETACHING(dip)) {
2500 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2501 		    pmf, PM_DEVICE(dip)))
2502 		return (DDI_FAILURE);
2503 	}
2504 
2505 	/*
2506 	 * If we don't care about saving power, or we're treating this node
2507 	 * specially, then this is a no-op
2508 	 */
2509 	if (!autopm_enabled || pm_noinvol(dip)) {
2510 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s\n", pmf, PM_DEVICE(dip),
2511 		    !autopm_enabled ? "!autopm_enabled " : "",
2512 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2513 		return (DDI_SUCCESS);
2514 	}
2515 
2516 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2517 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2518 		    PM_DEVICE(dip)))
2519 		return (DDI_FAILURE);
2520 	}
2521 	return (DDI_SUCCESS);
2522 }
2523 
2524 /*
2525  * Find the entries struct for a given dip in the blocked list, return it locked
2526  */
2527 static psce_t *
2528 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2529 {
2530 	pscc_t *p;
2531 	psce_t *psce;
2532 
2533 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2534 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2535 		if (p->pscc_dip == dip) {
2536 			*psccp = p;
2537 			psce = p->pscc_entries;
2538 			mutex_enter(&psce->psce_lock);
2539 			ASSERT(psce);
2540 			rw_exit(&pm_pscc_direct_rwlock);
2541 			return (psce);
2542 		}
2543 	}
2544 	rw_exit(&pm_pscc_direct_rwlock);
2545 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2546 	/*NOTREACHED*/
2547 }
2548 
2549 /*
2550  * Write an entry indicating a power level change (to be passed to a process
2551  * later) in the given psce.
2552  * If we were called in the path that brings up the console fb in the
2553  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2554  * we create a record that has a size of -1, a physaddr of NULL, and that
2555  * has the overflow flag set.
2556  */
2557 static int
2558 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2559     int old, int which, pm_canblock_t canblock)
2560 {
2561 	char	buf[MAXNAMELEN];
2562 	pm_state_change_t *p;
2563 	size_t	size;
2564 	caddr_t physpath = NULL;
2565 	int	overrun = 0;
2566 
2567 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2568 	(void) ddi_pathname(dip, buf);
2569 	size = strlen(buf) + 1;
2570 	p = psce->psce_in;
2571 	if (canblock == PM_CANBLOCK_BYPASS) {
2572 		physpath = kmem_alloc(size, KM_NOSLEEP);
2573 		if (physpath == NULL) {
2574 			/*
2575 			 * mark current entry as overrun
2576 			 */
2577 			p->flags |= PSC_EVENT_LOST;
2578 			size = (size_t)-1;
2579 		}
2580 	} else
2581 		physpath = kmem_alloc(size, KM_SLEEP);
2582 	if (p->size) {	/* overflow; mark the next entry */
2583 		if (p->size != (size_t)-1)
2584 			kmem_free(p->physpath, p->size);
2585 		ASSERT(psce->psce_out == p);
2586 		if (p == psce->psce_last) {
2587 			psce->psce_first->flags |= PSC_EVENT_LOST;
2588 			psce->psce_out = psce->psce_first;
2589 		} else {
2590 			(p + 1)->flags |= PSC_EVENT_LOST;
2591 			psce->psce_out = (p + 1);
2592 		}
2593 		overrun++;
2594 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2595 		p->flags |= PSC_EVENT_LOST;
2596 		p->size = 0;
2597 		p->physpath = NULL;
2598 	}
2599 	if (which == PSC_INTEREST) {
2600 		mutex_enter(&pm_compcnt_lock);
2601 		if (pm_comps_notlowest == 0)
2602 			p->flags |= PSC_ALL_LOWEST;
2603 		else
2604 			p->flags &= ~PSC_ALL_LOWEST;
2605 		mutex_exit(&pm_compcnt_lock);
2606 	}
2607 	p->event = event;
2608 	p->timestamp = gethrestime_sec();
2609 	p->component = comp;
2610 	p->old_level = old;
2611 	p->new_level = new;
2612 	p->physpath = physpath;
2613 	p->size = size;
2614 	if (physpath != NULL)
2615 		(void) strcpy(p->physpath, buf);
2616 	if (p == psce->psce_last)
2617 		psce->psce_in = psce->psce_first;
2618 	else
2619 		psce->psce_in = ++p;
2620 	mutex_exit(&psce->psce_lock);
2621 	return (overrun);
2622 }
2623 
2624 /*
2625  * Find the next entry on the interest list.  We keep a pointer to the item we
2626  * last returned in the user's cooke.  Returns a locked entries struct.
2627  */
2628 static psce_t *
2629 psc_interest(void **cookie, pscc_t **psccp)
2630 {
2631 	pscc_t *pscc;
2632 	pscc_t **cookiep = (pscc_t **)cookie;
2633 
2634 	if (*cookiep == NULL)
2635 		pscc = pm_pscc_interest;
2636 	else
2637 		pscc = (*cookiep)->pscc_next;
2638 	if (pscc) {
2639 		*cookiep = pscc;
2640 		*psccp = pscc;
2641 		mutex_enter(&pscc->pscc_entries->psce_lock);
2642 		return (pscc->pscc_entries);
2643 	} else {
2644 		return (NULL);
2645 	}
2646 }
2647 
2648 /*
2649  * Create an entry for a process to pick up indicating a power level change.
2650  */
2651 static void
2652 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2653     int newlevel, int oldlevel, pm_canblock_t canblock)
2654 {
2655 	PMD_FUNC(pmf, "enqueue_notify")
2656 	pscc_t	*pscc;
2657 	psce_t	*psce;
2658 	void		*cookie = NULL;
2659 	int	overrun;
2660 
2661 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2662 	switch (cmd) {
2663 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2664 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2665 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2666 		psce = pm_psc_dip_to_direct(dip, &pscc);
2667 		ASSERT(psce);
2668 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2669 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2670 		    pm_poll_cnt[pscc->pscc_clone]))
2671 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2672 		    PSC_DIRECT, canblock);
2673 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2674 		mutex_enter(&pm_clone_lock);
2675 		if (!overrun)
2676 			pm_poll_cnt[pscc->pscc_clone]++;
2677 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2678 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2679 		mutex_exit(&pm_clone_lock);
2680 		break;
2681 	case PSC_HAS_CHANGED:
2682 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2683 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2684 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2685 			psce = pm_psc_dip_to_direct(dip, &pscc);
2686 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2687 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2688 			    pm_poll_cnt[pscc->pscc_clone]))
2689 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2690 			    oldlevel, PSC_DIRECT, canblock);
2691 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2692 			mutex_enter(&pm_clone_lock);
2693 			if (!overrun)
2694 				pm_poll_cnt[pscc->pscc_clone]++;
2695 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2696 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2697 			mutex_exit(&pm_clone_lock);
2698 		}
2699 		mutex_enter(&pm_clone_lock);
2700 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2701 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2702 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2703 			    oldlevel, PSC_INTEREST, canblock);
2704 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2705 		}
2706 		rw_exit(&pm_pscc_interest_rwlock);
2707 		mutex_exit(&pm_clone_lock);
2708 		break;
2709 #ifdef DEBUG
2710 	default:
2711 		ASSERT(0);
2712 #endif
2713 	}
2714 }
2715 
2716 static void
2717 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2718 {
2719 	if (listp) {
2720 		pm_ppm_devlist_t *p, *next = NULL;
2721 
2722 		for (p = *listp; p; p = next) {
2723 			next = p->ppd_next;
2724 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2725 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2726 			    canblock);
2727 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2728 		}
2729 		*listp = NULL;
2730 	}
2731 }
2732 
2733 /*
2734  * Try to get the power locks of the parent node and target (child)
2735  * node.  Return true if successful (with both locks held) or false
2736  * (with no locks held).
2737  */
2738 static int
2739 pm_try_parent_child_locks(dev_info_t *pdip,
2740     dev_info_t *dip, int *pcircp, int *circp)
2741 {
2742 	if (ndi_devi_tryenter(pdip, pcircp))
2743 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2744 			return (1);
2745 		} else {
2746 			ndi_devi_exit(pdip, *pcircp);
2747 		}
2748 	return (0);
2749 }
2750 
2751 /*
2752  * Determine if the power lock owner is blocked by current thread.
2753  * returns :
2754  * 	1 - If the thread owning the effective power lock (the first lock on
2755  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2756  *          a mutex held by the current thread.
2757  *
2758  *	0 - otherwise
2759  *
2760  * Note : This function is called by pm_power_has_changed to determine whether
2761  * it is executing in parallel with pm_set_power.
2762  */
2763 static int
2764 pm_blocked_by_us(dev_info_t *dip)
2765 {
2766 	power_req_t power_req;
2767 	kthread_t *owner;
2768 	int result;
2769 	kmutex_t *mp;
2770 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2771 
2772 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2773 	power_req.req.ppm_power_lock_owner_req.who = dip;
2774 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2775 	    DDI_SUCCESS) {
2776 		/*
2777 		 * It is assumed that if the device is claimed by ppm, ppm
2778 		 * will always implement this request type and it'll always
2779 		 * return success. We panic here, if it fails.
2780 		 */
2781 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2782 		    PM_DEVICE(dip));
2783 		/*NOTREACHED*/
2784 	}
2785 
2786 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2787 	    owner->t_state == TS_SLEEP &&
2788 	    owner->t_sobj_ops &&
2789 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2790 	    (mp = (kmutex_t *)owner->t_wchan) &&
2791 	    mutex_owner(mp) == curthread)
2792 		return (1);
2793 
2794 	return (0);
2795 }
2796 
2797 /*
2798  * Notify parent which wants to hear about a child's power changes.
2799  */
2800 static void
2801 pm_notify_parent(dev_info_t *dip,
2802     dev_info_t *pdip, int comp, int old_level, int level)
2803 {
2804 	pm_bp_has_changed_t bphc;
2805 	pm_sp_misc_t pspm;
2806 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2807 	int result = DDI_SUCCESS;
2808 
2809 	bphc.bphc_dip = dip;
2810 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2811 	bphc.bphc_comp = comp;
2812 	bphc.bphc_olevel = old_level;
2813 	bphc.bphc_nlevel = level;
2814 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2815 	pspm.pspm_scan = 0;
2816 	bphc.bphc_private = &pspm;
2817 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2818 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2819 	kmem_free(pathbuf, MAXPATHLEN);
2820 }
2821 
2822 /*
2823  * Check if we need to resume a BC device, and make the attach call as required.
2824  */
2825 static int
2826 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2827 {
2828 	int ret = DDI_SUCCESS;
2829 
2830 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2831 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2832 		/* ppm is not interested in DDI_PM_RESUME */
2833 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2834 			/* XXX Should we mark it resumed, */
2835 			/* even though it failed? */
2836 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2837 			    PM_NAME(dip), PM_ADDR(dip));
2838 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2839 	}
2840 
2841 	return (ret);
2842 }
2843 
2844 /*
2845  * Tests outside the lock to see if we should bother to enqueue an entry
2846  * for any watching process.  If yes, then caller will take the lock and
2847  * do the full protocol
2848  */
2849 static int
2850 pm_watchers()
2851 {
2852 	if (pm_processes_stopped)
2853 		return (0);
2854 	return (pm_pscc_direct || pm_pscc_interest);
2855 }
2856 
/*
 * A driver is reporting that the power of one of its device's components
 * has changed.  Update the power state accordingly.
 *
 * dip/comp identify the component and level is the power level it is now at.
 *
 * Returns DDI_FAILURE if level is negative or if the pm info, component
 * number or power level fail validation.  Returns DDI_SUCCESS when the
 * change could only be recorded (deadlock with a set-power operation on the
 * same component).  In all other cases returns the result of pm_phc_impl().
 */
int
pm_power_has_changed(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_power_has_changed")
	int ret;
	dev_info_t *pdip = ddi_get_parent(dip);
	struct pm_component *cp;
	int blocked, circ, pcirc, old_level;
	static int pm_phc_impl(dev_info_t *, int, int, int);

	if (level < 0) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
		    PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
	    PM_DEVICE(dip), comp, level))

	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
	    !e_pm_valid_power(dip, comp, level))
		return (DDI_FAILURE);

	/*
	 * A driver thread calling pm_power_has_changed and another thread
	 * calling pm_set_power can deadlock.  The problem is not resolvable
	 * by changing lock order, so we use pm_blocked_by_us() to detect
	 * this specific deadlock.  If we can't get the lock immediately
	 * and we are deadlocked, just update the component's level, do
	 * notifications, and return.  We intend to update the total power
	 * state later (if the other thread fails to set power to the
	 * desired level).  If we were called because of a power change on a
	 * component that isn't involved in a set_power op, update all state
	 * immediately.
	 */
	cp = PM_CP(dip, comp);
	/* Retry once per tick until both parent and child locks are held */
	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
		    (cp->pmc_flags & PM_POWER_OP)) {
			/*
			 * Deadlocked against a power operation on this very
			 * component: notify watchers and parent, then record
			 * the change without taking any power locks.
			 */
			if (pm_watchers()) {
				mutex_enter(&pm_rsvp_lock);
				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
				mutex_exit(&pm_rsvp_lock);
			}
			if (pdip && PM_WANTS_NOTIFICATION(pdip))
				pm_notify_parent(dip,
				    pdip, comp, cur_power(cp), level);
			(void) pm_check_and_resume(dip,
			    comp, cur_power(cp), level);

			/*
			 * Stash the old power index, update curpwr, and flag
			 * that the total power state needs to be synched.
			 */
			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
			/*
			 * Several pm_power_has_changed calls could arrive
			 * while the set power path remains blocked.  Keep the
			 * oldest old power and the newest new power of any
			 * sequence of phc calls which arrive during deadlock.
			 */
			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
			cp->pmc_cur_pwr =
			    pm_level_to_index(dip, cp, level);
			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
			return (DDI_SUCCESS);
		} else
			if (blocked) {	/* blocked, but different cmpt? */
				/*
				 * Try for the parent lock alone.  If even
				 * that fails, update the state with no lock
				 * held and skip the parent hold/release
				 * accounting done below.
				 */
				if (!ndi_devi_tryenter(pdip, &pcirc)) {
					cmn_err(CE_NOTE,
					    "!pm: parent kuc not updated due "
					    "to possible deadlock.\n");
					return (pm_phc_impl(dip,
						    comp, level, 1));
				}
				/* Same accounting as the non-deadlock case */
				old_level = cur_power(cp);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
				    (!PM_ISBC(dip) || comp == 0) &&
				    POWERING_ON(old_level, level))
					pm_hold_power(pdip);
				ret = pm_phc_impl(dip, comp, level, 1);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
					if ((!PM_ISBC(dip) ||
					    comp == 0) && level == 0 &&
					    old_level != PM_LEVEL_UNKNOWN)
						pm_rele_power(pdip);
				}
				ndi_devi_exit(pdip, pcirc);
				/* child lock not held: deadlock */
				return (ret);
			}
		delay(1);
		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
	}

	/* non-deadlock case */
	old_level = cur_power(cp);
	/*
	 * For a parent that does not want notification, take a hold on it
	 * before the component powers on.  For BC devices only component 0
	 * takes part in this accounting.
	 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
		pm_hold_power(pdip);
	ret = pm_phc_impl(dip, comp, level, 1);
	/*
	 * Release the parent once the component has gone to level 0, unless
	 * the previous level was unknown.
	 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
		    old_level != PM_LEVEL_UNKNOWN)
			pm_rele_power(pdip);
	}
	PM_UNLOCK_POWER(dip, circ);
	ndi_devi_exit(pdip, pcirc);
	return (ret);
}
2974 
2975 /*
2976  * Account for power changes to a component of the the console frame buffer.
2977  * If lowering power from full (or "unkown", which is treatd as full)
2978  * we will increment the "components off" count of the fb device.
2979  * Subsequent lowering of the same component doesn't affect the count.  If
2980  * raising a component back to full power, we will decrement the count.
2981  *
2982  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
2983  */
2984 static int
2985 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
2986 {
2987 	struct pm_component *cp = PM_CP(dip, cmpt);
2988 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
2989 	int want_normal = (new == cp->pmc_norm_pwr);
2990 	int incr = 0;
2991 
2992 	if (on && !want_normal)
2993 		incr = 1;
2994 	else if (!on && want_normal)
2995 		incr = -1;
2996 	return (incr);
2997 }
2998 
2999 /*
3000  * Adjust the count of console frame buffer components < full power.
3001  */
3002 static void
3003 update_comps_off(int incr, dev_info_t *dip)
3004 {
3005 		mutex_enter(&pm_cfb_lock);
3006 		pm_cfb_comps_off += incr;
3007 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3008 		mutex_exit(&pm_cfb_lock);
3009 }
3010 
/*
 * Update the power state in the framework (via the ppm).  The 'notify'
 * argument tells whether to notify watchers.  Power lock is already held.
 *
 * In order, this routine: returns early if the component is already at
 * 'level'; tells the ppm about the change; adjusts the console frame buffer
 * "components off" count; records the new current power; optionally
 * notifies the parent and resumes a BC device; dispatches dependency work
 * when the device as a whole turns off or on; optionally enqueues a
 * notification for watching processes; and finally triggers a rescan.
 *
 * Returns DDI_FAILURE if the ppm ctlop fails, else DDI_SUCCESS.
 *
 * NOTE(review): pm_power_has_changed() also calls this from its deadlock
 * paths, where its own comment says the child lock is not held -- confirm
 * how strict the "power lock is already held" requirement really is.
 */
static int
pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
{
	PMD_FUNC(pmf, "phc_impl")
	power_req_t power_req;
	int i, dodeps = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int result;
	int old_level;
	struct pm_component *cp;
	int incr = 0;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
	int work_type = 0;
	char *pathbuf;

	/* Must use "official" power level for this test. */
	cp = PM_CP(dip, comp);
	/*
	 * If power-has-changed calls arrived while a set-power operation was
	 * in progress, the stashed pmc_phc_pwr index is the official old one.
	 */
	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
	/* Convert the index into a level value, unless it is unknown */
	if (old_level != PM_LEVEL_UNKNOWN)
		old_level = cp->pmc_comp.pmc_lvals[old_level];

	if (level == old_level) {
		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_SUCCESS);
	}

	/*
	 * Tell ppm about this.
	 */
	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
	power_req.req.ppm_notify_level_req.who = dip;
	power_req.req.ppm_notify_level_req.cmpt = comp;
	power_req.req.ppm_notify_level_req.new_level = level;
	power_req.req.ppm_notify_level_req.old_level = old_level;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
	    &result) == DDI_FAILURE) {
		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
		    pmf, PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	/* Keep the console frame buffer "components off" count in step */
	if (PM_IS_CFB(dip)) {
		incr = calc_cfb_comps_incr(dip, comp, old_level, level);

		if (incr) {
			update_comps_off(incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, old_level, level, pm_cfb_comps_off))
		}
	}
	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	/* 'result' is not read again after this assignment */
	result = DDI_SUCCESS;

	if (notify) {
		if (pdip && PM_WANTS_NOTIFICATION(pdip))
			pm_notify_parent(dip, pdip, comp, old_level, level);
		(void) pm_check_and_resume(dip, comp, old_level, level);
	}

	/*
	 * Decrement the dependency kidsup count if we turn a device
	 * off.
	 */
	if (POWERING_OFF(old_level, level)) {
		dodeps = 1;
		/* Only counts as "off" if no component has power left */
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (cur_power(cp)) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_OFF;
	}

	/*
	 * Increment if we turn it on. Check to see
	 * if other comps are already on, if so,
	 * dont increment.
	 */
	if (POWERING_ON(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (comp == i)
				continue;
			/* -1 also treated as 0 in this case */
			if (cur_power(cp) > 0) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_ON;
	}

	/* Hand the device path to the dependency thread; no waiting */
	if (dodeps) {
		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, pathbuf);
		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
		    PM_DEP_NOWAIT, NULL, 0);
		kmem_free(pathbuf, MAXPATHLEN);
	}

	/*
	 * level != old_level always holds here because of the early return
	 * above; the test is redundant but harmless.
	 */
	if (notify && (level != old_level) && pm_watchers()) {
		mutex_enter(&pm_rsvp_lock);
		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
		    PM_CANBLOCK_BLOCK);
		mutex_exit(&pm_rsvp_lock);
	}

	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
	pm_rescan(dip);
	return (DDI_SUCCESS);
}
3134 
3135 /*
3136  * This function is called at startup time to notify pm of the existence
3137  * of any platform power managers for this platform.  As a result of
3138  * this registration, each function provided will be called each time
3139  * a device node is attached, until one returns true, and it must claim the
3140  * device node (by returning non-zero) if it wants to be involved in the
3141  * node's power management.  If it does claim the node, then it will
3142  * subsequently be notified of attach and detach events.
3143  *
3144  */
3145 
3146 int
3147 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3148 {
3149 	PMD_FUNC(pmf, "register_ppm")
3150 	struct ppm_callbacks *ppmcp;
3151 	pm_component_t *cp;
3152 	int i, pwr, result, circ;
3153 	power_req_t power_req;
3154 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3155 	void pm_ppm_claim(dev_info_t *);
3156 
3157 	mutex_enter(&ppm_lock);
3158 	ppmcp = ppm_callbacks;
3159 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3160 		if (ppmcp->ppmc_func == NULL) {
3161 			ppmcp->ppmc_func = func;
3162 			ppmcp->ppmc_dip = dip;
3163 			break;
3164 		}
3165 	}
3166 	mutex_exit(&ppm_lock);
3167 
3168 	if (i >= MAX_PPM_HANDLERS)
3169 		return (DDI_FAILURE);
3170 	while ((dip = ddi_get_parent(dip)) != NULL) {
3171 		if (PM_GET_PM_INFO(dip) == NULL)
3172 			continue;
3173 		pm_ppm_claim(dip);
3174 		if (pm_ppm_claimed(dip)) {
3175 			/*
3176 			 * Tell ppm about this.
3177 			 */
3178 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3179 			p->old_level = PM_LEVEL_UNKNOWN;
3180 			p->who = dip;
3181 			PM_LOCK_POWER(dip, &circ);
3182 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3183 				cp = PM_CP(dip, i);
3184 				pwr = cp->pmc_cur_pwr;
3185 				if (pwr != PM_LEVEL_UNKNOWN) {
3186 					p->cmpt = i;
3187 					p->new_level = cur_power(cp);
3188 					p->old_level = PM_LEVEL_UNKNOWN;
3189 					if (pm_ctlops(PPM(dip), dip,
3190 					    DDI_CTLOPS_POWER, &power_req,
3191 					    &result) == DDI_FAILURE) {
3192 						PMD(PMD_FAIL, ("%s: pc "
3193 						    "%s@%s(%s#%d) to %d "
3194 						    "fails\n", pmf,
3195 						    PM_DEVICE(dip), pwr))
3196 					}
3197 				}
3198 			}
3199 			PM_UNLOCK_POWER(dip, circ);
3200 		}
3201 	}
3202 	return (DDI_SUCCESS);
3203 }
3204 
3205 /*
3206  * Call the ppm's that have registered and adjust the devinfo struct as
3207  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3208  * by each ppm are assumed to be disjoint.
3209  */
3210 void
3211 pm_ppm_claim(dev_info_t *dip)
3212 {
3213 	struct ppm_callbacks *ppmcp;
3214 
3215 	if (PPM(dip)) {
3216 		return;
3217 	}
3218 	mutex_enter(&ppm_lock);
3219 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3220 		if ((*ppmcp->ppmc_func)(dip)) {
3221 			DEVI(dip)->devi_pm_ppm =
3222 			    (struct dev_info *)ppmcp->ppmc_dip;
3223 			mutex_exit(&ppm_lock);
3224 			return;
3225 		}
3226 	}
3227 	mutex_exit(&ppm_lock);
3228 }
3229 
3230 /*
3231  * Node is being detached so stop autopm until we see if it succeeds, in which
3232  * case pm_stop will be called.  For backwards compatible devices we bring the
3233  * device up to full power on the assumption the detach will succeed.
3234  */
3235 void
3236 pm_detaching(dev_info_t *dip)
3237 {
3238 	PMD_FUNC(pmf, "detaching")
3239 	pm_info_t *info = PM_GET_PM_INFO(dip);
3240 	int iscons;
3241 
3242 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3243 	    PM_NUMCMPTS(dip)))
3244 	if (info == NULL)
3245 		return;
3246 	ASSERT(DEVI_IS_DETACHING(dip));
3247 	PM_LOCK_DIP(dip);
3248 	info->pmi_dev_pm_state |= PM_DETACHING;
3249 	PM_UNLOCK_DIP(dip);
3250 	if (!PM_ISBC(dip))
3251 		pm_scan_stop(dip);
3252 
3253 	/*
3254 	 * console and old-style devices get brought up when detaching.
3255 	 */
3256 	iscons = PM_IS_CFB(dip);
3257 	if (iscons || PM_ISBC(dip)) {
3258 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3259 		if (iscons) {
3260 			mutex_enter(&pm_cfb_lock);
3261 			while (cfb_inuse) {
3262 				mutex_exit(&pm_cfb_lock);
3263 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3264 				delay(1);
3265 				mutex_enter(&pm_cfb_lock);
3266 			}
3267 			ASSERT(cfb_dip_detaching == NULL);
3268 			ASSERT(cfb_dip);
3269 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3270 			cfb_dip = NULL;
3271 			mutex_exit(&pm_cfb_lock);
3272 		}
3273 	}
3274 }
3275 
3276 /*
3277  * Node failed to detach.  If it used to be autopm'd, make it so again.
3278  */
3279 void
3280 pm_detach_failed(dev_info_t *dip)
3281 {
3282 	PMD_FUNC(pmf, "detach_failed")
3283 	pm_info_t *info = PM_GET_PM_INFO(dip);
3284 	int pm_all_at_normal(dev_info_t *);
3285 
3286 	if (info == NULL)
3287 		return;
3288 	ASSERT(DEVI_IS_DETACHING(dip));
3289 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3290 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3291 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3292 			/* Make sure the operation is still needed */
3293 			if (!pm_all_at_normal(dip)) {
3294 				if (pm_all_to_normal(dip,
3295 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3296 					PMD(PMD_ERROR, ("%s: could not bring "
3297 					    "%s@%s(%s#%d) to normal\n", pmf,
3298 					    PM_DEVICE(dip)))
3299 				}
3300 			}
3301 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3302 		}
3303 	}
3304 	if (!PM_ISBC(dip)) {
3305 		mutex_enter(&pm_scan_lock);
3306 		if (autopm_enabled)
3307 			pm_scan_init(dip);
3308 		mutex_exit(&pm_scan_lock);
3309 		pm_rescan(dip);
3310 	}
3311 }
3312 
/*
 * Generic Backwards Compatible (BC) component.
 *
 * bc_names holds the two level names shared by every BC component.
 * bc_comp is the template that e_pm_default_components() copies (by struct
 * assignment) into each component of a BC device before giving it its own
 * level and threshold arrays.  e_pm_destroy_components() relies on the rest
 * being static and does not free it for BC nodes.
 *
 * NOTE(review): the initializer is positional; presumably the fields are
 * name, number of levels, level values, thresholds and level names, in
 * that order -- confirm against the pm_comp_t declaration.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3317 
3318 static void
3319 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3320 {
3321 	pm_comp_t *pmc;
3322 	pmc = &cp->pmc_comp;
3323 	pmc->pmc_numlevels = 2;
3324 	pmc->pmc_lvals[0] = 0;
3325 	pmc->pmc_lvals[1] = norm;
3326 	e_pm_set_cur_pwr(dip, cp, norm);
3327 }
3328 
3329 static void
3330 e_pm_default_components(dev_info_t *dip, int cmpts)
3331 {
3332 	int i;
3333 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3334 
3335 	p = DEVI(dip)->devi_pm_components;
3336 	for (i = 0; i < cmpts; i++, p++) {
3337 		p->pmc_comp = bc_comp;	/* struct assignment */
3338 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3339 		    KM_SLEEP);
3340 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3341 		    KM_SLEEP);
3342 		p->pmc_comp.pmc_numlevels = 2;
3343 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3344 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3345 	}
3346 }
3347 
/*
 * Called from functions that require components to exist already to allow
 * for their creation by parsing the pm-components property.
 * Device will not be power managed as a result of this call
 * No locking needed because we're single threaded by the ndi_devi_enter
 * done while attaching, and the device isn't visible until after it has
 * attached
 *
 * 'style' selects what happens when there is no pm-components property:
 * with PM_STYLE_NEW that is a failure; otherwise the device is treated as
 * backwards compatible (BC), provided it already has components.
 *
 * Returns DDI_SUCCESS if the components are (or already were) set up.
 * Returns DDI_FAILURE if an earlier attempt failed, if the property was
 * malformed, if the property is missing and the device has no components
 * or style is PM_STYLE_NEW, or if a BC component has no normal power set.
 */
int
pm_premanage(dev_info_t *dip, int style)
{
	PMD_FUNC(pmf, "premanage")
	pm_comp_t	*pcp, *compp;
	int		cmpts, i, norm, error;
	pm_component_t *p = DEVI(dip)->devi_pm_components;
	pm_comp_t *pm_autoconfig(dev_info_t *, int *);

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	/*
	 * If this dip has already been processed, don't mess with it
	 */
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
		return (DDI_SUCCESS);
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
		return (DDI_FAILURE);
	}
	/*
	 * Look up pm-components property and create components accordingly
	 * If that fails, fall back to backwards compatibility
	 */
	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
		/*
		 * If error is set, the property existed but was not well formed
		 */
		if (error || (style == PM_STYLE_NEW)) {
			/* Remember the failure so later calls fail fast */
			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
			return (DDI_FAILURE);
		}
		/*
		 * If they don't have the pm-components property, then we
		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
		 * behavior driver must have called pm_create_components, and
		 * we need to flesh out dummy components
		 */
		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
			/*
			 * Not really failure, but we don't want the
			 * caller to treat it as success
			 */
			return (DDI_FAILURE);
		}
		DEVI(dip)->devi_pm_flags |= PMC_BC;
		e_pm_default_components(dip, cmpts);
		for (i = 0; i < cmpts; i++) {
			/*
			 * if normal power not set yet, we don't really know
			 * what *ANY* of the power values are.  If normal
			 * power is set, then we assume for this backwards
			 * compatible case that the values are 0, normal power.
			 */
			norm = pm_get_normal_power(dip, i);
			/*
			 * (uint_t)-1 is the "not set" value that
			 * e_pm_create_components() stores in pmc_norm_pwr.
			 * Components handled before this one stay set up.
			 */
			if (norm == (uint_t)-1) {
				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
				    PM_DEVICE(dip), i))
				return (DDI_FAILURE);
			}
			/*
			 * Components of BC devices start at their normal power,
			 * so count them to be not at their lowest power.
			 */
			PM_INCR_NOTLOWEST(dip);
			e_pm_default_levels(dip, PM_CP(dip, i), norm);
		}
	} else {
		/*
		 * e_pm_create_components was called from pm_autoconfig(), it
		 * creates components with no descriptions (or known levels)
		 */
		cmpts = PM_NUMCMPTS(dip);
		ASSERT(cmpts != 0);
		pcp = compp;
		p = DEVI(dip)->devi_pm_components;
		/* Copy each parsed description in; power starts out unknown */
		for (i = 0; i < cmpts; i++, p++) {
			p->pmc_comp = *pcp++;   /* struct assignment */
			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
		}
		pm_set_device_threshold(dip, pm_system_idle_threshold,
		    PMC_DEF_THRESH);
		/* Only the array itself is freed; its contents were copied */
		kmem_free(compp, cmpts * sizeof (pm_comp_t));
	}
	return (DDI_SUCCESS);
}
3441 
3442 /*
3443  * Called from during or after the device's attach to let us know it is ready
3444  * to play autopm.   Look up the pm model and manage the device accordingly.
3445  * Returns system call errno value.
3446  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3447  * a little cleaner
3448  *
3449  * Called with dip lock held, return with dip lock unheld.
3450  */
3451 
3452 int
3453 e_pm_manage(dev_info_t *dip, int style)
3454 {
3455 	PMD_FUNC(pmf, "e_manage")
3456 	pm_info_t	*info;
3457 	dev_info_t	*pdip = ddi_get_parent(dip);
3458 	int	pm_thresh_specd(dev_info_t *);
3459 	int	count;
3460 	char	*pathbuf;
3461 
3462 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3463 		return (DDI_FAILURE);
3464 	}
3465 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3466 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3467 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3468 
3469 	/*
3470 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3471 	 * out at their normal power, so they are "up", others start out
3472 	 * unknown, which is effectively "up".  Parent which want notification
3473 	 * get kidsupcnt of 0 always.
3474 	 */
3475 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3476 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3477 		e_pm_hold_rele_power(pdip, count);
3478 
3479 	pm_set_pm_info(dip, info);
3480 	/*
3481 	 * Apply any recorded thresholds
3482 	 */
3483 	(void) pm_thresh_specd(dip);
3484 
3485 	/*
3486 	 * Do dependency processing.
3487 	 */
3488 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3489 	(void) ddi_pathname(dip, pathbuf);
3490 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3491 	    PM_DEP_NOWAIT, NULL, 0);
3492 	kmem_free(pathbuf, MAXPATHLEN);
3493 
3494 	if (!PM_ISBC(dip)) {
3495 		mutex_enter(&pm_scan_lock);
3496 		if (autopm_enabled) {
3497 			pm_scan_init(dip);
3498 			mutex_exit(&pm_scan_lock);
3499 			pm_rescan(dip);
3500 		} else {
3501 			mutex_exit(&pm_scan_lock);
3502 		}
3503 	}
3504 	return (0);
3505 }
3506 
3507 /*
3508  * This is the obsolete exported interface for a driver to find out its
3509  * "normal" (max) power.
3510  * We only get components destroyed while no power management is
3511  * going on (and the device is detached), so we don't need a mutex here
3512  */
3513 int
3514 pm_get_normal_power(dev_info_t *dip, int comp)
3515 {
3516 
3517 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3518 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3519 	}
3520 	return (DDI_FAILURE);
3521 }
3522 
3523 /*
3524  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3525  */
3526 int
3527 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3528 {
3529 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3530 		*levelp = PM_CURPOWER(dip, comp);
3531 		return (DDI_SUCCESS);
3532 	}
3533 	return (DDI_FAILURE);
3534 }
3535 
3536 /*
3537  * Returns current threshold of indicated component
3538  */
3539 static int
3540 cur_threshold(dev_info_t *dip, int comp)
3541 {
3542 	pm_component_t *cp = PM_CP(dip, comp);
3543 	int pwr;
3544 
3545 	if (PM_ISBC(dip)) {
3546 		/*
3547 		 * backwards compatible nodes only have one threshold
3548 		 */
3549 		return (cp->pmc_comp.pmc_thresh[1]);
3550 	}
3551 	pwr = cp->pmc_cur_pwr;
3552 	if (pwr == PM_LEVEL_UNKNOWN) {
3553 		int thresh;
3554 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3555 			thresh = pm_default_nexus_threshold;
3556 		else
3557 			thresh = pm_system_idle_threshold;
3558 		return (thresh);
3559 	}
3560 	ASSERT(cp->pmc_comp.pmc_thresh);
3561 	return (cp->pmc_comp.pmc_thresh[pwr]);
3562 }
3563 
3564 /*
3565  * Compute next lower component power level given power index.
3566  */
3567 static int
3568 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3569 {
3570 	int nxt_pwr;
3571 
3572 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3573 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3574 	} else {
3575 		pwrndx--;
3576 		ASSERT(pwrndx >= 0);
3577 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3578 	}
3579 	return (nxt_pwr);
3580 }
3581 
3582 /*
3583  * Bring all components of device to normal power
3584  */
3585 int
3586 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3587 {
3588 	PMD_FUNC(pmf, "all_to_normal")
3589 	int		*normal;
3590 	int		i, ncomps, result;
3591 	size_t		size;
3592 	int		changefailed = 0;
3593 
3594 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3595 	ASSERT(PM_GET_PM_INFO(dip));
3596 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3597 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3598 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3599 		return (DDI_FAILURE);
3600 	}
3601 	ncomps = PM_NUMCMPTS(dip);
3602 	for (i = 0; i < ncomps; i++) {
3603 		if (pm_set_power(dip, i, normal[i],
3604 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3605 			changefailed++;
3606 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3607 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3608 			    PM_DEVICE(dip), i, normal[i], result))
3609 		}
3610 	}
3611 	kmem_free(normal, size);
3612 	if (changefailed) {
3613 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3614 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3615 		return (DDI_FAILURE);
3616 	}
3617 	return (DDI_SUCCESS);
3618 }
3619 
3620 /*
3621  * Returns true if all components of device are at normal power
3622  */
3623 int
3624 pm_all_at_normal(dev_info_t *dip)
3625 {
3626 	PMD_FUNC(pmf, "all_at_normal")
3627 	int		*normal;
3628 	int		i;
3629 	size_t		size;
3630 
3631 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3632 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3633 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3634 		return (DDI_FAILURE);
3635 	}
3636 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3637 		int current = PM_CURPOWER(dip, i);
3638 		if (normal[i] > current) {
3639 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3640 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3641 			    normal[i], current))
3642 			break;
3643 		}
3644 	}
3645 	kmem_free(normal, size);
3646 	if (i != PM_NUMCMPTS(dip)) {
3647 		return (0);
3648 	}
3649 	return (1);
3650 }
3651 
3652 static void
3653 bring_wekeeps_up(char *keeper)
3654 {
3655 	PMD_FUNC(pmf, "bring_wekeeps_up")
3656 	int i;
3657 	pm_pdr_t *dp;
3658 	pm_info_t *wku_info;
3659 	char *kept_path;
3660 	dev_info_t *kept;
3661 	static void bring_pmdep_up(dev_info_t *, int);
3662 
3663 	if (panicstr) {
3664 		return;
3665 	}
3666 	/*
3667 	 * We process the request even if the keeper detaches because
3668 	 * detach processing expects this to increment kidsupcnt of kept.
3669 	 */
3670 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3671 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3672 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3673 			continue;
3674 		for (i = 0; i < dp->pdr_kept_count; i++) {
3675 			kept_path = dp->pdr_kept_paths[i];
3676 			if (kept_path == NULL)
3677 				continue;
3678 			ASSERT(kept_path[0] != '\0');
3679 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3680 				continue;
3681 			wku_info = PM_GET_PM_INFO(kept);
3682 			if (wku_info == NULL) {
3683 				if (kept)
3684 					ddi_release_devi(kept);
3685 				continue;
3686 			}
3687 			/*
3688 			 * Don't mess with it if it is being detached, it isn't
3689 			 * safe to call its power entry point
3690 			 */
3691 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3692 				if (kept)
3693 					ddi_release_devi(kept);
3694 				continue;
3695 			}
3696 			bring_pmdep_up(kept, 1);
3697 			ddi_release_devi(kept);
3698 		}
3699 	}
3700 }
3701 
3702 /*
3703  * Bring up the 'kept' device passed as argument
3704  */
3705 static void
3706 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3707 {
3708 	PMD_FUNC(pmf, "bring_pmdep_up")
3709 	int is_all_at_normal = 0;
3710 
3711 	/*
3712 	 * If the kept device has been unmanaged, do nothing.
3713 	 */
3714 	if (!PM_GET_PM_INFO(kept_dip))
3715 		return;
3716 
3717 	/* Just ignore DIRECT PM device till they are released. */
3718 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3719 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3720 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3721 		    "controlling process did something else\n", pmf,
3722 		    PM_DEVICE(kept_dip)))
3723 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3724 		return;
3725 	}
3726 	/* if we got here the keeper had a transition from OFF->ON */
3727 	if (hold)
3728 		pm_hold_power(kept_dip);
3729 
3730 	if (!is_all_at_normal)
3731 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3732 }
3733 
/*
 * A bunch of stuff that belongs only to the next routine (or two)
 */

/* "NAME=" prefix string and its length without the terminating NUL */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* Name of the pm-components property */
static const char pmcompstr[] = "pm-components";

/* Node of a singly linked list of component descriptions */
struct pm_comp_pkg {
	pm_comp_t		*comp;
	struct pm_comp_pkg	*next;
};

/*
 * ASCII-only character class tests.  Both macros evaluate their argument
 * more than once, so it must not have side effects.
 */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))
3751 
/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * Completes the component description that 'compp' points to.  The macro
 * uses these names from the scope in which it is expanded:
 *	compp	component being finished (must be non-NULL)
 *	size	total number of bytes needed for all level name strings
 *	level	number of levels collected for this component
 *	lvals	level values, one per level
 *	lnames	level name strings, one per level
 *	lszs	space taken by each level name within the name buffer
 *	tp, j	scratch pointer and loop index
 *
 * It allocates the name buffer and the lnames, lvals and thresh arrays,
 * sets every threshold to INT_MAX, and copies each level's value and name
 * into place, packing the names back to back in pmc_lname_buf.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3775 
3776 /*
3777  * Create (empty) component data structures.
3778  */
3779 static void
3780 e_pm_create_components(dev_info_t *dip, int num_components)
3781 {
3782 	struct pm_component *compp, *ocompp;
3783 	int i, size = 0;
3784 
3785 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3786 	ASSERT(!DEVI(dip)->devi_pm_components);
3787 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3788 	size = sizeof (struct pm_component) * num_components;
3789 
3790 	compp = kmem_zalloc(size, KM_SLEEP);
3791 	ocompp = compp;
3792 	DEVI(dip)->devi_pm_comp_size = size;
3793 	DEVI(dip)->devi_pm_num_components = num_components;
3794 	PM_LOCK_BUSY(dip);
3795 	for (i = 0; i < num_components;  i++) {
3796 		compp->pmc_timestamp = gethrestime_sec();
3797 		compp->pmc_norm_pwr = (uint_t)-1;
3798 		compp++;
3799 	}
3800 	PM_UNLOCK_BUSY(dip);
3801 	DEVI(dip)->devi_pm_components = ocompp;
3802 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3803 }
3804 
/*
 * Parse a hex ("0x" or "0X" prefixed) or decimal value from the start of
 * the string 'cp' and store it through 'valp'.
 *
 * Returns a pointer to the first character following the number, or NULL
 * (leaving *valp untouched) if 'cp' does not start with a decimal digit.
 *
 * A leading '0' that is not followed by 'x' or 'X' is skipped and the
 * digits after it are still read as decimal, not octal.  A bare "0x" with
 * no hex digits yields 0.
 *
 * The value is accumulated digit by digit instead of being staged in a
 * fixed-size scratch buffer, so an arbitrarily long run of digits in the
 * input cannot overrun the stack.  Overflow of the value is not detected.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	unsigned int value = 0;
	int ch, digit;

	ch = *cp++;
	if (!isdigit(ch))
		return (NULL);

	if (ch == '0') {
		ch = *cp++;
		if (ch == 'x' || ch == 'X') {
			for (ch = *cp++; isxdigit(ch); ch = *cp++) {
				if (isdigit(ch))
					digit = ch - '0';
				else if (ch >= 'a' && ch <= 'f')
					digit = ch - 'a' + 10;
				else
					digit = ch - 'A' + 10;
				value = value * 16 + digit;
			}
			*valp = (int)value;
			/* cp is one past the character that ended the scan */
			return (cp - 1);
		}
		/* Not hex: 'ch' is the character after the leading zero */
	}

	while (isdigit(ch)) {
		value = value * 10 + (ch - '0');
		ch = *cp++;
	}
	*valp = (int)value;
	/* cp is one past the character that ended the scan */
	return (cp - 1);
}
3867 
3868 /*
3869  * Set max (previously documented as "normal") power.
3870  */
3871 static void
3872 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3873 {
3874 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3875 }
3876 
3877 /*
3878  * Internal routine for destroying components
3879  * It is called even when there might not be any, so it must be forgiving.
3880  */
3881 static void
3882 e_pm_destroy_components(dev_info_t *dip)
3883 {
3884 	int i;
3885 	struct pm_component *cp;
3886 
3887 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3888 	if (PM_NUMCMPTS(dip) == 0)
3889 		return;
3890 	cp = DEVI(dip)->devi_pm_components;
3891 	ASSERT(cp);
3892 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3893 		int nlevels = cp->pmc_comp.pmc_numlevels;
3894 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3895 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3896 		/*
3897 		 * For BC nodes, the rest is static in bc_comp, so skip it
3898 		 */
3899 		if (PM_ISBC(dip))
3900 			continue;
3901 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
3902 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
3903 		kmem_free(cp->pmc_comp.pmc_lname_buf,
3904 				cp->pmc_comp.pmc_lnames_sz);
3905 	}
3906 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
3907 	DEVI(dip)->devi_pm_components = NULL;
3908 	DEVI(dip)->devi_pm_num_components = 0;
3909 	DEVI(dip)->devi_pm_flags &=
3910 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
3911 }
3912 
3913 /*
3914  * Read the pm-components property (if there is one) and use it to set up
3915  * components.  Returns a pointer to an array of component structures if
3916  * pm-components found and successfully parsed, else returns NULL.
3917  * Sets error return *errp to true to indicate a failure (as opposed to no
3918  * property being present).
3919  */
3920 pm_comp_t *
3921 pm_autoconfig(dev_info_t *dip, int *errp)
3922 {
3923 	PMD_FUNC(pmf, "autoconfig")
3924 	uint_t nelems;
3925 	char **pp;
3926 	pm_comp_t *compp = NULL;
3927 	int i, j, level, components = 0;
3928 	size_t size = 0;
3929 	struct pm_comp_pkg *p, *ptail;
3930 	struct pm_comp_pkg *phead = NULL;
3931 	int *lvals = NULL;
3932 	int *lszs = NULL;
3933 	int *np = NULL;
3934 	int npi = 0;
3935 	char **lnames = NULL;
3936 	char *cp, *tp;
3937 	pm_comp_t *ret = NULL;
3938 
3939 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3940 	*errp = 0;	/* assume success */
3941 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3942 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
3943 		return (NULL);
3944 	}
3945 
3946 	if (nelems < 3) {	/* need at least one name and two levels */
3947 		goto errout;
3948 	}
3949 
3950 	/*
3951 	 * pm_create_components is no longer allowed
3952 	 */
3953 	if (PM_NUMCMPTS(dip) != 0) {
3954 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
3955 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
3956 		goto errout;
3957 	}
3958 
3959 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3960 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3961 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
3962 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3963 
3964 	level = 0;
3965 	phead = NULL;
3966 	for (i = 0; i < nelems; i++) {
3967 		cp = pp[i];
3968 		if (!isdigit(*cp)) {	/*  must be name */
3969 			if (strncmp(cp, namestr, nameln) != 0) {
3970 				goto errout;
3971 			}
3972 			if (i != 0) {
3973 				if (level == 0) {	/* no level spec'd */
3974 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
3975 					    pmf))
3976 					goto errout;
3977 				}
3978 				np[npi++] = lvals[level - 1];
3979 				/* finish up previous component levels */
3980 				FINISH_COMP;
3981 			}
3982 			cp += nameln;
3983 			if (!*cp) {
3984 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
3985 				goto errout;
3986 			}
3987 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
3988 			if (phead == NULL) {
3989 				phead = ptail = p;
3990 			} else {
3991 				ptail->next = p;
3992 				ptail = p;
3993 			}
3994 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
3995 			    KM_SLEEP);
3996 			compp->pmc_name_sz = strlen(cp) + 1;
3997 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
3998 			    KM_SLEEP);
3999 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4000 			components++;
4001 			level = 0;
4002 		} else {	/* better be power level <num>=<name> */
4003 #ifdef DEBUG
4004 			tp = cp;
4005 #endif
4006 			if (i == 0 ||
4007 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4008 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4009 				goto errout;
4010 			}
4011 #ifdef DEBUG
4012 			tp = cp;
4013 #endif
4014 			if (*cp++ != '=' || !*cp) {
4015 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4016 				goto errout;
4017 			}
4018 
4019 			lszs[level] = strlen(cp) + 1;
4020 			size += lszs[level];
4021 			lnames[level] = cp;	/* points into prop string */
4022 			level++;
4023 		}
4024 	}
4025 	np[npi++] = lvals[level - 1];
4026 	if (level == 0) {	/* ended with a name */
4027 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4028 		goto errout;
4029 	}
4030 	FINISH_COMP;
4031 
4032 
4033 	/*
4034 	 * Now we have a list of components--we have to return instead an
4035 	 * array of them, but we can just copy the top level and leave
4036 	 * the rest as is
4037 	 */
4038 	(void) e_pm_create_components(dip, components);
4039 	for (i = 0; i < components; i++)
4040 		e_pm_set_max_power(dip, i, np[i]);
4041 
4042 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4043 	for (i = 0, p = phead; i < components; i++) {
4044 		ASSERT(p);
4045 		/*
4046 		 * Now sanity-check values:  levels must be monotonically
4047 		 * increasing
4048 		 */
4049 		if (p->comp->pmc_numlevels < 2) {
4050 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4051 			    "levels\n", pmf,
4052 			    p->comp->pmc_name, PM_DEVICE(dip),
4053 			    p->comp->pmc_numlevels))
4054 			goto errout;
4055 		}
4056 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4057 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4058 			    (p->comp->pmc_lvals[j] <=
4059 			    p->comp->pmc_lvals[j - 1]))) {
4060 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4061 				    "not mono. incr, %d follows %d\n", pmf,
4062 				    p->comp->pmc_name, PM_DEVICE(dip),
4063 				    p->comp->pmc_lvals[j],
4064 				    p->comp->pmc_lvals[j - 1]))
4065 				goto errout;
4066 			}
4067 		}
4068 		ret[i] = *p->comp;	/* struct assignment */
4069 		for (j = 0; j < i; j++) {
4070 			/*
4071 			 * Test for unique component names
4072 			 */
4073 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4074 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4075 				    "unique\n", pmf, ret[j].pmc_name,
4076 				    PM_DEVICE(dip)))
4077 				goto errout;
4078 			}
4079 		}
4080 		ptail = p;
4081 		p = p->next;
4082 		phead = p;	/* errout depends on phead making sense */
4083 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4084 		kmem_free(ptail, sizeof (*ptail));
4085 	}
4086 out:
4087 	ddi_prop_free(pp);
4088 	if (lvals)
4089 		kmem_free(lvals, nelems * sizeof (int));
4090 	if (lszs)
4091 		kmem_free(lszs, nelems * sizeof (int));
4092 	if (lnames)
4093 		kmem_free(lnames, nelems * sizeof (char *));
4094 	if (np)
4095 		kmem_free(np, nelems * sizeof (int));
4096 	return (ret);
4097 
4098 errout:
4099 	e_pm_destroy_components(dip);
4100 	*errp = 1;	/* signal failure */
4101 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4102 	for (i = 0; i < nelems - 1; i++)
4103 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4104 	if (nelems != 0)
4105 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4106 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4107 	for (p = phead; p; ) {
4108 		pm_comp_t *pp;
4109 		int n;
4110 
4111 		ptail = p;
4112 		/*
4113 		 * Free component data structures
4114 		 */
4115 		pp = p->comp;
4116 		n = pp->pmc_numlevels;
4117 		if (pp->pmc_name_sz) {
4118 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4119 		}
4120 		if (pp->pmc_lnames_sz) {
4121 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4122 		}
4123 		if (pp->pmc_lnames) {
4124 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4125 		}
4126 		if (pp->pmc_thresh) {
4127 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4128 		}
4129 		if (pp->pmc_lvals) {
4130 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4131 		}
4132 		p = ptail->next;
4133 		kmem_free(ptail, sizeof (*ptail));
4134 	}
4135 	if (ret != NULL)
4136 		kmem_free(ret, components * sizeof (pm_comp_t));
4137 	ret = NULL;
4138 	goto out;
4139 }
4140 
4141 /*
4142  * Set threshold values for a devices components by dividing the target
4143  * threshold (base) by the number of transitions and assign each transition
4144  * that threshold.  This will get the entire device down in the target time if
4145  * all components are idle and even if there are dependencies among components.
4146  *
4147  * Devices may well get powered all the way down before the target time, but
4148  * at least the EPA will be happy.
4149  */
4150 void
4151 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4152 {
4153 	PMD_FUNC(pmf, "set_device_threshold")
4154 	int target_threshold = (base * 95) / 100;
4155 	int level, comp;		/* loop counters */
4156 	int transitions = 0;
4157 	int ncomp = PM_NUMCMPTS(dip);
4158 	int thresh;
4159 	int remainder;
4160 	pm_comp_t *pmc;
4161 	int i, circ;
4162 
4163 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4164 	PM_LOCK_DIP(dip);
4165 	/*
4166 	 * First we handle the easy one.  If we're setting the default
4167 	 * threshold for a node with children, then we set it to the
4168 	 * default nexus threshold (currently 0) and mark it as default
4169 	 * nexus threshold instead
4170 	 */
4171 	if (PM_IS_NEXUS(dip)) {
4172 		if (flag == PMC_DEF_THRESH) {
4173 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4174 			    PM_DEVICE(dip)))
4175 			thresh = pm_default_nexus_threshold;
4176 			for (comp = 0; comp < ncomp; comp++) {
4177 				pmc = &PM_CP(dip, comp)->pmc_comp;
4178 				for (level = 1; level < pmc->pmc_numlevels;
4179 				    level++) {
4180 					pmc->pmc_thresh[level] = thresh;
4181 				}
4182 			}
4183 			DEVI(dip)->devi_pm_dev_thresh =
4184 			    pm_default_nexus_threshold;
4185 			/*
4186 			 * If the nexus node is being reconfigured back to
4187 			 * the default threshold, adjust the notlowest count.
4188 			 */
4189 			if (DEVI(dip)->devi_pm_flags &
4190 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4191 				PM_LOCK_POWER(dip, &circ);
4192 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4193 					if (PM_CURPOWER(dip, i) == 0)
4194 						continue;
4195 					mutex_enter(&pm_compcnt_lock);
4196 					ASSERT(pm_comps_notlowest);
4197 					pm_comps_notlowest--;
4198 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4199 					    "notlowest to %d\n", pmf,
4200 					    PM_DEVICE(dip), pm_comps_notlowest))
4201 					if (pm_comps_notlowest == 0)
4202 						pm_ppm_notify_all_lowest(dip,
4203 						    PM_ALL_LOWEST);
4204 					mutex_exit(&pm_compcnt_lock);
4205 				}
4206 				PM_UNLOCK_POWER(dip, circ);
4207 			}
4208 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4209 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4210 			PM_UNLOCK_DIP(dip);
4211 			return;
4212 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4213 			/*
4214 			 * If the nexus node is being configured for a
4215 			 * non-default threshold, include that node in
4216 			 * the notlowest accounting.
4217 			 */
4218 			PM_LOCK_POWER(dip, &circ);
4219 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4220 				if (PM_CURPOWER(dip, i) == 0)
4221 					continue;
4222 				mutex_enter(&pm_compcnt_lock);
4223 				if (pm_comps_notlowest == 0)
4224 					pm_ppm_notify_all_lowest(dip,
4225 					    PM_NOT_ALL_LOWEST);
4226 				pm_comps_notlowest++;
4227 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4228 				    "notlowest to %d\n", pmf,
4229 				    PM_DEVICE(dip), pm_comps_notlowest))
4230 				mutex_exit(&pm_compcnt_lock);
4231 			}
4232 			PM_UNLOCK_POWER(dip, circ);
4233 		}
4234 	}
4235 	/*
4236 	 * Compute the total number of transitions for all components
4237 	 * of the device.  Distribute the threshold evenly over them
4238 	 */
4239 	for (comp = 0; comp < ncomp; comp++) {
4240 		pmc = &PM_CP(dip, comp)->pmc_comp;
4241 		ASSERT(pmc->pmc_numlevels > 1);
4242 		transitions += pmc->pmc_numlevels - 1;
4243 	}
4244 	ASSERT(transitions);
4245 	thresh = target_threshold / transitions;
4246 
4247 	for (comp = 0; comp < ncomp; comp++) {
4248 		pmc = &PM_CP(dip, comp)->pmc_comp;
4249 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4250 			pmc->pmc_thresh[level] = thresh;
4251 		}
4252 	}
4253 
4254 #ifdef DEBUG
4255 	for (comp = 0; comp < ncomp; comp++) {
4256 		pmc = &PM_CP(dip, comp)->pmc_comp;
4257 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4258 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4259 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4260 			    comp, level, pmc->pmc_thresh[level]))
4261 		}
4262 	}
4263 #endif
4264 	/*
4265 	 * Distribute any remainder till they are all gone
4266 	 */
4267 	remainder = target_threshold - thresh * transitions;
4268 	level = 1;
4269 #ifdef DEBUG
4270 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4271 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4272 	    transitions))
4273 #endif
4274 	while (remainder > 0) {
4275 		comp = 0;
4276 		while (remainder && (comp < ncomp)) {
4277 			pmc = &PM_CP(dip, comp)->pmc_comp;
4278 			if (level < pmc->pmc_numlevels) {
4279 				pmc->pmc_thresh[level] += 1;
4280 				remainder--;
4281 			}
4282 			comp++;
4283 		}
4284 		level++;
4285 	}
4286 #ifdef DEBUG
4287 	for (comp = 0; comp < ncomp; comp++) {
4288 		pmc = &PM_CP(dip, comp)->pmc_comp;
4289 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4290 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4291 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4292 			    comp, level, pmc->pmc_thresh[level]))
4293 		}
4294 	}
4295 #endif
4296 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4297 	DEVI(dip)->devi_pm_dev_thresh = base;
4298 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4299 	DEVI(dip)->devi_pm_flags |= flag;
4300 	PM_UNLOCK_DIP(dip);
4301 }
4302 
4303 /*
4304  * Called when there is no old-style platform power management driver
4305  */
4306 static int
4307 ddi_no_platform_power(power_req_t *req)
4308 {
4309 	_NOTE(ARGUNUSED(req))
4310 	return (DDI_FAILURE);
4311 }
4312 
4313 /*
4314  * This function calls the entry point supplied by the platform-specific
4315  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4316  * The use of global for getting the  function name from platform-specific
4317  * pm driver is not ideal, but it is simple and efficient.
4318  * The previous property lookup was being done in the idle loop on swift
4319  * systems without pmc chips and hurt deskbench performance as well as
4320  * violating scheduler locking rules
4321  */
4322 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4323 
4324 /*
4325  * Old obsolete interface for a device to request a power change (but only
4326  * an increase in power)
4327  */
4328 int
4329 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4330 {
4331 	return (pm_raise_power(dip, cmpt, level));
4332 }
4333 
4334 /*
4335  * The old obsolete interface to platform power management.  Only used by
4336  * Gypsy platform and APM on X86.
4337  */
4338 int
4339 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4340 {
4341 	power_req_t	request;
4342 
4343 	request.request_type = PMR_SET_POWER;
4344 	request.req.set_power_req.who = dip;
4345 	request.req.set_power_req.cmpt = pm_cmpt;
4346 	request.req.set_power_req.level = pm_level;
4347 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4348 }
4349 
4350 /*
4351  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4352  * passed.  Returns true if subsequent processing could result in power being
4353  * removed from the device.  The arg is not currently used because it is
4354  * implicit in the operation of cpr/DR.
4355  */
4356 int
4357 ddi_removing_power(dev_info_t *dip)
4358 {
4359 	_NOTE(ARGUNUSED(dip))
4360 	return (pm_powering_down);
4361 }
4362 
4363 /*
4364  * Returns true if a device indicates that its parent handles suspend/resume
4365  * processing for it.
4366  */
4367 int
4368 e_ddi_parental_suspend_resume(dev_info_t *dip)
4369 {
4370 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4371 }
4372 
4373 /*
4374  * Called for devices which indicate that their parent does suspend/resume
4375  * handling for them
4376  */
4377 int
4378 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4379 {
4380 	power_req_t	request;
4381 	request.request_type = PMR_SUSPEND;
4382 	request.req.suspend_req.who = dip;
4383 	request.req.suspend_req.cmd = cmd;
4384 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4385 }
4386 
4387 /*
4388  * Called for devices which indicate that their parent does suspend/resume
4389  * handling for them
4390  */
4391 int
4392 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4393 {
4394 	power_req_t	request;
4395 	request.request_type = PMR_RESUME;
4396 	request.req.resume_req.who = dip;
4397 	request.req.resume_req.cmd = cmd;
4398 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4399 }
4400 
4401 /*
4402  * Old obsolete exported interface for drivers to create components.
4403  * This is now handled by exporting the pm-components property.
4404  */
4405 int
4406 pm_create_components(dev_info_t *dip, int num_components)
4407 {
4408 	PMD_FUNC(pmf, "pm_create_components")
4409 
4410 	if (num_components < 1)
4411 		return (DDI_FAILURE);
4412 
4413 	if (!DEVI_IS_ATTACHING(dip)) {
4414 		return (DDI_FAILURE);
4415 	}
4416 
4417 	/* don't need to lock dip because attach is single threaded */
4418 	if (DEVI(dip)->devi_pm_components) {
4419 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4420 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4421 		return (DDI_FAILURE);
4422 	}
4423 	e_pm_create_components(dip, num_components);
4424 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4425 	e_pm_default_components(dip, num_components);
4426 	return (DDI_SUCCESS);
4427 }
4428 
4429 /*
4430  * Obsolete interface previously called by drivers to destroy their components
4431  * at detach time.  This is now done automatically.  However, we need to keep
4432  * this for the old drivers.
4433  */
4434 void
4435 pm_destroy_components(dev_info_t *dip)
4436 {
4437 	PMD_FUNC(pmf, "pm_destroy_components")
4438 	dev_info_t *pdip = ddi_get_parent(dip);
4439 
4440 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4441 	    PM_DEVICE(dip)))
4442 	ASSERT(DEVI_IS_DETACHING(dip));
4443 #ifdef DEBUG
4444 	if (!PM_ISBC(dip))
4445 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4446 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4447 		    PM_ADDR(dip));
4448 #endif
4449 	/*
4450 	 * We ignore this unless this is an old-style driver, except for
4451 	 * printing the message above
4452 	 */
4453 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4454 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4455 		    PM_DEVICE(dip)))
4456 		return;
4457 	}
4458 	ASSERT(PM_GET_PM_INFO(dip));
4459 
4460 	/*
4461 	 * pm_unmanage will clear info pointer later, after dealing with
4462 	 * dependencies
4463 	 */
4464 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4465 	/*
4466 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4467 	 * Parents that get notification are not adjusted because their
4468 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4469 	 */
4470 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4471 		pm_rele_power(pdip);
4472 #ifdef DEBUG
4473 	else {
4474 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4475 		    pmf, PM_DEVICE(dip)))
4476 	}
4477 #endif
4478 	e_pm_destroy_components(dip);
4479 	/*
4480 	 * Forget we ever knew anything about the components of this  device
4481 	 */
4482 	DEVI(dip)->devi_pm_flags &=
4483 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4484 }
4485 
4486 /*
4487  * Exported interface for a driver to set a component busy.
4488  */
4489 int
4490 pm_busy_component(dev_info_t *dip, int cmpt)
4491 {
4492 	struct pm_component *cp;
4493 
4494 	ASSERT(dip != NULL);
4495 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4496 		return (DDI_FAILURE);
4497 	PM_LOCK_BUSY(dip);
4498 	cp->pmc_busycount++;
4499 	cp->pmc_timestamp = 0;
4500 	PM_UNLOCK_BUSY(dip);
4501 	return (DDI_SUCCESS);
4502 }
4503 
4504 /*
4505  * Exported interface for a driver to set a component idle.
4506  */
4507 int
4508 pm_idle_component(dev_info_t *dip, int cmpt)
4509 {
4510 	PMD_FUNC(pmf, "pm_idle_component")
4511 	struct pm_component *cp;
4512 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4513 
4514 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4515 		return (DDI_FAILURE);
4516 
4517 	PM_LOCK_BUSY(dip);
4518 	if (cp->pmc_busycount) {
4519 		if (--(cp->pmc_busycount) == 0)
4520 			cp->pmc_timestamp = gethrestime_sec();
4521 	} else {
4522 		cp->pmc_timestamp = gethrestime_sec();
4523 	}
4524 
4525 	PM_UNLOCK_BUSY(dip);
4526 
4527 	/*
4528 	 * if device becomes idle during idle down period, try scan it down
4529 	 */
4530 	if (scanp && PM_IS_PID(dip)) {
4531 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4532 		    PM_DEVICE(dip)))
4533 		pm_rescan(dip);
4534 		return (DDI_SUCCESS);
4535 	}
4536 
4537 	/*
4538 	 * handle scan not running with nexus threshold == 0
4539 	 */
4540 
4541 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4542 		pm_rescan(dip);
4543 	}
4544 
4545 	return (DDI_SUCCESS);
4546 }
4547 
4548 /*
4549  * This is the old  obsolete interface called by drivers to set their normal
4550  * power.  Thus we can't fix its behavior or return a value.
4551  * This functionality is replaced by the pm-component property.
4552  * We'll only get components destroyed while no power management is
4553  * going on (and the device is detached), so we don't need a mutex here
4554  */
4555 void
4556 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4557 {
4558 	PMD_FUNC(pmf, "set_normal_power")
4559 #ifdef DEBUG
4560 	if (!PM_ISBC(dip))
4561 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4562 		    "(driver exporting pm-components property) ignored",
4563 		    PM_NAME(dip), PM_ADDR(dip));
4564 #endif
4565 	if (PM_ISBC(dip)) {
4566 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4567 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4568 		e_pm_set_max_power(dip, comp, level);
4569 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4570 	}
4571 }
4572 
4573 /*
4574  * Called on a successfully detached driver to free pm resources
4575  */
4576 static void
4577 pm_stop(dev_info_t *dip)
4578 {
4579 	PMD_FUNC(pmf, "stop")
4580 	dev_info_t *pdip = ddi_get_parent(dip);
4581 
4582 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4583 	/* stopping scan, destroy scan data structure */
4584 	if (!PM_ISBC(dip)) {
4585 		pm_scan_stop(dip);
4586 		pm_scan_fini(dip);
4587 	}
4588 
4589 	if (PM_GET_PM_INFO(dip) != NULL) {
4590 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4591 			/*
4592 			 * Old style driver may have called
4593 			 * pm_destroy_components already, but just in case ...
4594 			 */
4595 			e_pm_destroy_components(dip);
4596 		} else {
4597 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4598 			    pmf, PM_DEVICE(dip)))
4599 		}
4600 	} else {
4601 		if (PM_NUMCMPTS(dip))
4602 			e_pm_destroy_components(dip);
4603 		else {
4604 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4605 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4606 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4607 					pm_rele_power(pdip);
4608 				} else if (pdip && MDI_VHCI(pdip)) {
4609 					(void) mdi_power(pdip,
4610 					    MDI_PM_RELE_POWER,
4611 					    (void *)dip, NULL, 0);
4612 				}
4613 			}
4614 		}
4615 	}
4616 }
4617 
4618 /*
4619  * The node is the subject of a reparse pm props ioctl. Throw away the old
4620  * info and start over.
4621  */
4622 int
4623 e_new_pm_props(dev_info_t *dip)
4624 {
4625 	if (PM_GET_PM_INFO(dip) != NULL) {
4626 		pm_stop(dip);
4627 
4628 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4629 			return (DDI_FAILURE);
4630 		}
4631 	}
4632 	e_pm_props(dip);
4633 	return (DDI_SUCCESS);
4634 }
4635 
4636 /*
4637  * Device has been attached, so process its pm properties
4638  */
4639 void
4640 e_pm_props(dev_info_t *dip)
4641 {
4642 	char *pp;
4643 	int len;
4644 	int flags = 0;
4645 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4646 
4647 	/*
4648 	 * It doesn't matter if we do this more than once, we should always
4649 	 * get the same answers, and if not, then the last one in is the
4650 	 * best one.
4651 	 */
4652 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4653 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4654 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4655 			flags = PMC_NEEDS_SR;
4656 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4657 			flags = PMC_NO_SR;
4658 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4659 			flags = PMC_PARENTAL_SR;
4660 		} else {
4661 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4662 			    "%s property value '%s'", PM_NAME(dip),
4663 			    PM_ADDR(dip), "pm-hardware-state", pp);
4664 		}
4665 		kmem_free(pp, len);
4666 	}
4667 	/*
4668 	 * This next segment (PMC_WANTS_NOTIFY) is in
4669 	 * support of nexus drivers which will want to be involved in
4670 	 * (or at least notified of) their child node's power level transitions.
4671 	 * "pm-want-child-notification?" is defined by the parent.
4672 	 */
4673 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4674 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4675 		flags |= PMC_WANTS_NOTIFY;
4676 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4677 	    dip, propflag, "pm-want-child-notification?"));
4678 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4679 	    "no-involuntary-power-cycles"))
4680 		flags |= PMC_NO_INVOL;
4681 	/* devfs single threads us */
4682 	DEVI(dip)->devi_pm_flags |= flags;
4683 }
4684 
4685 /*
4686  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4687  * driver which has claimed a node.
4688  * Sets old_power in arg struct.
4689  */
4690 static int
4691 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4692     ddi_ctl_enum_t ctlop, void *arg, void *result)
4693 {
4694 	_NOTE(ARGUNUSED(dip))
4695 	PMD_FUNC(pmf, "ctlops")
4696 	power_req_t *reqp = (power_req_t *)arg;
4697 	int retval;
4698 	dev_info_t *target_dip;
4699 	int new_level, old_level, cmpt;
4700 #ifdef DEBUG
4701 	char *format;
4702 #endif
4703 
4704 	/*
4705 	 * The interface for doing the actual power level changes is now
4706 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4707 	 * different platform-specific power control drivers.
4708 	 *
4709 	 * This driver implements the "default" version of this interface.
4710 	 * If no ppm driver has been installed then this interface is called
4711 	 * instead.
4712 	 */
4713 	ASSERT(dip == NULL);
4714 	switch (ctlop) {
4715 	case DDI_CTLOPS_POWER:
4716 		switch (reqp->request_type) {
4717 		case PMR_PPM_SET_POWER:
4718 		{
4719 			target_dip = reqp->req.ppm_set_power_req.who;
4720 			ASSERT(target_dip == rdip);
4721 			new_level = reqp->req.ppm_set_power_req.new_level;
4722 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4723 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4724 			old_level = PM_CURPOWER(target_dip, cmpt);
4725 			reqp->req.ppm_set_power_req.old_level = old_level;
4726 			retval = pm_power(target_dip, cmpt, new_level);
4727 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4728 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4729 			    old_level, new_level, (retval == DDI_SUCCESS ?
4730 			    "chd" : "no chg")))
4731 			return (retval);
4732 		}
4733 
4734 		case PMR_PPM_PRE_DETACH:
4735 		case PMR_PPM_POST_DETACH:
4736 		case PMR_PPM_PRE_ATTACH:
4737 		case PMR_PPM_POST_ATTACH:
4738 		case PMR_PPM_PRE_PROBE:
4739 		case PMR_PPM_POST_PROBE:
4740 		case PMR_PPM_PRE_RESUME:
4741 		case PMR_PPM_INIT_CHILD:
4742 		case PMR_PPM_UNINIT_CHILD:
4743 #ifdef DEBUG
4744 			switch (reqp->request_type) {
4745 				case PMR_PPM_PRE_DETACH:
4746 					format = "%s: PMR_PPM_PRE_DETACH "
4747 					    "%s@%s(%s#%d)\n";
4748 					break;
4749 				case PMR_PPM_POST_DETACH:
4750 					format = "%s: PMR_PPM_POST_DETACH "
4751 					    "%s@%s(%s#%d) rets %d\n";
4752 					break;
4753 				case PMR_PPM_PRE_ATTACH:
4754 					format = "%s: PMR_PPM_PRE_ATTACH "
4755 					    "%s@%s(%s#%d)\n";
4756 					break;
4757 				case PMR_PPM_POST_ATTACH:
4758 					format = "%s: PMR_PPM_POST_ATTACH "
4759 					    "%s@%s(%s#%d) rets %d\n";
4760 					break;
4761 				case PMR_PPM_PRE_PROBE:
4762 					format = "%s: PMR_PPM_PRE_PROBE "
4763 					    "%s@%s(%s#%d)\n";
4764 					break;
4765 				case PMR_PPM_POST_PROBE:
4766 					format = "%s: PMR_PPM_POST_PROBE "
4767 					    "%s@%s(%s#%d) rets %d\n";
4768 					break;
4769 				case PMR_PPM_PRE_RESUME:
4770 					format = "%s: PMR_PPM_PRE_RESUME "
4771 					    "%s@%s(%s#%d) rets %d\n";
4772 					break;
4773 				case PMR_PPM_INIT_CHILD:
4774 					format = "%s: PMR_PPM_INIT_CHILD "
4775 					    "%s@%s(%s#%d)\n";
4776 					break;
4777 				case PMR_PPM_UNINIT_CHILD:
4778 					format = "%s: PMR_PPM_UNINIT_CHILD "
4779 					    "%s@%s(%s#%d)\n";
4780 					break;
4781 				default:
4782 					break;
4783 			}
4784 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4785 			    reqp->req.ppm_config_req.result))
4786 #endif
4787 			return (DDI_SUCCESS);
4788 
4789 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4790 			/*
4791 			 * Nothing for us to do
4792 			 */
4793 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4794 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4795 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4796 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4797 			    reqp->req.ppm_notify_level_req.cmpt,
4798 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4799 			    reqp->req.ppm_notify_level_req.cmpt),
4800 			    reqp->req.ppm_notify_level_req.new_level))
4801 			return (DDI_SUCCESS);
4802 
4803 		case PMR_PPM_UNMANAGE:
4804 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4805 			    pmf, PM_DEVICE(rdip)))
4806 			return (DDI_SUCCESS);
4807 
4808 		case PMR_PPM_LOCK_POWER:
4809 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4810 			    reqp->req.ppm_lock_power_req.circp);
4811 			return (DDI_SUCCESS);
4812 
4813 		case PMR_PPM_UNLOCK_POWER:
4814 			pm_unlock_power_single(
4815 			    reqp->req.ppm_unlock_power_req.who,
4816 			    reqp->req.ppm_unlock_power_req.circ);
4817 			return (DDI_SUCCESS);
4818 
4819 		case PMR_PPM_TRY_LOCK_POWER:
4820 			*(int *)result = pm_try_locking_power_single(
4821 			    reqp->req.ppm_lock_power_req.who,
4822 			    reqp->req.ppm_lock_power_req.circp);
4823 			return (DDI_SUCCESS);
4824 
4825 		case PMR_PPM_POWER_LOCK_OWNER:
4826 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4827 			ASSERT(target_dip == rdip);
4828 			reqp->req.ppm_power_lock_owner_req.owner =
4829 			    DEVI(rdip)->devi_busy_thread;
4830 			return (DDI_SUCCESS);
4831 		default:
4832 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4833 			return (DDI_FAILURE);
4834 		}
4835 
4836 	default:
4837 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4838 		return (DDI_FAILURE);
4839 	}
4840 }
4841 
4842 /*
4843  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4844  * power_ops struct for this functionality instead?
4845  * However, we only ever do this on a ppm driver.
4846  */
4847 int
4848 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4849 {
4850 	int (*fp)();
4851 
4852 	/* if no ppm handler, call the default routine */
4853 	if (d == NULL) {
4854 		return (pm_default_ctlops(d, r, op, a, v));
4855 	}
4856 	if (!d || !r)
4857 		return (DDI_FAILURE);
4858 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4859 		DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4860 
4861 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4862 	return ((*fp)(d, r, op, a, v));
4863 }
4864 
4865 /*
4866  * Called on a node when attach completes or the driver makes its first pm
4867  * call (whichever comes first).
4868  * In the attach case, device may not be power manageable at all.
4869  * Don't need to lock the dip because we're single threaded by the devfs code
4870  */
4871 static int
4872 pm_start(dev_info_t *dip)
4873 {
4874 	PMD_FUNC(pmf, "start")
4875 	int ret;
4876 	dev_info_t *pdip = ddi_get_parent(dip);
4877 	int e_pm_manage(dev_info_t *, int);
4878 	void pm_noinvol_specd(dev_info_t *dip);
4879 
4880 	e_pm_props(dip);
4881 	pm_noinvol_specd(dip);
4882 	/*
4883 	 * If this dip has already been processed, don't mess with it
4884 	 * (but decrement the speculative count we did above, as whatever
4885 	 * code put it under pm already will have dealt with it)
4886 	 */
4887 	if (PM_GET_PM_INFO(dip)) {
4888 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
4889 		    pmf, PM_DEVICE(dip)))
4890 		return (0);
4891 	}
4892 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
4893 
4894 	if (PM_GET_PM_INFO(dip) == NULL) {
4895 		/*
4896 		 * keep the kidsupcount increment as is
4897 		 */
4898 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
4899 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4900 			pm_hold_power(pdip);
4901 		} else if (pdip && MDI_VHCI(pdip)) {
4902 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
4903 			    (void *)dip, NULL, 0);
4904 		}
4905 
4906 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
4907 		    "left up\n", pmf, PM_DEVICE(dip)))
4908 	}
4909 
4910 	return (ret);
4911 }
4912 
4913 /*
4914  * Keep a list of recorded thresholds.  For now we just keep a list and
4915  * search it linearly.  We don't expect too many entries.  Can always hash it
4916  * later if we need to.
4917  */
4918 void
4919 pm_record_thresh(pm_thresh_rec_t *rp)
4920 {
4921 	pm_thresh_rec_t *pptr, *ptr;
4922 
4923 	ASSERT(*rp->ptr_physpath);
4924 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
4925 	for (pptr = NULL, ptr = pm_thresh_head;
4926 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
4927 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
4928 			/* replace this one */
4929 			rp->ptr_next = ptr->ptr_next;
4930 			if (pptr) {
4931 				pptr->ptr_next = rp;
4932 			} else {
4933 				pm_thresh_head = rp;
4934 			}
4935 			rw_exit(&pm_thresh_rwlock);
4936 			kmem_free(ptr, ptr->ptr_size);
4937 			return;
4938 		}
4939 		continue;
4940 	}
4941 	/*
4942 	 * There was not a match in the list, insert this one in front
4943 	 */
4944 	if (pm_thresh_head) {
4945 		rp->ptr_next = pm_thresh_head;
4946 		pm_thresh_head = rp;
4947 	} else {
4948 		rp->ptr_next = NULL;
4949 		pm_thresh_head = rp;
4950 	}
4951 	rw_exit(&pm_thresh_rwlock);
4952 }
4953 
4954 /*
4955  * Create a new dependency record and hang a new dependency entry off of it
4956  */
4957 pm_pdr_t *
4958 newpdr(char *kept, char *keeps, int isprop)
4959 {
4960 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
4961 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
4962 	p->pdr_size = size;
4963 	p->pdr_isprop = isprop;
4964 	p->pdr_kept_paths = NULL;
4965 	p->pdr_kept_count = 0;
4966 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
4967 	(void) strcpy(p->pdr_kept, kept);
4968 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
4969 	(void) strcpy(p->pdr_keeper, keeps);
4970 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
4971 	    (intptr_t)p + size);
4972 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
4973 	    (intptr_t)p + size);
4974 	return (p);
4975 }
4976 
4977 /*
4978  * Keep a list of recorded dependencies.  We only keep the
4979  * keeper -> kept list for simplification. At this point We do not
4980  * care about whether the devices are attached or not yet,
4981  * this would be done in pm_keeper() and pm_kept().
4982  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
4983  * and it is up to the user to issue the ioctl again if they want it
4984  * (e.g. pmconfig)
4985  * Returns true if dependency already exists in the list.
4986  */
4987 int
4988 pm_record_keeper(char *kept, char *keeper, int isprop)
4989 {
4990 	PMD_FUNC(pmf, "record_keeper")
4991 	pm_pdr_t *npdr, *ppdr, *pdr;
4992 
4993 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
4994 	ASSERT(kept && keeper);
4995 #ifdef DEBUG
4996 	if (pm_debug & PMD_KEEPS)
4997 		prdeps("pm_record_keeper entry");
4998 #endif
4999 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5000 	    ppdr = pdr, pdr = pdr->pdr_next) {
5001 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5002 		    pdr->pdr_keeper))
5003 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5004 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5005 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5006 			return (1);
5007 		}
5008 	}
5009 	/*
5010 	 * We did not find any match, so we have to make an entry
5011 	 */
5012 	npdr = newpdr(kept, keeper, isprop);
5013 	if (ppdr) {
5014 		ASSERT(ppdr->pdr_next == NULL);
5015 		ppdr->pdr_next = npdr;
5016 	} else {
5017 		ASSERT(pm_dep_head == NULL);
5018 		pm_dep_head = npdr;
5019 	}
5020 #ifdef DEBUG
5021 	if (pm_debug & PMD_KEEPS)
5022 		prdeps("pm_record_keeper after new record");
5023 #endif
5024 	if (!isprop)
5025 		pm_unresolved_deps++;
5026 	else
5027 		pm_prop_deps++;
5028 	return (0);
5029 }
5030 
5031 /*
5032  * Look up this device in the set of devices we've seen ioctls for
5033  * to see if we are holding a threshold spec for it.  If so, make it so.
5034  * At ioctl time, we were given the physical path of the device.
5035  */
5036 int
5037 pm_thresh_specd(dev_info_t *dip)
5038 {
5039 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5040 	char *path = 0;
5041 	char pathbuf[MAXNAMELEN];
5042 	pm_thresh_rec_t *rp;
5043 
5044 	path = ddi_pathname(dip, pathbuf);
5045 
5046 	rw_enter(&pm_thresh_rwlock, RW_READER);
5047 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5048 		if (strcmp(rp->ptr_physpath, path) != 0)
5049 			continue;
5050 		pm_apply_recorded_thresh(dip, rp);
5051 		rw_exit(&pm_thresh_rwlock);
5052 		return (1);
5053 	}
5054 	rw_exit(&pm_thresh_rwlock);
5055 	return (0);
5056 }
5057 
5058 static int
5059 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5060 {
5061 	PMD_FUNC(pmf, "set_keeping")
5062 	pm_info_t *kept_info;
5063 	int j, up = 0, circ;
5064 	void prdeps(char *);
5065 
5066 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5067 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5068 #ifdef DEBUG
5069 	if (pm_debug & PMD_KEEPS)
5070 		prdeps("Before PAD\n");
5071 #endif
5072 	ASSERT(keeper != kept);
5073 	if (PM_GET_PM_INFO(keeper) == NULL) {
5074 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5075 		    "%s@%s(%s#%d), but the latter is not power managed",
5076 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5077 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5078 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5079 		return (0);
5080 	}
5081 	kept_info = PM_GET_PM_INFO(kept);
5082 	ASSERT(kept_info);
5083 	PM_LOCK_POWER(keeper, &circ);
5084 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5085 		if (PM_CURPOWER(keeper, j)) {
5086 			up++;
5087 			break;
5088 		}
5089 	}
5090 	if (up) {
5091 		/* Bringup and maintain a hold on the kept */
5092 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5093 		    PM_DEVICE(kept)))
5094 		bring_pmdep_up(kept, 1);
5095 	}
5096 	PM_UNLOCK_POWER(keeper, circ);
5097 #ifdef DEBUG
5098 	if (pm_debug & PMD_KEEPS)
5099 		prdeps("After PAD\n");
5100 #endif
5101 	return (1);
5102 }
5103 
5104 /*
5105  * Should this device keep up another device?
5106  * Look up this device in the set of devices we've seen ioctls for
5107  * to see if we are holding a dependency spec for it.  If so, make it so.
5108  * Because we require the kept device to be attached already in order to
5109  * make the list entry (and hold it), we only need to look for keepers.
5110  * At ioctl time, we were given the physical path of the device.
5111  */
5112 int
5113 pm_keeper(char *keeper)
5114 {
5115 	PMD_FUNC(pmf, "keeper")
5116 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5117 	dev_info_t *dip;
5118 	pm_pdr_t *dp;
5119 	dev_info_t *kept = NULL;
5120 	int ret = 0;
5121 	int i;
5122 
5123 	if (!pm_unresolved_deps && !pm_prop_deps)
5124 		return (0);
5125 	ASSERT(keeper != NULL);
5126 	dip = pm_name_to_dip(keeper, 1);
5127 	if (dip == NULL)
5128 		return (0);
5129 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5130 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5131 		if (!dp->pdr_isprop) {
5132 			if (!pm_unresolved_deps)
5133 				continue;
5134 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5135 			if (dp->pdr_satisfied) {
5136 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5137 				continue;
5138 			}
5139 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5140 				ret += pm_apply_recorded_dep(dip, dp);
5141 			}
5142 		} else {
5143 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5144 				continue;
5145 			for (i = 0; i < dp->pdr_kept_count; i++) {
5146 				if (dp->pdr_kept_paths[i] == NULL)
5147 					continue;
5148 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5149 				if (kept == NULL)
5150 					continue;
5151 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5152 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5153 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5154 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5155 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5156 				    dp->pdr_kept_count))
5157 				if (kept != dip) {
5158 					ret += pm_set_keeping(dip, kept);
5159 				}
5160 				ddi_release_devi(kept);
5161 			}
5162 
5163 		}
5164 	}
5165 	ddi_release_devi(dip);
5166 	return (ret);
5167 }
5168 
5169 /*
5170  * Should this device be kept up by another device?
5171  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5172  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5173  * kept device lists.
5174  */
5175 static int
5176 pm_kept(char *keptp)
5177 {
5178 	PMD_FUNC(pmf, "kept")
5179 	pm_pdr_t *dp;
5180 	int found = 0;
5181 	int ret = 0;
5182 	dev_info_t *keeper;
5183 	dev_info_t *kept;
5184 	size_t length;
5185 	int i;
5186 	char **paths;
5187 	char *path;
5188 
5189 	ASSERT(keptp != NULL);
5190 	kept = pm_name_to_dip(keptp, 1);
5191 	if (kept == NULL)
5192 		return (0);
5193 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5194 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5195 		if (dp->pdr_isprop) {
5196 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5197 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5198 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5199 				/*
5200 				 * Dont allow self dependency.
5201 				 */
5202 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5203 					continue;
5204 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5205 				if (keeper == NULL)
5206 					continue;
5207 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5208 				    "%p\n", pmf, (void *)kept))
5209 #ifdef DEBUG
5210 				if (pm_debug & PMD_DEP)
5211 					prdeps("Before Adding from pm_kept\n");
5212 #endif
5213 				/*
5214 				 * Add ourselves to the dip list.
5215 				 */
5216 				if (dp->pdr_kept_count == 0) {
5217 					length = strlen(keptp) + 1;
5218 					path =
5219 					    kmem_alloc(length, KM_SLEEP);
5220 					paths = kmem_alloc(sizeof (char **),
5221 						    KM_SLEEP);
5222 					(void) strcpy(path, keptp);
5223 					paths[0] = path;
5224 					dp->pdr_kept_paths = paths;
5225 					dp->pdr_kept_count++;
5226 				} else {
5227 					/* Check to see if already on list */
5228 					for (i = 0; i < dp->pdr_kept_count;
5229 					    i++) {
5230 						if (strcmp(keptp,
5231 						    dp->pdr_kept_paths[i])
5232 						    == 0) {
5233 							found++;
5234 							break;
5235 						}
5236 					}
5237 					if (found) {
5238 						ddi_release_devi(keeper);
5239 						continue;
5240 					}
5241 					length = dp->pdr_kept_count *
5242 					    sizeof (char **);
5243 					paths = kmem_alloc(
5244 					    length + sizeof (char **),
5245 					    KM_SLEEP);
5246 					if (dp->pdr_kept_count) {
5247 						bcopy(dp->pdr_kept_paths,
5248 						    paths, length);
5249 						kmem_free(dp->pdr_kept_paths,
5250 							length);
5251 					}
5252 					dp->pdr_kept_paths = paths;
5253 					length = strlen(keptp) + 1;
5254 					path =
5255 					    kmem_alloc(length, KM_SLEEP);
5256 					(void) strcpy(path, keptp);
5257 					dp->pdr_kept_paths[i] = path;
5258 					dp->pdr_kept_count++;
5259 				}
5260 #ifdef DEBUG
5261 				if (pm_debug & PMD_DEP)
5262 					prdeps("After from pm_kept\n");
5263 #endif
5264 				if (keeper) {
5265 					ret += pm_set_keeping(keeper, kept);
5266 					ddi_release_devi(keeper);
5267 				}
5268 			}
5269 		} else {
5270 			/*
5271 			 * pm_keeper would be called later to do
5272 			 * the actual pm_set_keeping.
5273 			 */
5274 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5275 			    pmf, (void *)kept))
5276 #ifdef DEBUG
5277 			if (pm_debug & PMD_DEP)
5278 				prdeps("Before Adding from pm_kept\n");
5279 #endif
5280 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5281 				if (dp->pdr_kept_paths == NULL) {
5282 					length = strlen(keptp) + 1;
5283 					path =
5284 					    kmem_alloc(length, KM_SLEEP);
5285 					paths = kmem_alloc(sizeof (char **),
5286 						KM_SLEEP);
5287 					(void) strcpy(path, keptp);
5288 					paths[0] = path;
5289 					dp->pdr_kept_paths = paths;
5290 					dp->pdr_kept_count++;
5291 				}
5292 			}
5293 #ifdef DEBUG
5294 			if (pm_debug & PMD_DEP)
5295 			    prdeps("After from pm_kept\n");
5296 #endif
5297 		}
5298 	}
5299 	ddi_release_devi(kept);
5300 	return (ret);
5301 }
5302 
5303 /*
5304  * Apply a recorded dependency.  dp specifies the dependency, and
5305  * keeper is already known to be the device that keeps up the other (kept) one.
5306  * We have to the whole tree for the "kept" device, then apply
5307  * the dependency (which may already be applied).
5308  */
5309 int
5310 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5311 {
5312 	PMD_FUNC(pmf, "apply_recorded_dep")
5313 	dev_info_t *kept = NULL;
5314 	int ret = 0;
5315 	char *keptp = NULL;
5316 
5317 	/*
5318 	 * Device to Device dependency can only be 1 to 1.
5319 	 */
5320 	if (dp->pdr_kept_paths == NULL)
5321 		return (0);
5322 	keptp = dp->pdr_kept_paths[0];
5323 	if (keptp == NULL)
5324 		return (0);
5325 	ASSERT(*keptp != '\0');
5326 	kept = pm_name_to_dip(keptp, 1);
5327 	if (kept == NULL)
5328 		return (0);
5329 	if (kept) {
5330 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5331 		    dp->pdr_keeper, keptp))
5332 		if (pm_set_keeping(keeper, kept)) {
5333 			ASSERT(dp->pdr_satisfied == 0);
5334 			dp->pdr_satisfied = 1;
5335 			ASSERT(pm_unresolved_deps);
5336 			pm_unresolved_deps--;
5337 			ret++;
5338 		}
5339 	}
5340 	ddi_release_devi(kept);
5341 
5342 	return (ret);
5343 }
5344 
5345 /*
5346  * Called from common/io/pm.c
5347  */
5348 int
5349 pm_cur_power(pm_component_t *cp)
5350 {
5351 	return (cur_power(cp));
5352 }
5353 
5354 /*
5355  * External interface to sanity-check a power level.
5356  */
5357 int
5358 pm_valid_power(dev_info_t *dip, int comp, int level)
5359 {
5360 	PMD_FUNC(pmf, "valid_power")
5361 
5362 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5363 		return (e_pm_valid_power(dip, comp, level));
5364 	else {
5365 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5366 		    pmf, comp, PM_NUMCMPTS(dip), level))
5367 		return (0);
5368 	}
5369 }
5370 
5371 /*
5372  * Called when a device that is direct power managed needs to change state.
5373  * This routine arranges to block the request until the process managing
5374  * the device makes the change (or some other incompatible change) or
5375  * the process closes /dev/pm.
5376  */
5377 static int
5378 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5379 {
5380 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5381 	int ret = 0;
5382 	void pm_dequeue_blocked(pm_rsvp_t *);
5383 	void pm_enqueue_blocked(pm_rsvp_t *);
5384 
5385 	ASSERT(!pm_processes_stopped);
5386 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5387 	new->pr_dip = dip;
5388 	new->pr_comp = comp;
5389 	new->pr_newlevel = newpower;
5390 	new->pr_oldlevel = oldpower;
5391 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5392 	mutex_enter(&pm_rsvp_lock);
5393 	pm_enqueue_blocked(new);
5394 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5395 	    PM_CANBLOCK_BLOCK);
5396 	PM_UNLOCK_DIP(dip);
5397 	/*
5398 	 * truss may make the cv_wait_sig return prematurely
5399 	 */
5400 	while (ret == 0) {
5401 		/*
5402 		 * Normally there will be no user context involved, but if
5403 		 * there is (e.g. we are here via an ioctl call to a driver)
5404 		 * then we should allow the process to abort the request,
5405 		 * or we get an unkillable process if the same thread does
5406 		 * PM_DIRECT_PM and pm_raise_power
5407 		 */
5408 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5409 			ret = PMP_FAIL;
5410 		} else {
5411 			ret = new->pr_retval;
5412 		}
5413 	}
5414 	pm_dequeue_blocked(new);
5415 	mutex_exit(&pm_rsvp_lock);
5416 	cv_destroy(&new->pr_cv);
5417 	kmem_free(new, sizeof (*new));
5418 	return (ret);
5419 }
5420 
5421 /*
5422  * Returns true if the process is interested in power level changes (has issued
5423  * PM_GET_STATE_CHANGE ioctl).
5424  */
5425 int
5426 pm_interest_registered(int clone)
5427 {
5428 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5429 	return (pm_interest[clone]);
5430 }
5431 
5432 /*
5433  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5434  * watch all state transitions (dip == NULL).  Set up data
5435  * structs to communicate with process about state changes.
5436  */
5437 void
5438 pm_register_watcher(int clone, dev_info_t *dip)
5439 {
5440 	pscc_t	*p;
5441 	psce_t	*psce;
5442 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5443 
5444 	/*
5445 	 * We definitely need a control struct, then we have to search to see
5446 	 * there is already an entries struct (in the dip != NULL case).
5447 	 */
5448 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5449 	pscc->pscc_clone = clone;
5450 	pscc->pscc_dip = dip;
5451 
5452 	if (dip) {
5453 		int found = 0;
5454 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5455 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5456 			/*
5457 			 * Already an entry for this clone, so just use it
5458 			 * for the new one (for the case where a single
5459 			 * process is watching multiple devices)
5460 			 */
5461 			if (p->pscc_clone == clone) {
5462 				ASSERT(p->pscc_dip != dip);
5463 				pscc->pscc_entries = p->pscc_entries;
5464 				pscc->pscc_entries->psce_references++;
5465 				found++;
5466 			}
5467 		}
5468 		if (!found) {		/* create a new one */
5469 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5470 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5471 			psce->psce_first =
5472 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5473 			    KM_SLEEP);
5474 			psce->psce_in = psce->psce_out = psce->psce_first;
5475 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5476 			psce->psce_references = 1;
5477 			pscc->pscc_entries = psce;
5478 		}
5479 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5480 		rw_exit(&pm_pscc_direct_rwlock);
5481 	} else {
5482 		ASSERT(!pm_interest_registered(clone));
5483 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5484 #ifdef DEBUG
5485 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5486 			/*
5487 			 * Should not be an entry for this clone!
5488 			 */
5489 			ASSERT(p->pscc_clone != clone);
5490 		}
5491 #endif
5492 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5493 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5494 		    PSCCOUNT, KM_SLEEP);
5495 		psce->psce_in = psce->psce_out = psce->psce_first;
5496 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5497 		psce->psce_references = 1;
5498 		pscc->pscc_entries = psce;
5499 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5500 		pm_interest[clone] = 1;
5501 		rw_exit(&pm_pscc_interest_rwlock);
5502 	}
5503 }
5504 
5505 /*
5506  * Remove the given entry from the blocked list
5507  */
5508 void
5509 pm_dequeue_blocked(pm_rsvp_t *p)
5510 {
5511 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5512 	if (pm_blocked_list == p) {
5513 		ASSERT(p->pr_prev == NULL);
5514 		if (p->pr_next != NULL)
5515 			p->pr_next->pr_prev = NULL;
5516 		pm_blocked_list = p->pr_next;
5517 	} else {
5518 		ASSERT(p->pr_prev != NULL);
5519 		p->pr_prev->pr_next = p->pr_next;
5520 		if (p->pr_next != NULL)
5521 			p->pr_next->pr_prev = p->pr_prev;
5522 	}
5523 }
5524 
5525 /*
5526  * Remove the given control struct from the given list
5527  */
5528 static void
5529 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5530 {
5531 	if (*list == p) {
5532 		ASSERT(p->pscc_prev == NULL);
5533 		if (p->pscc_next != NULL)
5534 			p->pscc_next->pscc_prev = NULL;
5535 		*list = p->pscc_next;
5536 	} else {
5537 		ASSERT(p->pscc_prev != NULL);
5538 		p->pscc_prev->pscc_next = p->pscc_next;
5539 		if (p->pscc_next != NULL)
5540 			p->pscc_next->pscc_prev = p->pscc_prev;
5541 	}
5542 }
5543 
5544 /*
5545  * Stick the control struct specified on the front of the list
5546  */
5547 static void
5548 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5549 {
5550 	pscc_t *h;	/* entry at head of list */
5551 	if ((h = *list) == NULL) {
5552 		*list = p;
5553 		ASSERT(p->pscc_next == NULL);
5554 		ASSERT(p->pscc_prev == NULL);
5555 	} else {
5556 		p->pscc_next = h;
5557 		ASSERT(h->pscc_prev == NULL);
5558 		h->pscc_prev = p;
5559 		ASSERT(p->pscc_prev == NULL);
5560 		*list = p;
5561 	}
5562 }
5563 
5564 /*
5565  * If dip is NULL, process is closing "clone" clean up all its registrations.
5566  * Otherwise only clean up those for dip because process is just giving up
5567  * control of a direct device.
5568  */
5569 void
5570 pm_deregister_watcher(int clone, dev_info_t *dip)
5571 {
5572 	pscc_t	*p, *pn;
5573 	psce_t	*psce;
5574 	int found = 0;
5575 
5576 	if (dip == NULL) {
5577 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5578 		for (p = pm_pscc_interest; p; p = pn) {
5579 			pn = p->pscc_next;
5580 			if (p->pscc_clone == clone) {
5581 				pm_dequeue_pscc(p, &pm_pscc_interest);
5582 				psce = p->pscc_entries;
5583 				ASSERT(psce->psce_references == 1);
5584 				mutex_destroy(&psce->psce_lock);
5585 				kmem_free(psce->psce_first,
5586 				    sizeof (pm_state_change_t) * PSCCOUNT);
5587 				kmem_free(psce, sizeof (*psce));
5588 				kmem_free(p, sizeof (*p));
5589 			}
5590 		}
5591 		pm_interest[clone] = 0;
5592 		rw_exit(&pm_pscc_interest_rwlock);
5593 	}
5594 	found = 0;
5595 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5596 	for (p = pm_pscc_direct; p; p = pn) {
5597 		pn = p->pscc_next;
5598 		if ((dip && p->pscc_dip == dip) ||
5599 		    (dip == NULL && clone == p->pscc_clone)) {
5600 			ASSERT(clone == p->pscc_clone);
5601 			found++;
5602 			/*
5603 			 * Remove from control list
5604 			 */
5605 			pm_dequeue_pscc(p, &pm_pscc_direct);
5606 			/*
5607 			 * If we're the last reference, free the
5608 			 * entries struct.
5609 			 */
5610 			psce = p->pscc_entries;
5611 			ASSERT(psce);
5612 			if (psce->psce_references == 1) {
5613 				kmem_free(psce->psce_first,
5614 				    PSCCOUNT * sizeof (pm_state_change_t));
5615 				kmem_free(psce, sizeof (*psce));
5616 			} else {
5617 				psce->psce_references--;
5618 			}
5619 			kmem_free(p, sizeof (*p));
5620 		}
5621 	}
5622 	ASSERT(dip == NULL || found);
5623 	rw_exit(&pm_pscc_direct_rwlock);
5624 }
5625 
5626 /*
5627  * Search the indicated list for an entry that matches clone, and return a
5628  * pointer to it.  To be interesting, the entry must have something ready to
5629  * be passed up to the controlling process.
5630  * The returned entry will be locked upon return from this call.
5631  */
5632 static psce_t *
5633 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5634 {
5635 	pscc_t	*p;
5636 	psce_t	*psce;
5637 	rw_enter(lock, RW_READER);
5638 	for (p = *list; p; p = p->pscc_next) {
5639 		if (clone == p->pscc_clone) {
5640 			psce = p->pscc_entries;
5641 			mutex_enter(&psce->psce_lock);
5642 			if (psce->psce_out->size) {
5643 				rw_exit(lock);
5644 				return (psce);
5645 			} else {
5646 				mutex_exit(&psce->psce_lock);
5647 			}
5648 		}
5649 	}
5650 	rw_exit(lock);
5651 	return (NULL);
5652 }
5653 
5654 /*
5655  * Find an entry for a particular clone in the direct list.
5656  */
5657 psce_t *
5658 pm_psc_clone_to_direct(int clone)
5659 {
5660 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5661 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5662 	    &pm_pscc_direct_rwlock));
5663 }
5664 
5665 /*
5666  * Find an entry for a particular clone in the interest list.
5667  */
5668 psce_t *
5669 pm_psc_clone_to_interest(int clone)
5670 {
5671 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5672 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5673 	    &pm_pscc_interest_rwlock));
5674 }
5675 
5676 /*
5677  * Put the given entry at the head of the blocked list
5678  */
5679 void
5680 pm_enqueue_blocked(pm_rsvp_t *p)
5681 {
5682 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5683 	ASSERT(p->pr_next == NULL);
5684 	ASSERT(p->pr_prev == NULL);
5685 	if (pm_blocked_list != NULL) {
5686 		p->pr_next = pm_blocked_list;
5687 		ASSERT(pm_blocked_list->pr_prev == NULL);
5688 		pm_blocked_list->pr_prev = p;
5689 		pm_blocked_list = p;
5690 	} else {
5691 		pm_blocked_list = p;
5692 	}
5693 }
5694 
5695 /*
5696  * Sets every power managed device back to its default threshold
5697  */
5698 void
5699 pm_all_to_default_thresholds(void)
5700 {
5701 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5702 	    (void *) &pm_system_idle_threshold);
5703 }
5704 
5705 static int
5706 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5707 {
5708 	int thr = (int)(*(int *)arg);
5709 
5710 	if (!PM_GET_PM_INFO(dip))
5711 		return (DDI_WALK_CONTINUE);
5712 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5713 	return (DDI_WALK_CONTINUE);
5714 }
5715 
5716 /*
5717  * Returns the current threshold value (in seconds) for the indicated component
5718  */
5719 int
5720 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5721 {
5722 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5723 		return (DDI_FAILURE);
5724 	} else {
5725 		*threshp = cur_threshold(dip, comp);
5726 		return (DDI_SUCCESS);
5727 	}
5728 }
5729 
5730 /*
5731  * To be called when changing the power level of a component of a device.
5732  * On some platforms, changing power on one device may require that power
5733  * be changed on other, related devices in the same transaction.  Thus, we
5734  * always pass this request to the platform power manager so that all the
5735  * affected devices will be locked.
5736  */
5737 void
5738 pm_lock_power(dev_info_t *dip, int *circp)
5739 {
5740 	power_req_t power_req;
5741 	int result;
5742 
5743 	power_req.request_type = PMR_PPM_LOCK_POWER;
5744 	power_req.req.ppm_lock_power_req.who = dip;
5745 	power_req.req.ppm_lock_power_req.circp = circp;
5746 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5747 }
5748 
5749 /*
5750  * Release the lock (or locks) acquired to change the power of a device.
5751  * See comments for pm_lock_power.
5752  */
5753 void
5754 pm_unlock_power(dev_info_t *dip, int circ)
5755 {
5756 	power_req_t power_req;
5757 	int result;
5758 
5759 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5760 	power_req.req.ppm_unlock_power_req.who = dip;
5761 	power_req.req.ppm_unlock_power_req.circ = circ;
5762 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5763 }
5764 
5765 
5766 /*
5767  * Attempt (without blocking) to acquire the lock(s) needed to change the
5768  * power of a component of a device.  See comments for pm_lock_power.
5769  *
5770  * Return: 1 if lock(s) acquired, 0 if not.
5771  */
5772 int
5773 pm_try_locking_power(dev_info_t *dip, int *circp)
5774 {
5775 	power_req_t power_req;
5776 	int result;
5777 
5778 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5779 	power_req.req.ppm_lock_power_req.who = dip;
5780 	power_req.req.ppm_lock_power_req.circp = circp;
5781 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5782 	return (result);
5783 }
5784 
5785 
5786 /*
5787  * Lock power state of a device.
5788  *
5789  * The implementation handles a special case where another thread may have
5790  * acquired the lock and created/launched this thread to do the work.  If
5791  * the lock cannot be acquired immediately, we check to see if this thread
5792  * is registered as a borrower of the lock.  If so, we may proceed without
5793  * the lock.  This assumes that the lending thread blocks on the completion
5794  * of this thread.
5795  *
5796  * Note 1: for use by ppm only.
5797  *
5798  * Note 2: On failing to get the lock immediately, we search lock_loan list
5799  * for curthread (as borrower of the lock).  On a hit, we check that the
5800  * lending thread already owns the lock we want.  It is safe to compare
5801  * devi_busy_thread and thread id of the lender because in the == case (the
5802  * only one we care about) we know that the owner is blocked.  Similarly,
5803  * If we find that curthread isn't registered as a lock borrower, it is safe
5804  * to use the blocking call (ndi_devi_enter) because we know that if we
5805  * weren't already listed as a borrower (upstream on the call stack) we won't
5806  * become one.
5807  */
5808 void
5809 pm_lock_power_single(dev_info_t *dip, int *circp)
5810 {
5811 	lock_loan_t *cur;
5812 
5813 	/* if the lock is available, we are done. */
5814 	if (ndi_devi_tryenter(dip, circp))
5815 		return;
5816 
5817 	mutex_enter(&pm_loan_lock);
5818 	/* see if our thread is registered as a lock borrower. */
5819 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5820 		if (cur->pmlk_borrower == curthread)
5821 			break;
5822 	mutex_exit(&pm_loan_lock);
5823 
5824 	/* if this thread not already registered, it is safe to block */
5825 	if (cur == NULL)
5826 		ndi_devi_enter(dip, circp);
5827 	else {
5828 		/* registered: does lender own the lock we want? */
5829 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5830 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5831 			cur->pmlk_dip = dip;
5832 		} else /* no: just block for it */
5833 			ndi_devi_enter(dip, circp);
5834 
5835 	}
5836 }
5837 
5838 /*
5839  * Drop the lock on the device's power state.  See comment for
5840  * pm_lock_power_single() for special implementation considerations.
5841  *
5842  * Note: for use by ppm only.
5843  */
5844 void
5845 pm_unlock_power_single(dev_info_t *dip, int circ)
5846 {
5847 	lock_loan_t *cur;
5848 
5849 	/* optimization: mutex not needed to check empty list */
5850 	if (lock_loan_head.pmlk_next == NULL) {
5851 		ndi_devi_exit(dip, circ);
5852 		return;
5853 	}
5854 
5855 	mutex_enter(&pm_loan_lock);
5856 	/* see if our thread is registered as a lock borrower. */
5857 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5858 		if (cur->pmlk_borrower == curthread)
5859 			break;
5860 	mutex_exit(&pm_loan_lock);
5861 
5862 	if (cur == NULL || cur->pmlk_dip != dip)
5863 		/* we acquired the lock directly, so return it */
5864 		ndi_devi_exit(dip, circ);
5865 }
5866 
5867 /*
5868  * Try to take the lock for changing the power level of a component.
5869  *
5870  * Note: for use by ppm only.
5871  */
5872 int
5873 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5874 {
5875 	return (ndi_devi_tryenter(dip, circp));
5876 }
5877 
5878 #ifdef	DEBUG
5879 /*
5880  * The following are used only to print out data structures for debugging
5881  */
5882 void
5883 prdeps(char *msg)
5884 {
5885 
5886 	pm_pdr_t *rp;
5887 	int i;
5888 
5889 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
5890 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
5891 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
5892 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
5893 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
5894 		    (void *)rp->pdr_next);
5895 		if (rp->pdr_kept_count != 0) {
5896 			pm_log("kept list = ");
5897 			i = 0;
5898 			while (i < rp->pdr_kept_count) {
5899 				pm_log("%s ", rp->pdr_kept_paths[i]);
5900 				i++;
5901 			}
5902 			pm_log("\n");
5903 		}
5904 	}
5905 }
5906 
5907 void
5908 pr_noinvol(char *hdr)
5909 {
5910 	pm_noinvol_t *ip;
5911 
5912 	pm_log("%s\n", hdr);
5913 	rw_enter(&pm_noinvol_rwlock, RW_READER);
5914 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
5915 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
5916 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
5917 	rw_exit(&pm_noinvol_rwlock);
5918 }
5919 #endif
5920 
5921 /*
5922  * Attempt to apply the thresholds indicated by rp to the node specified by
5923  * dip.
5924  */
5925 void
5926 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5927 {
5928 	PMD_FUNC(pmf, "apply_recorded_thresh")
5929 	int i, j;
5930 	int comps = PM_NUMCMPTS(dip);
5931 	struct pm_component *cp;
5932 	pm_pte_t *ep;
5933 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
5934 
5935 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
5936 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
5937 	PM_LOCK_DIP(dip);
5938 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
5939 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
5940 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
5941 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
5942 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
5943 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
5944 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
5945 		PM_UNLOCK_DIP(dip);
5946 		return;
5947 	}
5948 
5949 	ep = rp->ptr_entries;
5950 	/*
5951 	 * Here we do the special case of a device threshold
5952 	 */
5953 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
5954 		ASSERT(ep && ep->pte_numthresh == 1);
5955 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
5956 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
5957 		PM_UNLOCK_DIP(dip);
5958 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
5959 		if (autopm_enabled)
5960 			pm_rescan(dip);
5961 		return;
5962 	}
5963 	for (i = 0; i < comps; i++) {
5964 		cp = PM_CP(dip, i);
5965 		for (j = 0; j < ep->pte_numthresh; j++) {
5966 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
5967 			    "to %x\n", pmf, j, PM_DEVICE(dip),
5968 			    i, ep->pte_thresh[j]))
5969 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
5970 		}
5971 		ep++;
5972 	}
5973 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
5974 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
5975 	PM_UNLOCK_DIP(dip);
5976 
5977 	if (autopm_enabled)
5978 		pm_rescan(dip);
5979 }
5980 
5981 /*
5982  * Returns true if the threshold specified by rp could be applied to dip
5983  * (that is, the number of components and transitions are the same)
5984  */
5985 int
5986 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5987 {
5988 	PMD_FUNC(pmf, "valid_thresh")
5989 	int comps, i;
5990 	pm_component_t *cp;
5991 	pm_pte_t *ep;
5992 
5993 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
5994 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
5995 		    rp->ptr_physpath))
5996 		return (0);
5997 	}
5998 	/*
5999 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6000 	 * an entry with numcomps == 0, (since we don't know how many
6001 	 * components there are in advance).  This is always a valid
6002 	 * spec.
6003 	 */
6004 	if (rp->ptr_numcomps == 0) {
6005 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6006 		return (1);
6007 	}
6008 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6009 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6010 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6011 		return (0);
6012 	}
6013 	ep = rp->ptr_entries;
6014 	for (i = 0; i < comps; i++) {
6015 		cp = PM_CP(dip, i);
6016 		if ((ep + i)->pte_numthresh !=
6017 		    cp->pmc_comp.pmc_numlevels - 1) {
6018 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6019 			    pmf, rp->ptr_physpath, i,
6020 			    cp->pmc_comp.pmc_numlevels - 1,
6021 			    (ep + i)->pte_numthresh))
6022 			return (0);
6023 		}
6024 	}
6025 	return (1);
6026 }
6027 
6028 /*
6029  * Remove any recorded threshold for device physpath
6030  * We know there will be at most one.
6031  */
6032 void
6033 pm_unrecord_threshold(char *physpath)
6034 {
6035 	pm_thresh_rec_t *pptr, *ptr;
6036 
6037 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6038 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6039 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6040 			if (pptr) {
6041 				pptr->ptr_next = ptr->ptr_next;
6042 			} else {
6043 				ASSERT(pm_thresh_head == ptr);
6044 				pm_thresh_head = ptr->ptr_next;
6045 			}
6046 			kmem_free(ptr, ptr->ptr_size);
6047 			break;
6048 		}
6049 		pptr = ptr;
6050 	}
6051 	rw_exit(&pm_thresh_rwlock);
6052 }
6053 
6054 /*
6055  * Discard all recorded thresholds.  We are returning to the default pm state.
6056  */
6057 void
6058 pm_discard_thresholds(void)
6059 {
6060 	pm_thresh_rec_t *rp;
6061 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6062 	while (pm_thresh_head) {
6063 		rp = pm_thresh_head;
6064 		pm_thresh_head = rp->ptr_next;
6065 		kmem_free(rp, rp->ptr_size);
6066 	}
6067 	rw_exit(&pm_thresh_rwlock);
6068 }
6069 
6070 /*
6071  * Discard all recorded dependencies.  We are returning to the default pm state.
6072  */
6073 void
6074 pm_discard_dependencies(void)
6075 {
6076 	pm_pdr_t *rp;
6077 	int i;
6078 	size_t length;
6079 
6080 #ifdef DEBUG
6081 	if (pm_debug & PMD_DEP)
6082 		prdeps("Before discard\n");
6083 #endif
6084 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6085 
6086 #ifdef DEBUG
6087 	if (pm_debug & PMD_DEP)
6088 		prdeps("After discard\n");
6089 #endif
6090 	while (pm_dep_head) {
6091 		rp = pm_dep_head;
6092 		if (!rp->pdr_isprop) {
6093 			ASSERT(rp->pdr_satisfied == 0);
6094 			ASSERT(pm_unresolved_deps);
6095 			pm_unresolved_deps--;
6096 		} else {
6097 			ASSERT(pm_prop_deps);
6098 			pm_prop_deps--;
6099 		}
6100 		pm_dep_head = rp->pdr_next;
6101 		if (rp->pdr_kept_count)  {
6102 			for (i = 0; i < rp->pdr_kept_count; i++) {
6103 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6104 				kmem_free(rp->pdr_kept_paths[i], length);
6105 			}
6106 			kmem_free(rp->pdr_kept_paths,
6107 				rp->pdr_kept_count * sizeof (char **));
6108 		}
6109 		kmem_free(rp, rp->pdr_size);
6110 	}
6111 }
6112 
6113 
6114 static int
6115 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6116 {
6117 	_NOTE(ARGUNUSED(arg))
6118 	char *pathbuf;
6119 
6120 	if (PM_GET_PM_INFO(dip) == NULL)
6121 		return (DDI_WALK_CONTINUE);
6122 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6123 	(void) ddi_pathname(dip, pathbuf);
6124 	pm_free_keeper(pathbuf, 0);
6125 	kmem_free(pathbuf, MAXPATHLEN);
6126 	return (DDI_WALK_CONTINUE);
6127 }
6128 
6129 static int
6130 pm_kept_walk(dev_info_t *dip, void *arg)
6131 {
6132 	_NOTE(ARGUNUSED(arg))
6133 	char *pathbuf;
6134 
6135 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6136 	(void) ddi_pathname(dip, pathbuf);
6137 	(void) pm_kept(pathbuf);
6138 	kmem_free(pathbuf, MAXPATHLEN);
6139 
6140 	return (DDI_WALK_CONTINUE);
6141 }
6142 
6143 static int
6144 pm_keeper_walk(dev_info_t *dip, void *arg)
6145 {
6146 	_NOTE(ARGUNUSED(arg))
6147 	char *pathbuf;
6148 
6149 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6150 	(void) ddi_pathname(dip, pathbuf);
6151 	(void) pm_keeper(pathbuf);
6152 	kmem_free(pathbuf, MAXPATHLEN);
6153 
6154 	return (DDI_WALK_CONTINUE);
6155 }
6156 
6157 static char *
6158 pdw_type_decode(int type)
6159 {
6160 	switch (type) {
6161 	case PM_DEP_WK_POWER_ON:
6162 		return ("power on");
6163 	case PM_DEP_WK_POWER_OFF:
6164 		return ("power off");
6165 	case PM_DEP_WK_DETACH:
6166 		return ("detach");
6167 	case PM_DEP_WK_REMOVE_DEP:
6168 		return ("remove dep");
6169 	case PM_DEP_WK_BRINGUP_SELF:
6170 		return ("bringup self");
6171 	case PM_DEP_WK_RECORD_KEEPER:
6172 		return ("add dependent");
6173 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6174 		return ("add dependent property");
6175 	case PM_DEP_WK_KEPT:
6176 		return ("kept");
6177 	case PM_DEP_WK_KEEPER:
6178 		return ("keeper");
6179 	case PM_DEP_WK_ATTACH:
6180 		return ("attach");
6181 	case PM_DEP_WK_CHECK_KEPT:
6182 		return ("check kept");
6183 	case PM_DEP_WK_CPR_SUSPEND:
6184 		return ("suspend");
6185 	case PM_DEP_WK_CPR_RESUME:
6186 		return ("resume");
6187 	default:
6188 		return ("unknown");
6189 	}
6190 
6191 }
6192 
6193 static void
6194 pm_rele_dep(char *keeper)
6195 {
6196 	PMD_FUNC(pmf, "rele_dep")
6197 	pm_pdr_t *dp;
6198 	char *kept_path = NULL;
6199 	dev_info_t *kept = NULL;
6200 	int count = 0;
6201 
6202 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6203 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6204 			continue;
6205 		for (count = 0; count < dp->pdr_kept_count; count++) {
6206 			kept_path = dp->pdr_kept_paths[count];
6207 			if (kept_path == NULL)
6208 				continue;
6209 			kept = pm_name_to_dip(kept_path, 1);
6210 			if (kept) {
6211 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6212 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6213 				    keeper))
6214 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6215 				pm_rele_power(kept);
6216 				ddi_release_devi(kept);
6217 			}
6218 		}
6219 	}
6220 }
6221 
/*
 * Called when we are just released from direct PM.  Bring ourself up
 * if our keeper is up since dependency is not honored while a kept
 * device is under direct PM.
 *
 * keptpath is the device path of the kept device.  Every dependency record
 * that lists keptpath among its kept paths is examined; if the record's
 * keeper resolves to a node and has a component with non-zero current
 * power, PMC_SKIP_BRINGUP is cleared on the kept device (when set) and
 * bring_pmdep_up() is called for it, all while holding the keeper's power
 * lock.
 */
static void
pm_bring_self_up(char *keptpath)
{
	PMD_FUNC(pmf, "bring_self_up")
	dev_info_t *kept;
	dev_info_t *keeper;
	pm_pdr_t *dp;
	int i, j;
	int up = 0, circ;

	/* nothing to do if the kept path no longer resolves to a node */
	kept = pm_name_to_dip(keptpath, 1);
	if (kept == NULL)
		return;
	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
		if (dp->pdr_kept_count == 0)
			continue;
		for (i = 0; i < dp->pdr_kept_count; i++) {
			/* only records that keep this device are of interest */
			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
				continue;
			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
			if (keeper) {
				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
				    pmf, PM_DEVICE(keeper)))
				PM_LOCK_POWER(keeper, &circ);
				/* count keeper components that are powered */
				for (j = 0; j < PM_NUMCMPTS(keeper);
				    j++) {
					if (PM_CURPOWER(keeper, j)) {
						PMD(PMD_KEEPS, ("%s: comp="
						    "%d is up\n", pmf, j))
						up++;
					}
				}
				/*
				 * NOTE(review): `up' is never reset, so once
				 * one keeper has been found up, every later
				 * matching keeper is treated as up as well.
				 * Confirm whether a per-keeper count was
				 * intended.
				 */
				if (up) {
					if (PM_SKBU(kept))
						DEVI(kept)->devi_pm_flags &=
						    ~PMC_SKIP_BRINGUP;
					bring_pmdep_up(kept, 1);
				}
				PM_UNLOCK_POWER(keeper, circ);
				ddi_release_devi(keeper);
			}
		}
	}
	/* drop the reference taken by the initial lookup */
	ddi_release_devi(kept);
}
6273 
6274 static void
6275 pm_process_dep_request(pm_dep_wk_t *work)
6276 {
6277 	PMD_FUNC(pmf, "dep_req")
6278 	int ret;
6279 
6280 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6281 	    pdw_type_decode(work->pdw_type)))
6282 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6283 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6284 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6285 
6286 	switch (work->pdw_type) {
6287 	case PM_DEP_WK_POWER_ON:
6288 		/* Bring up the kept devices and put a hold on them */
6289 		bring_wekeeps_up(work->pdw_keeper);
6290 		break;
6291 	case PM_DEP_WK_POWER_OFF:
6292 		/* Release the kept devices */
6293 		pm_rele_dep(work->pdw_keeper);
6294 		break;
6295 	case PM_DEP_WK_DETACH:
6296 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6297 		break;
6298 	case PM_DEP_WK_REMOVE_DEP:
6299 		pm_discard_dependencies();
6300 		break;
6301 	case PM_DEP_WK_BRINGUP_SELF:
6302 		/*
6303 		 * We deferred satisfying our dependency till now, so satisfy
6304 		 * it again and bring ourselves up.
6305 		 */
6306 		pm_bring_self_up(work->pdw_kept);
6307 		break;
6308 	case PM_DEP_WK_RECORD_KEEPER:
6309 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6310 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6311 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6312 		break;
6313 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6314 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6315 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6316 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6317 		break;
6318 	case PM_DEP_WK_KEPT:
6319 		ret = pm_kept(work->pdw_kept);
6320 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6321 		    ret))
6322 		break;
6323 	case PM_DEP_WK_KEEPER:
6324 		ret = pm_keeper(work->pdw_keeper);
6325 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6326 		    pmf, ret))
6327 		break;
6328 	case PM_DEP_WK_ATTACH:
6329 		ret = pm_keeper(work->pdw_keeper);
6330 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6331 		    pmf, ret))
6332 		ret = pm_kept(work->pdw_kept);
6333 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6334 		    pmf, ret))
6335 		break;
6336 	case PM_DEP_WK_CHECK_KEPT:
6337 		ret = pm_is_kept(work->pdw_kept);
6338 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6339 		    pmf, work->pdw_kept, ret))
6340 		break;
6341 	case PM_DEP_WK_CPR_SUSPEND:
6342 		pm_discard_dependencies();
6343 		break;
6344 	case PM_DEP_WK_CPR_RESUME:
6345 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6346 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6347 		break;
6348 	default:
6349 		ASSERT(0);
6350 		break;
6351 	}
6352 	/*
6353 	 * Free the work structure if the requester is not waiting
6354 	 * Otherwise it is the requester's responsiblity to free it.
6355 	 */
6356 	if (!work->pdw_wait) {
6357 		if (work->pdw_keeper)
6358 			kmem_free(work->pdw_keeper,
6359 			    strlen(work->pdw_keeper) + 1);
6360 		if (work->pdw_kept)
6361 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6362 		kmem_free(work, sizeof (pm_dep_wk_t));
6363 	} else {
6364 		/*
6365 		 * Notify requester if it is waiting for it.
6366 		 */
6367 		work->pdw_ret = ret;
6368 		work->pdw_done = 1;
6369 		cv_signal(&work->pdw_cv);
6370 	}
6371 }
6372 
6373 /*
6374  * Process PM dependency requests.
6375  */
6376 static void
6377 pm_dep_thread(void)
6378 {
6379 	pm_dep_wk_t *work;
6380 	callb_cpr_t cprinfo;
6381 
6382 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6383 	    "pm_dep_thread");
6384 	for (;;) {
6385 		mutex_enter(&pm_dep_thread_lock);
6386 		if (pm_dep_thread_workq == NULL) {
6387 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6388 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6389 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6390 		}
6391 		work = pm_dep_thread_workq;
6392 		pm_dep_thread_workq = work->pdw_next;
6393 		if (pm_dep_thread_tail == work)
6394 			pm_dep_thread_tail = work->pdw_next;
6395 		mutex_exit(&pm_dep_thread_lock);
6396 		pm_process_dep_request(work);
6397 
6398 	}
6399 	/*NOTREACHED*/
6400 }
6401 
6402 /*
6403  * Set the power level of the indicated device to unknown (if it is not a
6404  * backwards compatible device), as it has just been resumed, and it won't
6405  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6406  */
6407 void
6408 pm_forget_power_level(dev_info_t *dip)
6409 {
6410 	dev_info_t *pdip = ddi_get_parent(dip);
6411 	int i, count = 0;
6412 
6413 	if (!PM_ISBC(dip)) {
6414 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6415 			count += (PM_CURPOWER(dip, i) == 0);
6416 
6417 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6418 			e_pm_hold_rele_power(pdip, count);
6419 
6420 		/*
6421 		 * Count this as a power cycle if we care
6422 		 */
6423 		if (DEVI(dip)->devi_pm_volpmd &&
6424 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6425 			DEVI(dip)->devi_pm_volpmd = 0;
6426 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6427 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6428 	}
6429 }
6430 
/*
 * This function advises the caller whether it should make a power-off
 * transition at this time or not.  If the transition is not advised
 * at this time, the time that the next power-off transition can
 * be made from now is returned through "intervalp" pointer.
 * This function returns:
 *
 *  1  power-off advised
 *  0  power-off not advised, intervalp will point to seconds from
 *	  now that a power-off is advised.  If it is past the number
 *	  of years that policy specifies the device should last,
 *	  a large number is returned as the time interval.
 *  -1  error
 *
 * Only DC_SCSI_FORMAT data is understood; any other format, a NULL datap,
 * a negative cycle count, a non-zero flag, an unparsable service date or a
 * service date later than now all yield -1.  "intervalp" is written only
 * on the paths that return 0.
 */
int
pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
{
	PMD_FUNC(pmf, "pm_trans_check")
	char dbuf[DC_SCSI_MFR_LEN];
	struct pm_scsi_cycles *scp;
	int service_years, service_weeks, full_years;
	time_t now, service_seconds, tdiff;
	time_t within_year, when_allowed;
	char *ptr;
	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
	int cycles_diff, cycles_over;

	if (datap == NULL) {
		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
		return (-1);
	}

	if (datap->format == DC_SCSI_FORMAT) {
		/*
		 * Power cycles of the scsi drives are distributed
		 * over 5 years with the following percentage ratio:
		 *
		 *	30%, 25%, 20%, 15%, and 10%
		 *
		 * The power cycle quota for each year is distributed
		 * linearly through out the year.  The equation for
		 * determining the expected cycles is:
		 *
		 *	e = a * (n / y)
		 *
		 * e = expected cycles
		 * a = allocated cycles for this year
		 * n = number of seconds since beginning of this year
		 * y = number of seconds in a year
		 *
		 * Note that beginning of the year starts the day that
		 * the drive has been put on service.
		 *
		 * If the drive has passed its expected cycles, we
		 * can determine when it can start to power cycle
		 * again to keep it on track to meet the 5-year
		 * life expectancy.  The equation for determining
		 * when to power cycle is:
		 *
		 *	w = y * (c / a)
		 *
		 * w = when it can power cycle again
		 * y = number of seconds in a year
		 * c = current number of cycles
		 * a = allocated cycles for the year
		 *
		 * pcnt[] below holds the per-year ratios above as running
		 * (cumulative) totals, i.e. the percentage of lifemax
		 * allowed by the end of each service year.
		 */
		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };

		scp = &datap->un.scsi_cycles;
		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
		if (scp->ncycles < 0 || scp->flag != 0) {
			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
			return (-1);
		}

		/* lifetime quota already exceeded: advise a very long wait */
		if (scp->ncycles > scp->lifemax) {
			*intervalp = (LONG_MAX / hz);
			return (0);
		}

		/*
		 * convert service date to time_t
		 *
		 * The date is parsed as DC_SCSI_YEAR_LEN year characters
		 * followed by DC_SCSI_WEEK_LEN week characters; each field
		 * is copied into dbuf and terminated before conversion.
		 */
		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
		dbuf[DC_SCSI_YEAR_LEN] = '\0';
		ptr = dbuf;
		service_years = stoi(&ptr) - EPOCH_YEAR;
		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
		    DC_SCSI_WEEK_LEN);
		dbuf[DC_SCSI_WEEK_LEN] = '\0';

		/*
		 * scsi standard does not specify WW data,
		 * could be (00-51) or (01-52)
		 */
		ptr = dbuf;
		service_weeks = stoi(&ptr);
		if (service_years < 0 ||
		    service_weeks < 0 || service_weeks > 52) {
			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
			    pmf, service_years, service_weeks))
			return (-1);
		}

		/*
		 * calculate service date in seconds-since-epoch,
		 * adding one day for each leap-year.
		 *
		 * (years-since-epoch + 2) fixes integer truncation,
		 * example: (8) leap-years during [1972, 2000]
		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
		 */
		service_seconds = (service_years * DC_SPY) +
		    (service_weeks * DC_SPW) +
		    (((service_years + 2) / 4) * DC_SPD);

		now = gethrestime_sec();
		/*
		 * since the granularity of 'svc_date' is day not second,
		 * 'now' should be rounded up to full day.
		 */
		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
		if (service_seconds > now) {
			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
			    "than now (%ld)!\n", pmf, service_seconds, now))
			return (-1);
		}

		tdiff = now - service_seconds;
		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))

		/*
		 * NOTE - Leap years are not considered in the calculations
		 * below.
		 *
		 * Once the drive is older than the DC_SCSI_NPY years covered
		 * by pcnt[], power-off is always advised.  The ncycles test
		 * here cannot fail, since ncycles > lifemax already returned
		 * above.
		 */
		full_years = (tdiff / DC_SPY);
		if ((full_years >= DC_SCSI_NPY) &&
		    (scp->ncycles <= scp->lifemax))
			return (1);

		/*
		 * Determine what is the normal cycle usage for the
		 * device at the beginning and the end of this year.
		 */
		lower_bound_cycles = (!full_years) ? 0 :
		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;

		/* usage is still within what earlier years allowed */
		if (scp->ncycles <= lower_bound_cycles)
			return (1);

		/*
		 * The linear slope that determines how many cycles
		 * are allowed this year is number of seconds
		 * passed this year over total number of seconds in a year.
		 */
		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
		within_year = (tdiff % DC_SPY);
		cycles_allowed = lower_bound_cycles +
		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
		    full_years, within_year))
		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
		    cycles_allowed))

		if (scp->ncycles <= cycles_allowed)
			return (1);

		/*
		 * The transition is not advised now but we can
		 * determine when the next transition can be made.
		 *
		 * Depending on how many cycles the device has been
		 * over-used, we may need to skip years with
		 * different percentage quota in order to determine
		 * when the next transition can be made.
		 *
		 * The loop leaves cycles_over in (0, cycles_diff], so the
		 * division by cycles_diff further down is by a positive
		 * value.
		 */
		cycles_over = (scp->ncycles - lower_bound_cycles);
		while (cycles_over > cycles_diff) {
			full_years++;
			if (full_years >= DC_SCSI_NPY) {
				*intervalp = (LONG_MAX / hz);
				return (0);
			}
			cycles_over -= cycles_diff;
			lower_bound_cycles = upper_bound_cycles;
			upper_bound_cycles =
			    (scp->lifemax * pcnt[full_years]) / 100;
			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
		}

		/*
		 * The linear slope that determines when the next transition
		 * can be made is the relative position of used cycles within a
		 * year over total number of cycles within that year.
		 */
		when_allowed = service_seconds + (full_years * DC_SPY) +
		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
		*intervalp = (when_allowed - now);
		/* clamp the interval to the same ceiling used above */
		if (*intervalp > (LONG_MAX / hz))
			*intervalp = (LONG_MAX / hz);
		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
		    *intervalp))
		return (0);
	}

	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
	return (-1);
}
6643 
6644 /*
6645  * Nexus drivers call into pm framework to indicate which child driver is about
6646  * to be installed.  In some platforms, ppm may need to configure the hardware
6647  * for successful installation of a driver.
6648  */
6649 int
6650 pm_init_child(dev_info_t *dip)
6651 {
6652 	power_req_t power_req;
6653 
6654 	ASSERT(ddi_binding_name(dip));
6655 	ASSERT(ddi_get_name_addr(dip));
6656 	pm_ppm_claim(dip);
6657 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6658 		power_req.request_type = PMR_PPM_INIT_CHILD;
6659 		power_req.req.ppm_config_req.who = dip;
6660 		ASSERT(PPM(dip) != NULL);
6661 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6662 		    NULL));
6663 	} else {
6664 #ifdef DEBUG
6665 		/* pass it to the default handler so we can debug things */
6666 		power_req.request_type = PMR_PPM_INIT_CHILD;
6667 		power_req.req.ppm_config_req.who = dip;
6668 		(void) pm_ctlops(NULL, dip,
6669 		    DDI_CTLOPS_POWER, &power_req, NULL);
6670 #endif
6671 	}
6672 	return (DDI_SUCCESS);
6673 }
6674 
6675 /*
6676  * Bring parent of a node that is about to be probed up to full power, and
6677  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6678  * it is time to let it go down again
6679  */
6680 void
6681 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6682 {
6683 	int result;
6684 	power_req_t power_req;
6685 
6686 	bzero(cp, sizeof (*cp));
6687 	cp->ppc_dip = dip;
6688 
6689 	pm_ppm_claim(dip);
6690 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6691 		power_req.request_type = PMR_PPM_PRE_PROBE;
6692 		power_req.req.ppm_config_req.who = dip;
6693 		ASSERT(PPM(dip) != NULL);
6694 		(void) pm_ctlops(PPM(dip), dip,
6695 		    DDI_CTLOPS_POWER, &power_req, &result);
6696 		cp->ppc_ppm = PPM(dip);
6697 	} else {
6698 #ifdef DEBUG
6699 		/* pass it to the default handler so we can debug things */
6700 		power_req.request_type = PMR_PPM_PRE_PROBE;
6701 		power_req.req.ppm_config_req.who = dip;
6702 		(void) pm_ctlops(NULL, dip,
6703 		    DDI_CTLOPS_POWER, &power_req, &result);
6704 #endif
6705 		cp->ppc_ppm = NULL;
6706 	}
6707 }
6708 
6709 int
6710 pm_pre_config(dev_info_t *dip, char *devnm)
6711 {
6712 	PMD_FUNC(pmf, "pre_config")
6713 	int ret;
6714 
6715 	if (MDI_VHCI(dip)) {
6716 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6717 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6718 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6719 	} else if (!PM_GET_PM_INFO(dip))
6720 		return (DDI_SUCCESS);
6721 
6722 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6723 	pm_hold_power(dip);
6724 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6725 	if (ret != DDI_SUCCESS)
6726 		pm_rele_power(dip);
6727 	return (ret);
6728 }
6729 
6730 /*
6731  * This routine is called by devfs during its walk to unconfigue a node.
6732  * If the call is due to auto mod_unloads and the dip is not at its
6733  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6734  * return DDI_SUCCESS.
6735  */
6736 int
6737 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6738 {
6739 	PMD_FUNC(pmf, "pre_unconfig")
6740 	int ret;
6741 
6742 	if (MDI_VHCI(dip)) {
6743 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6744 		    PM_DEVICE(dip), flags))
6745 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6746 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6747 	} else if (!PM_GET_PM_INFO(dip))
6748 		return (DDI_SUCCESS);
6749 
6750 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6751 	    flags))
6752 	*held = 0;
6753 
6754 	/*
6755 	 * If the dip is a leaf node, don't power it up.
6756 	 */
6757 	if (!ddi_get_child(dip))
6758 		return (DDI_SUCCESS);
6759 
6760 	/*
6761 	 * Do not power up the node if it is called due to auto-modunload.
6762 	 */
6763 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6764 		return (DDI_FAILURE);
6765 
6766 	pm_hold_power(dip);
6767 	*held = 1;
6768 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6769 	if (ret != DDI_SUCCESS) {
6770 		pm_rele_power(dip);
6771 		*held = 0;
6772 	}
6773 	return (ret);
6774 }
6775 
6776 /*
6777  * Notify ppm of attach action.  Parent is already held at full power by
6778  * probe action.
6779  */
6780 void
6781 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6782 {
6783 	static char *me = "pm_pre_attach";
6784 	power_req_t power_req;
6785 	int result;
6786 
6787 	/*
6788 	 * Initialize and fill in the PPM cookie
6789 	 */
6790 	bzero(cp, sizeof (*cp));
6791 	cp->ppc_cmd = (int)cmd;
6792 	cp->ppc_ppm = PPM(dip);
6793 	cp->ppc_dip = dip;
6794 
6795 	/*
6796 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6797 	 * Power Management stuff. DDI_RESUME also has to purge it's
6798 	 * powerlevel information.
6799 	 */
6800 	switch (cmd) {
6801 	case DDI_ATTACH:
6802 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6803 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6804 			power_req.req.ppm_config_req.who = dip;
6805 			ASSERT(PPM(dip));
6806 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6807 			    &power_req, &result);
6808 		}
6809 #ifdef DEBUG
6810 		else {
6811 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6812 			power_req.req.ppm_config_req.who = dip;
6813 			(void) pm_ctlops(NULL, dip,
6814 			    DDI_CTLOPS_POWER, &power_req, &result);
6815 		}
6816 #endif
6817 		break;
6818 	case DDI_RESUME:
6819 		pm_forget_power_level(dip);
6820 
6821 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6822 			power_req.request_type = PMR_PPM_PRE_RESUME;
6823 			power_req.req.resume_req.who = cp->ppc_dip;
6824 			power_req.req.resume_req.cmd =
6825 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6826 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6827 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6828 			    DDI_CTLOPS_POWER, &power_req, &result);
6829 		}
6830 #ifdef DEBUG
6831 		else {
6832 			power_req.request_type = PMR_PPM_PRE_RESUME;
6833 			power_req.req.resume_req.who = cp->ppc_dip;
6834 			power_req.req.resume_req.cmd =
6835 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6836 			(void) pm_ctlops(NULL, cp->ppc_dip,
6837 			    DDI_CTLOPS_POWER, &power_req, &result);
6838 		}
6839 #endif
6840 		break;
6841 
6842 	case DDI_PM_RESUME:
6843 		break;
6844 
6845 	default:
6846 		panic(me);
6847 	}
6848 }
6849 
6850 /*
6851  * Nexus drivers call into pm framework to indicate which child driver is
6852  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6853  * hardware since the device driver is no longer installed.
6854  */
6855 int
6856 pm_uninit_child(dev_info_t *dip)
6857 {
6858 	power_req_t power_req;
6859 
6860 	ASSERT(ddi_binding_name(dip));
6861 	ASSERT(ddi_get_name_addr(dip));
6862 	pm_ppm_claim(dip);
6863 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6864 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6865 		power_req.req.ppm_config_req.who = dip;
6866 		ASSERT(PPM(dip));
6867 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6868 		    NULL));
6869 	} else {
6870 #ifdef DEBUG
6871 		/* pass it to the default handler so we can debug things */
6872 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6873 		power_req.req.ppm_config_req.who = dip;
6874 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6875 #endif
6876 	}
6877 	return (DDI_SUCCESS);
6878 }
6879 /*
6880  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
6881  * Also notify ppm of result of probe if there is a ppm that cares
6882  */
6883 void
6884 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
6885 {
6886 	_NOTE(ARGUNUSED(probe_failed))
6887 	int result;
6888 	power_req_t power_req;
6889 
6890 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6891 		power_req.request_type = PMR_PPM_POST_PROBE;
6892 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6893 		power_req.req.ppm_config_req.result = ret;
6894 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6895 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
6896 		    &power_req, &result);
6897 	}
6898 #ifdef DEBUG
6899 	else {
6900 		power_req.request_type = PMR_PPM_POST_PROBE;
6901 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6902 		power_req.req.ppm_config_req.result = ret;
6903 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
6904 		    &power_req, &result);
6905 	}
6906 #endif
6907 }
6908 
6909 void
6910 pm_post_config(dev_info_t *dip, char *devnm)
6911 {
6912 	PMD_FUNC(pmf, "post_config")
6913 
6914 	if (MDI_VHCI(dip)) {
6915 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6916 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
6917 		return;
6918 	} else if (!PM_GET_PM_INFO(dip))
6919 		return;
6920 
6921 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6922 	pm_rele_power(dip);
6923 }
6924 
6925 void
6926 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
6927 {
6928 	PMD_FUNC(pmf, "post_unconfig")
6929 
6930 	if (MDI_VHCI(dip)) {
6931 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
6932 		    PM_DEVICE(dip), held))
6933 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
6934 		return;
6935 	} else if (!PM_GET_PM_INFO(dip))
6936 		return;
6937 
6938 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
6939 	    held))
6940 	if (!held)
6941 		return;
6942 	/*
6943 	 * We have held power in pre_unconfig, release it here.
6944 	 */
6945 	pm_rele_power(dip);
6946 }
6947 
6948 /*
6949  * Notify ppm of result of attach if there is a ppm that cares
6950  */
6951 void
6952 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
6953 {
6954 	int result;
6955 	power_req_t power_req;
6956 	dev_info_t	*dip;
6957 
6958 	if (cp->ppc_cmd != DDI_ATTACH)
6959 		return;
6960 
6961 	dip = cp->ppc_dip;
6962 
6963 	if (ret == DDI_SUCCESS) {
6964 		/*
6965 		 * Attach succeeded, so proceed to doing post-attach pm tasks
6966 		 */
6967 		if (PM_GET_PM_INFO(dip) == NULL)
6968 			(void) pm_start(dip);
6969 	} else {
6970 		/*
6971 		 * Attach may have got pm started before failing
6972 		 */
6973 		pm_stop(dip);
6974 	}
6975 
6976 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6977 		power_req.request_type = PMR_PPM_POST_ATTACH;
6978 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6979 		power_req.req.ppm_config_req.result = ret;
6980 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6981 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6982 		    DDI_CTLOPS_POWER, &power_req, &result);
6983 	}
6984 #ifdef DEBUG
6985 	else {
6986 		power_req.request_type = PMR_PPM_POST_ATTACH;
6987 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6988 		power_req.req.ppm_config_req.result = ret;
6989 		(void) pm_ctlops(NULL, cp->ppc_dip,
6990 		    DDI_CTLOPS_POWER, &power_req, &result);
6991 	}
6992 #endif
6993 }
6994 
6995 /*
6996  * Notify ppm of attach action.  Parent is already held at full power by
6997  * probe action.
6998  */
6999 void
7000 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7001 {
7002 	int result;
7003 	power_req_t power_req;
7004 
7005 	bzero(cp, sizeof (*cp));
7006 	cp->ppc_dip = dip;
7007 	cp->ppc_cmd = (int)cmd;
7008 
7009 	switch (cmd) {
7010 	case DDI_DETACH:
7011 		pm_detaching(dip);		/* suspend pm while detaching */
7012 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7013 			power_req.request_type = PMR_PPM_PRE_DETACH;
7014 			power_req.req.ppm_config_req.who = dip;
7015 			ASSERT(PPM(dip));
7016 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7017 			    &power_req, &result);
7018 			cp->ppc_ppm = PPM(dip);
7019 		} else {
7020 #ifdef DEBUG
7021 			/* pass to the default handler so we can debug things */
7022 			power_req.request_type = PMR_PPM_PRE_DETACH;
7023 			power_req.req.ppm_config_req.who = dip;
7024 			(void) pm_ctlops(NULL, dip,
7025 			    DDI_CTLOPS_POWER, &power_req, &result);
7026 #endif
7027 			cp->ppc_ppm = NULL;
7028 		}
7029 		break;
7030 
7031 	default:
7032 		break;
7033 	}
7034 }
7035 
7036 /*
7037  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7038  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7039  * make an entry to record the details, which includes certain flag settings.
7040  */
7041 static void
7042 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7043     int wasvolpmd, major_t major)
7044 {
7045 	PMD_FUNC(pmf, "record_invol_path")
7046 	major_t pm_path_to_major(char *);
7047 	size_t plen;
7048 	pm_noinvol_t *ip, *np, *pp;
7049 	pp = NULL;
7050 
7051 	plen = strlen(path) + 1;
7052 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7053 	np->ni_size = plen;
7054 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7055 	np->ni_noinvolpm = noinvolpm;
7056 	np->ni_volpmd = volpmd;
7057 	np->ni_wasvolpmd = wasvolpmd;
7058 	np->ni_flags = flags;
7059 	(void) strcpy(np->ni_path, path);
7060 	/*
7061 	 * If we haven't actually seen the node attached, it is hard to figure
7062 	 * out its major.  If we could hold the node by path, we would be much
7063 	 * happier here.
7064 	 */
7065 	if (major == (major_t)-1) {
7066 		np->ni_major = pm_path_to_major(path);
7067 	} else {
7068 		np->ni_major = major;
7069 	}
7070 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7071 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7072 		int comp = strcmp(path, ip->ni_path);
7073 		if (comp < 0) {
7074 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7075 			    pmf, path, ip->ni_path))
7076 			/* insert before current entry */
7077 			np->ni_next = ip;
7078 			if (pp) {
7079 				pp->ni_next = np;
7080 			} else {
7081 				pm_noinvol_head = np;
7082 			}
7083 			rw_exit(&pm_noinvol_rwlock);
7084 #ifdef DEBUG
7085 			if (pm_debug & PMD_NOINVOL)
7086 				pr_noinvol("record_invol_path exit0");
7087 #endif
7088 			return;
7089 		} else if (comp == 0) {
7090 			panic("%s already in pm_noinvol list", path);
7091 		}
7092 	}
7093 	/*
7094 	 * If we did not find an entry in the list that this should go before,
7095 	 * then it must go at the end
7096 	 */
7097 	if (pp) {
7098 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7099 		    pp->ni_path))
7100 		ASSERT(pp->ni_next == 0);
7101 		pp->ni_next = np;
7102 	} else {
7103 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7104 		ASSERT(!pm_noinvol_head);
7105 		pm_noinvol_head = np;
7106 	}
7107 	rw_exit(&pm_noinvol_rwlock);
7108 #ifdef DEBUG
7109 	if (pm_debug & PMD_NOINVOL)
7110 		pr_noinvol("record_invol_path exit");
7111 #endif
7112 }
7113 
7114 void
7115 pm_record_invol(dev_info_t *dip)
7116 {
7117 	char *pathbuf;
7118 	int pm_all_components_off(dev_info_t *);
7119 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7120 
7121 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7122 	(void) ddi_pathname(dip, pathbuf);
7123 
7124 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7125 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7126 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7127 
7128 	/*
7129 	 * If this child's detach will be holding up its ancestors, then we
7130 	 * allow for an exception to that if all children of this type have
7131 	 * gone down voluntarily.
7132 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7133 	 */
7134 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7135 	    dip);
7136 	kmem_free(pathbuf, MAXPATHLEN);
7137 }
7138 
7139 void
7140 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7141 {
7142 	dev_info_t *dip = cp->ppc_dip;
7143 	int result;
7144 	power_req_t power_req;
7145 
7146 	switch (cp->ppc_cmd) {
7147 	case DDI_DETACH:
7148 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7149 			power_req.request_type = PMR_PPM_POST_DETACH;
7150 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7151 			power_req.req.ppm_config_req.result = ret;
7152 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7153 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7154 			    DDI_CTLOPS_POWER, &power_req, &result);
7155 		}
7156 #ifdef DEBUG
7157 		else {
7158 			power_req.request_type = PMR_PPM_POST_DETACH;
7159 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7160 			power_req.req.ppm_config_req.result = ret;
7161 			(void) pm_ctlops(NULL, cp->ppc_dip,
7162 			    DDI_CTLOPS_POWER, &power_req, &result);
7163 		}
7164 #endif
7165 		if (ret == DDI_SUCCESS) {
7166 			/*
7167 			 * For hotplug detach we assume it is *really* gone
7168 			 */
7169 			if (cp->ppc_cmd == DDI_DETACH &&
7170 			    ((DEVI(dip)->devi_pm_flags &
7171 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7172 			    DEVI(dip)->devi_pm_noinvolpm))
7173 				pm_record_invol(dip);
7174 			DEVI(dip)->devi_pm_flags &=
7175 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7176 
7177 			/*
7178 			 * If console fb is detaching, then we don't need to
7179 			 * worry any more about it going off (pm_detaching has
7180 			 * brought up all components)
7181 			 */
7182 			if (PM_IS_CFB(dip)) {
7183 				mutex_enter(&pm_cfb_lock);
7184 				ASSERT(cfb_dip_detaching);
7185 				ASSERT(cfb_dip == NULL);
7186 				ASSERT(pm_cfb_comps_off == 0);
7187 				cfb_dip_detaching = NULL;
7188 				mutex_exit(&pm_cfb_lock);
7189 			}
7190 			pm_stop(dip);	/* make it permanent */
7191 		} else {
7192 			if (PM_IS_CFB(dip)) {
7193 				mutex_enter(&pm_cfb_lock);
7194 				ASSERT(cfb_dip_detaching);
7195 				ASSERT(cfb_dip == NULL);
7196 				ASSERT(pm_cfb_comps_off == 0);
7197 				cfb_dip = cfb_dip_detaching;
7198 				cfb_dip_detaching = NULL;
7199 				mutex_exit(&pm_cfb_lock);
7200 			}
7201 			pm_detach_failed(dip);	/* resume power management */
7202 		}
7203 		break;
7204 	case DDI_PM_SUSPEND:
7205 		break;
7206 	case DDI_SUSPEND:
7207 		break;				/* legal, but nothing to do */
7208 	default:
7209 #ifdef DEBUG
7210 		panic("pm_post_detach: unrecognized cmd %d for detach",
7211 		    cp->ppc_cmd);
7212 		/*NOTREACHED*/
7213 #else
7214 		break;
7215 #endif
7216 	}
7217 }
7218 
7219 /*
7220  * Called after vfs_mountroot has got the clock started to fix up timestamps
7221  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7222  * devices look busy but have a 0 busycnt
7223  */
7224 int
7225 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7226 {
7227 	_NOTE(ARGUNUSED(arg))
7228 
7229 	pm_info_t *info = PM_GET_PM_INFO(dip);
7230 	struct pm_component *cp;
7231 	int i;
7232 
7233 	if (!info)
7234 		return (DDI_WALK_CONTINUE);
7235 	PM_LOCK_BUSY(dip);
7236 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7237 		cp = PM_CP(dip, i);
7238 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7239 			cp->pmc_timestamp = gethrestime_sec();
7240 	}
7241 	PM_UNLOCK_BUSY(dip);
7242 	return (DDI_WALK_CONTINUE);
7243 }
7244 
7245 /*
7246  * Called at attach time to see if the device being attached has a record in
7247  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7248  * parents and set a flag in the dip
7249  */
7250 void
7251 pm_noinvol_specd(dev_info_t *dip)
7252 {
7253 	PMD_FUNC(pmf, "noinvol_specd")
7254 	char *pathbuf;
7255 	pm_noinvol_t *ip, *pp = NULL;
7256 	int wasvolpmd;
7257 	int found = 0;
7258 
7259 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7260 		return;
7261 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7262 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7263 	(void) ddi_pathname(dip, pathbuf);
7264 
7265 	PM_LOCK_DIP(dip);
7266 	DEVI(dip)->devi_pm_volpmd = 0;
7267 	DEVI(dip)->devi_pm_noinvolpm = 0;
7268 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7269 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7270 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7271 		    pmf, pathbuf, ip->ni_path))
7272 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7273 			found++;
7274 			break;
7275 		}
7276 	}
7277 	rw_exit(&pm_noinvol_rwlock);
7278 	if (!found) {
7279 		PM_UNLOCK_DIP(dip);
7280 		kmem_free(pathbuf, MAXPATHLEN);
7281 		return;
7282 	}
7283 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7284 	pp = NULL;
7285 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7286 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7287 		    pmf, pathbuf, ip->ni_path))
7288 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7289 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7290 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7291 			/*
7292 			 * Handle special case of console fb
7293 			 */
7294 			if (PM_IS_CFB(dip)) {
7295 				mutex_enter(&pm_cfb_lock);
7296 				cfb_dip = dip;
7297 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7298 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7299 				mutex_exit(&pm_cfb_lock);
7300 			}
7301 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7302 			ASSERT((DEVI(dip)->devi_pm_flags &
7303 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7304 			    DEVI(dip)->devi_pm_noinvolpm);
7305 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7306 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7307 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7308 			    ip->ni_noinvolpm, ip->ni_volpmd,
7309 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7310 			/*
7311 			 * free the entry in hopes the list will now be empty
7312 			 * and we won't have to search it any more until the
7313 			 * device detaches
7314 			 */
7315 			if (pp) {
7316 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7317 				    pmf, ip->ni_path, pp->ni_path))
7318 				pp->ni_next = ip->ni_next;
7319 			} else {
7320 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7321 				    pmf, ip->ni_path))
7322 				ASSERT(pm_noinvol_head == ip);
7323 				pm_noinvol_head = ip->ni_next;
7324 			}
7325 			PM_UNLOCK_DIP(dip);
7326 			wasvolpmd = ip->ni_wasvolpmd;
7327 			rw_exit(&pm_noinvol_rwlock);
7328 			kmem_free(ip->ni_path, ip->ni_size);
7329 			kmem_free(ip, sizeof (*ip));
7330 			/*
7331 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7332 			 * (and volpmd if appropriate)
7333 			 */
7334 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7335 			    wasvolpmd, pathbuf, dip);
7336 #ifdef DEBUG
7337 			if (pm_debug & PMD_NOINVOL)
7338 				pr_noinvol("noinvol_specd exit");
7339 #endif
7340 			kmem_free(pathbuf, MAXPATHLEN);
7341 			return;
7342 		}
7343 	}
7344 	kmem_free(pathbuf, MAXPATHLEN);
7345 	rw_exit(&pm_noinvol_rwlock);
7346 	PM_UNLOCK_DIP(dip);
7347 }
7348 
7349 int
7350 pm_all_components_off(dev_info_t *dip)
7351 {
7352 	int i;
7353 	pm_component_t *cp;
7354 
7355 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7356 		cp = PM_CP(dip, i);
7357 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7358 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7359 			return (0);
7360 	}
7361 	return (1);	/* all off */
7362 }
7363 
7364 /*
7365  * Make sure that all "no involuntary power cycles" devices are attached.
7366  * Called before doing a cpr suspend to make sure the driver has a say about
7367  * the power cycle
7368  */
7369 int
7370 pm_reattach_noinvol(void)
7371 {
7372 	PMD_FUNC(pmf, "reattach_noinvol")
7373 	pm_noinvol_t *ip;
7374 	char *path;
7375 	dev_info_t *dip;
7376 
7377 	/*
7378 	 * Prevent the modunload thread from unloading any modules until we
7379 	 * have completely stopped all kernel threads.
7380 	 */
7381 	modunload_disable();
7382 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7383 		/*
7384 		 * Forget we'v ever seen any entry
7385 		 */
7386 		ip->ni_persistent = 0;
7387 	}
7388 restart:
7389 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7390 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7391 		major_t maj;
7392 		maj = ip->ni_major;
7393 		path = ip->ni_path;
7394 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7395 			if (ip->ni_persistent) {
7396 				/*
7397 				 * If we weren't able to make this entry
7398 				 * go away, then we give up, as
7399 				 * holding/attaching the driver ought to have
7400 				 * resulted in this entry being deleted
7401 				 */
7402 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7403 				    "(%s|%d)\n", pmf, ip->ni_path,
7404 				    ddi_major_to_name(maj), (int)maj))
7405 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7406 				    ip->ni_path);
7407 				modunload_enable();
7408 				rw_exit(&pm_noinvol_rwlock);
7409 				return (0);
7410 			}
7411 			ip->ni_persistent++;
7412 			rw_exit(&pm_noinvol_rwlock);
7413 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7414 			dip = e_ddi_hold_devi_by_path(path, 0);
7415 			if (dip == NULL) {
7416 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7417 				    pmf, path, (int)maj))
7418 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7419 				    "driver", path);
7420 				modunload_enable();
7421 				return (0);
7422 			} else {
7423 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7424 				/*
7425 				 * Since the modunload thread is stopped, we
7426 				 * don't have to keep the driver held, which
7427 				 * saves a ton of bookkeeping
7428 				 */
7429 				ddi_release_devi(dip);
7430 				goto restart;
7431 			}
7432 		} else {
7433 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7434 			    pmf, ip->ni_path))
7435 			continue;
7436 		}
7437 	}
7438 	rw_exit(&pm_noinvol_rwlock);
7439 	return (1);
7440 }
7441 
/*
 * Re-enable module unloading.  pm_reattach_noinvol() disables it and, on
 * its success path, returns with it still disabled.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7447 
7448 /*
7449  * Display pm support code
7450  */
7451 
7452 
7453 /*
7454  * console frame-buffer power-mgmt gets enabled when debugging
7455  * services are not present or console fbpm override is set
7456  */
7457 void
7458 pm_cfb_setup(const char *stdout_path)
7459 {
7460 	PMD_FUNC(pmf, "cfb_setup")
7461 	extern int obpdebug;
7462 	char *devname;
7463 	dev_info_t *dip;
7464 	int devname_len;
7465 	extern dev_info_t *fbdip;
7466 
7467 	/*
7468 	 * By virtue of this function being called (from consconfig),
7469 	 * we know stdout is a framebuffer.
7470 	 */
7471 	stdout_is_framebuffer = 1;
7472 
7473 	if (obpdebug || (boothowto & RB_DEBUG)) {
7474 		if (pm_cfb_override == 0) {
7475 			/*
7476 			 * Console is frame buffer, but we want to suppress
7477 			 * pm on it because of debugging setup
7478 			 */
7479 			pm_cfb_enabled = 0;
7480 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7481 			    "console power management.");
7482 			/*
7483 			 * however, we still need to know which is the console
7484 			 * fb in order to suppress pm on it
7485 			 */
7486 		} else {
7487 			cmn_err(CE_WARN, "Kernel debugger present: see "
7488 			    "kmdb(1M) for interaction with power management.");
7489 		}
7490 	}
7491 #ifdef DEBUG
7492 	/*
7493 	 * IF console is fb and is power managed, don't do prom_printfs from
7494 	 * pm debug macro
7495 	 */
7496 	if (pm_cfb_enabled) {
7497 		if (pm_debug)
7498 			prom_printf("pm debug output will be to log only\n");
7499 		pm_divertdebug++;
7500 	}
7501 #endif
7502 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7503 	devname_len = strlen(devname) + 1;
7504 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7505 	/* if the driver is attached */
7506 	if ((dip = fbdip) != NULL) {
7507 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7508 		    PM_DEVICE(dip)))
7509 		/*
7510 		 * We set up here as if the driver were power manageable in case
7511 		 * we get a later attach of a pm'able driver (which would result
7512 		 * in a panic later)
7513 		 */
7514 		cfb_dip = dip;
7515 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7516 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7517 		    PM_DEVICE(dip)))
7518 #ifdef DEBUG
7519 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7520 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7521 			    pmf, PM_DEVICE(dip)))
7522 		}
7523 #endif
7524 	} else {
7525 		char *ep;
7526 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7527 		pm_record_invol_path(devname,
7528 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7529 		    (major_t)-1);
7530 		for (ep = strrchr(devname, '/'); ep != devname;
7531 		    ep = strrchr(devname, '/')) {
7532 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7533 			*ep = '\0';
7534 			dip = pm_name_to_dip(devname, 0);
7535 			if (dip != NULL) {
7536 				/*
7537 				 * Walk up the tree incrementing
7538 				 * devi_pm_noinvolpm
7539 				 */
7540 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7541 				    0, 0, devname, dip);
7542 				break;
7543 			} else {
7544 				pm_record_invol_path(devname,
7545 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7546 			}
7547 		}
7548 	}
7549 	kmem_free(devname, devname_len);
7550 }
7551 
7552 void
7553 pm_cfb_rele(void)
7554 {
7555 	mutex_enter(&pm_cfb_lock);
7556 	/*
7557 	 * this call isn't using the console any  more, it is ok to take it
7558 	 * down if the count goes to 0
7559 	 */
7560 	cfb_inuse--;
7561 	mutex_exit(&pm_cfb_lock);
7562 }
7563 
7564 /*
7565  * software interrupt handler for fbpm; this function exists because we can't
7566  * bring up the frame buffer power from above lock level.  So if we need to,
7567  * we instead schedule a softint that runs this routine and takes us into
7568  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7569  */
7570 static uint_t
7571 pm_cfb_softint(caddr_t int_handler_arg)
7572 {
7573 	_NOTE(ARGUNUSED(int_handler_arg))
7574 	int rval = DDI_INTR_UNCLAIMED;
7575 
7576 	mutex_enter(&pm_cfb_lock);
7577 	if (pm_soft_pending) {
7578 		mutex_exit(&pm_cfb_lock);
7579 		debug_enter((char *)NULL);
7580 		/* acquired in debug_enter before calling pm_cfb_trigger */
7581 		pm_cfb_rele();
7582 		mutex_enter(&pm_cfb_lock);
7583 		pm_soft_pending = 0;
7584 		mutex_exit(&pm_cfb_lock);
7585 		rval = DDI_INTR_CLAIMED;
7586 	} else
7587 		mutex_exit(&pm_cfb_lock);
7588 
7589 	return (rval);
7590 }
7591 
7592 void
7593 pm_cfb_setup_intr(void)
7594 {
7595 	PMD_FUNC(pmf, "cfb_setup_intr")
7596 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7597 	void pm_cfb_check_and_powerup(void);
7598 
7599 	if (!stdout_is_framebuffer) {
7600 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7601 		return;
7602 	}
7603 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7604 #ifdef DEBUG
7605 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7606 #endif
7607 	/*
7608 	 * setup software interrupt handler
7609 	 */
7610 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7611 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7612 		panic("pm: unable to register soft intr.");
7613 
7614 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7615 }
7616 
7617 /*
7618  * Checks to see if it is safe to write to the console wrt power management
7619  * (i.e. if the console is a framebuffer, then it must be at full power)
7620  * returns 1 when power is off (power-up is needed)
7621  * returns 0 when power is on (power-up not needed)
7622  */
7623 int
7624 pm_cfb_check_and_hold(void)
7625 {
7626 	/*
7627 	 * cfb_dip is set iff console is a power manageable frame buffer
7628 	 * device
7629 	 */
7630 	extern int modrootloaded;
7631 
7632 	mutex_enter(&pm_cfb_lock);
7633 	cfb_inuse++;
7634 	ASSERT(cfb_inuse);	/* wrap? */
7635 	if (modrootloaded && cfb_dip) {
7636 		/*
7637 		 * don't power down the frame buffer, the prom is using it
7638 		 */
7639 		if (pm_cfb_comps_off) {
7640 			mutex_exit(&pm_cfb_lock);
7641 			return (1);
7642 		}
7643 	}
7644 	mutex_exit(&pm_cfb_lock);
7645 	return (0);
7646 }
7647 
7648 /*
7649  * turn on cfb power (which is known to be off).
7650  * Must be called below lock level!
7651  */
7652 void
7653 pm_cfb_powerup(void)
7654 {
7655 	pm_info_t *info;
7656 	int norm;
7657 	int ccount, ci;
7658 	int unused;
7659 #ifdef DEBUG
7660 	/*
7661 	 * Can't reenter prom_prekern, so suppress pm debug messages
7662 	 * (still go to circular buffer).
7663 	 */
7664 	mutex_enter(&pm_debug_lock);
7665 	pm_divertdebug++;
7666 	mutex_exit(&pm_debug_lock);
7667 #endif
7668 	info = PM_GET_PM_INFO(cfb_dip);
7669 	ASSERT(info);
7670 
7671 	ccount = PM_NUMCMPTS(cfb_dip);
7672 	for (ci = 0; ci < ccount; ci++) {
7673 		norm = pm_get_normal_power(cfb_dip, ci);
7674 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7675 		    PM_CANBLOCK_BYPASS, 0, &unused);
7676 	}
7677 #ifdef DEBUG
7678 	mutex_enter(&pm_debug_lock);
7679 	pm_divertdebug--;
7680 	mutex_exit(&pm_debug_lock);
7681 #endif
7682 }
7683 
/*
 * Check whether the console framebuffer is powered up and power it up if it
 * is not.
 *
 * Note: pm_cfb_check_and_hold() leaves a hold on the power state, which the
 * caller must release with pm_cfb_rele() once the console fb operation is
 * completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	int need_powerup = pm_cfb_check_and_hold();

	if (need_powerup) {
		pm_cfb_powerup();
	}
}
7696 
7697 /*
7698  * Trigger a low level interrupt to power up console frame buffer.
7699  */
7700 void
7701 pm_cfb_trigger(void)
7702 {
7703 	if (cfb_dip == NULL)
7704 		return;
7705 
7706 	mutex_enter(&pm_cfb_lock);
7707 	/*
7708 	 * If machine appears to be hung, pulling the keyboard connector of
7709 	 * the console will cause a high level interrupt and go to debug_enter.
7710 	 * But, if the fb is powered down, this routine will be called to bring
7711 	 * it up (by generating a softint to do the work).  If soft interrupts
7712 	 * are not running, and the keyboard connector is pulled again, the
7713 	 * following code detects this condition and calls panic which allows
7714 	 * the fb to be brought up from high level.
7715 	 *
7716 	 * If two nearly simultaneous calls to debug_enter occur (both from
7717 	 * high level) the code described above will cause a panic.
7718 	 */
7719 	if (lbolt <= pm_soft_pending) {
7720 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7721 		panic(panicstr);	/* does a power up at any intr level */
7722 		/* NOTREACHED */
7723 	}
7724 	pm_soft_pending = lbolt;
7725 	mutex_exit(&pm_cfb_lock);
7726 	ddi_trigger_softintr(pm_soft_id);
7727 }
7728 
7729 major_t
7730 pm_path_to_major(char *path)
7731 {
7732 	PMD_FUNC(pmf, "path_to_major")
7733 	char *np, *ap, *bp;
7734 	major_t ret;
7735 	size_t len;
7736 	static major_t i_path_to_major(char *, char *);
7737 
7738 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7739 
7740 	np = strrchr(path, '/');
7741 	if (np != NULL)
7742 		np++;
7743 	else
7744 		np = path;
7745 	len = strlen(np) + 1;
7746 	bp = kmem_alloc(len, KM_SLEEP);
7747 	(void) strcpy(bp, np);
7748 	if ((ap = strchr(bp, '@')) != NULL) {
7749 		*ap = '\0';
7750 	}
7751 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7752 	ret = i_path_to_major(path, np);
7753 	kmem_free(bp, len);
7754 	return (ret);
7755 }
7756 
#ifdef DEBUG

char *pm_msgp;			/* next free byte in the log buffer */
char *pm_bufend;		/* last byte of the log buffer */
char *pm_msgbuf = NULL;		/* log buffer, allocated on first use */
int   pm_logpages = 2;		/* size of the log buffer, in pages */

#define	PMLOGPGS	pm_logpages

/*
 * printf-like logging of pm debug messages into a circular buffer of
 * PMLOGPGS pages, allocated on the first call.  Each message is stored
 * with its terminating NUL.  A message that does not fit in the space left
 * has the remainder of the buffer zeroed and is stored at the start of the
 * buffer instead.  Unless pm_divertdebug is set the message is also written
 * with prom_printf().
 *
 * A message larger than the whole buffer is truncated to fit.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;
	size_t capacity;
	char *dst;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}
	capacity = (size_t)(pm_bufend - pm_msgbuf);

	/* first pass only measures the formatted message (plus its NUL) */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	/* never write beyond the buffer, however long the message is */
	if (size > capacity)
		size = capacity;

	if (size > (size_t)(pm_bufend - pm_msgp)) {		/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		dst = pm_msgbuf;
	} else {
		dst = pm_msgp;
	}

	va_start(adx, fmt);
	(void) vsnprintf(dst, size, fmt, adx);
	va_end(adx);

	/*
	 * Print from where the message was actually stored; in the wrap
	 * case pm_msgp still points at the tail that was just zeroed.
	 */
	if (!pm_divertdebug)
		prom_printf("%s", dst);
	pm_msgp = dst + size;
	mutex_exit(&pm_debug_lock);
}
#endif	/* DEBUG */
7799 
7800 /*
7801  * We want to save the state of any directly pm'd devices over the suspend/
7802  * resume process so that we can put them back the way the controlling
7803  * process left them.
7804  */
7805 void
7806 pm_save_direct_levels(void)
7807 {
7808 	pm_processes_stopped = 1;
7809 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7810 }
7811 
7812 static int
7813 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7814 {
7815 	_NOTE(ARGUNUSED(arg))
7816 	int i;
7817 	int *ip;
7818 	pm_info_t *info = PM_GET_PM_INFO(dip);
7819 
7820 	if (!info)
7821 		return (DDI_WALK_CONTINUE);
7822 
7823 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7824 		if (PM_NUMCMPTS(dip) > 2) {
7825 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7826 			    sizeof (int), KM_SLEEP);
7827 			ip = info->pmi_lp;
7828 		} else {
7829 			ip = info->pmi_levels;
7830 		}
7831 		/* autopm and processes are stopped, ok not to lock power */
7832 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7833 			*ip++ = PM_CURPOWER(dip, i);
7834 		/*
7835 		 * There is a small window between stopping the
7836 		 * processes and setting pm_processes_stopped where
7837 		 * a driver could get hung up in a pm_raise_power()
7838 		 * call.  Free any such driver now.
7839 		 */
7840 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7841 	}
7842 
7843 	return (DDI_WALK_CONTINUE);
7844 }
7845 
7846 void
7847 pm_restore_direct_levels(void)
7848 {
7849 	/*
7850 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
7851 	 * threads failed) then we don't want to try to restore them
7852 	 */
7853 	if (!pm_processes_stopped)
7854 		return;
7855 
7856 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
7857 	pm_processes_stopped = 0;
7858 }
7859 
7860 static int
7861 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
7862 {
7863 	_NOTE(ARGUNUSED(arg))
7864 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
7865 	int i, nc, result;
7866 	int *ip;
7867 
7868 	pm_info_t *info = PM_GET_PM_INFO(dip);
7869 	if (!info)
7870 		return (DDI_WALK_CONTINUE);
7871 
7872 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7873 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
7874 			ip = &info->pmi_lp[nc - 1];
7875 		} else {
7876 			ip = &info->pmi_levels[nc - 1];
7877 		}
7878 		/*
7879 		 * Because fb drivers fail attempts to turn off the
7880 		 * fb when the monitor is on, but treat a request to
7881 		 * turn on the monitor as a request to turn on the
7882 		 * fb too, we process components in descending order
7883 		 * Because autopm is disabled and processes aren't
7884 		 * running, it is ok to examine current power outside
7885 		 * of the power lock
7886 		 */
7887 		for (i = nc - 1; i >= 0; i--, ip--) {
7888 			if (PM_CURPOWER(dip, i) == *ip)
7889 				continue;
7890 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
7891 			    PM_CANBLOCK_BYPASS, 0, &result) !=
7892 				DDI_SUCCESS) {
7893 				cmn_err(CE_WARN, "cpr: unable "
7894 				    "to restore power level of "
7895 				    "component %d of directly "
7896 				    "power manged device %s@%s"
7897 				    " to %d",
7898 				    i, PM_NAME(dip),
7899 				    PM_ADDR(dip), *ip);
7900 				PMD(PMD_FAIL, ("%s: failed to restore "
7901 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
7902 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
7903 				    PM_CURPOWER(dip, i), *ip, result))
7904 			}
7905 		}
7906 		if (nc > 2) {
7907 			kmem_free(info->pmi_lp, nc * sizeof (int));
7908 			info->pmi_lp = NULL;
7909 		}
7910 	}
7911 	return (DDI_WALK_CONTINUE);
7912 }
7913 
7914 /*
7915  * Stolen from the bootdev module
7916  * attempt to convert a path to a major number
7917  */
7918 static major_t
7919 i_path_to_major(char *path, char *leaf_name)
7920 {
7921 	extern major_t path_to_major(char *pathname);
7922 	major_t maj;
7923 
7924 	if ((maj = path_to_major(path)) == (major_t)-1) {
7925 		maj = ddi_name_to_major(leaf_name);
7926 	}
7927 
7928 	return (maj);
7929 }
7930 
7931 /*
7932  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
7933  * An entry in the list means that the device is detached, so we need to
7934  * adjust its ancestors as if they had just seen this attach, and any detached
7935  * ancestors need to have their list entries adjusted.
7936  */
7937 void
7938 pm_driver_removed(major_t major)
7939 {
7940 	static void i_pm_driver_removed(major_t major);
7941 
7942 	/*
7943 	 * Serialize removal of drivers. This is to keep ancestors of
7944 	 * a node that is being deleted from getting deleted and added back
7945 	 * with different counters.
7946 	 */
7947 	mutex_enter(&pm_remdrv_lock);
7948 	i_pm_driver_removed(major);
7949 	mutex_exit(&pm_remdrv_lock);
7950 }
7951 
7952 /*
7953  * This routine is called recursively by pm_noinvol_process_ancestors()
7954  */
7955 static void
7956 i_pm_driver_removed(major_t major)
7957 {
7958 	PMD_FUNC(pmf, "driver_removed")
7959 	static void adjust_ancestors(char *, int);
7960 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
7961 	static void pm_noinvol_process_ancestors(char *);
7962 	pm_noinvol_t *ip, *pp = NULL;
7963 	int wasvolpmd;
7964 	ASSERT(major != (major_t)-1);
7965 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
7966 again:
7967 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7968 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7969 		if (major != ip->ni_major)
7970 			continue;
7971 		/*
7972 		 * If it is an ancestor of no-invol node, which is
7973 		 * not removed, skip it. This is to cover the case of
7974 		 * ancestor removed without removing its descendants.
7975 		 */
7976 		if (pm_is_noinvol_ancestor(ip)) {
7977 			ip->ni_flags |= PMC_DRIVER_REMOVED;
7978 			continue;
7979 		}
7980 		wasvolpmd = ip->ni_wasvolpmd;
7981 		/*
7982 		 * remove the entry from the list
7983 		 */
7984 		if (pp) {
7985 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
7986 			    pmf, ip->ni_path, pp->ni_path))
7987 			pp->ni_next = ip->ni_next;
7988 		} else {
7989 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
7990 			    ip->ni_path))
7991 			ASSERT(pm_noinvol_head == ip);
7992 			pm_noinvol_head = ip->ni_next;
7993 		}
7994 		rw_exit(&pm_noinvol_rwlock);
7995 		adjust_ancestors(ip->ni_path, wasvolpmd);
7996 		/*
7997 		 * Had an ancestor been removed before this node, it would have
7998 		 * been skipped. Adjust the no-invol counters for such skipped
7999 		 * ancestors.
8000 		 */
8001 		pm_noinvol_process_ancestors(ip->ni_path);
8002 		kmem_free(ip->ni_path, ip->ni_size);
8003 		kmem_free(ip, sizeof (*ip));
8004 		goto again;
8005 	}
8006 	rw_exit(&pm_noinvol_rwlock);
8007 }
8008 
8009 /*
8010  * returns 1, if *aip is a ancestor of a no-invol node
8011  *	   0, otherwise
8012  */
8013 static int
8014 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8015 {
8016 	pm_noinvol_t *ip;
8017 
8018 	ASSERT(strlen(aip->ni_path) != 0);
8019 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8020 		if (ip == aip)
8021 			continue;
8022 		/*
8023 		 * To be an ancestor, the path must be an initial substring of
8024 		 * the descendent, and end just before a '/' in the
8025 		 * descendent's path.
8026 		 */
8027 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8028 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8029 			return (1);
8030 	}
8031 	return (0);
8032 }
8033 
8034 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
8035 /*
8036  * scan through the pm_noinvolpm list adjusting ancestors of the current
8037  * node;  Modifies string *path.
8038  */
8039 static void
8040 adjust_ancestors(char *path, int wasvolpmd)
8041 {
8042 	PMD_FUNC(pmf, "adjust_ancestors")
8043 	char *cp;
8044 	pm_noinvol_t *lp;
8045 	pm_noinvol_t *pp = NULL;
8046 	major_t locked = (major_t)UINT_MAX;
8047 	dev_info_t *dip;
8048 	char	*pathbuf;
8049 	size_t pathbuflen = strlen(path) + 1;
8050 
8051 	/*
8052 	 * First we look up the ancestor's dip.  If we find it, then we
8053 	 * adjust counts up the tree
8054 	 */
8055 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8056 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8057 	(void) strcpy(pathbuf, path);
8058 	cp = strrchr(pathbuf, '/');
8059 	if (cp == NULL)	{
8060 		/* if no ancestors, then nothing to do */
8061 		kmem_free(pathbuf, pathbuflen);
8062 		return;
8063 	}
8064 	*cp = '\0';
8065 	dip = pm_name_to_dip(pathbuf, 1);
8066 	if (dip != NULL) {
8067 		locked = PM_MAJOR(dip);
8068 
8069 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8070 		    path, dip);
8071 
8072 		if (locked != (major_t)UINT_MAX)
8073 			ddi_release_devi(dip);
8074 	} else {
8075 		char *apath;
8076 		size_t len = strlen(pathbuf) + 1;
8077 		int  lock_held = 1;
8078 
8079 		/*
8080 		 * Now check for ancestors that exist only in the list
8081 		 */
8082 		apath = kmem_alloc(len, KM_SLEEP);
8083 		(void) strcpy(apath, pathbuf);
8084 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8085 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8086 			/*
8087 			 * This can only happen once.  Since we have to drop
8088 			 * the lock, we need to extract the relevant info.
8089 			 */
8090 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8091 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8092 				    lp->ni_path, lp->ni_noinvolpm,
8093 				    lp->ni_noinvolpm - 1))
8094 				lp->ni_noinvolpm--;
8095 				if (wasvolpmd && lp->ni_volpmd) {
8096 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8097 					    "%d\n", pmf, lp->ni_path,
8098 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8099 					lp->ni_volpmd--;
8100 				}
8101 				/*
8102 				 * remove the entry from the list, if there
8103 				 * are no more no-invol descendants and node
8104 				 * itself is not a no-invol node.
8105 				 */
8106 				if (!(lp->ni_noinvolpm ||
8107 				    (lp->ni_flags & PMC_NO_INVOL))) {
8108 					ASSERT(lp->ni_volpmd == 0);
8109 					if (pp) {
8110 						PMD(PMD_NOINVOL, ("%s: freeing "
8111 						    "%s, prev is %s\n", pmf,
8112 						    lp->ni_path, pp->ni_path))
8113 						pp->ni_next = lp->ni_next;
8114 					} else {
8115 						PMD(PMD_NOINVOL, ("%s: free %s "
8116 						    "head\n", pmf, lp->ni_path))
8117 						ASSERT(pm_noinvol_head == lp);
8118 						pm_noinvol_head = lp->ni_next;
8119 					}
8120 					lock_held = 0;
8121 					rw_exit(&pm_noinvol_rwlock);
8122 					adjust_ancestors(apath, wasvolpmd);
8123 					/* restore apath */
8124 					(void) strcpy(apath, pathbuf);
8125 					kmem_free(lp->ni_path, lp->ni_size);
8126 					kmem_free(lp, sizeof (*lp));
8127 				}
8128 				break;
8129 			}
8130 		}
8131 		if (lock_held)
8132 			rw_exit(&pm_noinvol_rwlock);
8133 		adjust_ancestors(apath, wasvolpmd);
8134 		kmem_free(apath, len);
8135 	}
8136 	kmem_free(pathbuf, pathbuflen);
8137 }
8138 
8139 /*
8140  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8141  * which were skipped even though their drivers were removed.
8142  */
8143 static void
8144 pm_noinvol_process_ancestors(char *path)
8145 {
8146 	pm_noinvol_t *lp;
8147 
8148 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8149 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8150 		if (strstr(path, lp->ni_path) &&
8151 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8152 			rw_exit(&pm_noinvol_rwlock);
8153 			i_pm_driver_removed(lp->ni_major);
8154 			return;
8155 		}
8156 	}
8157 	rw_exit(&pm_noinvol_rwlock);
8158 }
8159 
8160 /*
8161  * Returns true if (detached) device needs to be kept up because it exported the
8162  * "no-involuntary-power-cycles" property or we're pretending it did (console
8163  * fb case) or it is an ancestor of such a device and has used up the "one
8164  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8165  * upon detach.  In any event, we need an exact hit on the path or we return
8166  * false.
8167  */
8168 int
8169 pm_noinvol_detached(char *path)
8170 {
8171 	PMD_FUNC(pmf, "noinvol_detached")
8172 	pm_noinvol_t *ip;
8173 	int ret = 0;
8174 
8175 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8176 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8177 		if (strcmp(path, ip->ni_path) == 0) {
8178 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8179 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8180 				    "%s\n", pmf, path))
8181 				ret = 1;
8182 				break;
8183 			}
8184 #ifdef	DEBUG
8185 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8186 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8187 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8188 				    path))
8189 #endif
8190 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8191 			break;
8192 		}
8193 	}
8194 	rw_exit(&pm_noinvol_rwlock);
8195 	return (ret);
8196 }
8197 
8198 int
8199 pm_is_cfb(dev_info_t *dip)
8200 {
8201 	return (dip == cfb_dip);
8202 }
8203 
#ifdef	DEBUG
/*
 * DEBUG-only check that the console frame buffer is fully on, i.e. that no
 * component is counted in pm_cfb_comps_off.  When the console is not a
 * frame buffer this also returns true.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8216 
8217 /*
8218  * Preventing scan from powering down the node by incrementing the
8219  * kidsupcnt.
8220  */
8221 void
8222 pm_hold_power(dev_info_t *dip)
8223 {
8224 	e_pm_hold_rele_power(dip, 1);
8225 }
8226 
8227 /*
8228  * Releasing the hold by decrementing the kidsupcnt allowing scan
8229  * to power down the node if all conditions are met.
8230  */
8231 void
8232 pm_rele_power(dev_info_t *dip)
8233 {
8234 	e_pm_hold_rele_power(dip, -1);
8235 }
8236 
8237 /*
8238  * A wrapper of pm_all_to_normal() to power up a dip
8239  * to its normal level
8240  */
8241 int
8242 pm_powerup(dev_info_t *dip)
8243 {
8244 	PMD_FUNC(pmf, "pm_powerup")
8245 
8246 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8247 	ASSERT(!(servicing_interrupt()));
8248 
8249 	/*
8250 	 * in case this node is not already participating pm
8251 	 */
8252 	if (!PM_GET_PM_INFO(dip)) {
8253 		if (!DEVI_IS_ATTACHING(dip))
8254 			return (DDI_SUCCESS);
8255 		if (pm_start(dip) != DDI_SUCCESS)
8256 			return (DDI_FAILURE);
8257 		if (!PM_GET_PM_INFO(dip))
8258 			return (DDI_SUCCESS);
8259 	}
8260 
8261 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8262 }
8263 
8264 int
8265 pm_rescan_walk(dev_info_t *dip, void *arg)
8266 {
8267 	_NOTE(ARGUNUSED(arg))
8268 
8269 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8270 		return (DDI_WALK_CONTINUE);
8271 
8272 	/*
8273 	 * Currently pm_cpr_callb/resume code is the only caller
8274 	 * and it needs to make sure that stopped scan get
8275 	 * reactivated. Otherwise, rescan walk needn't reactive
8276 	 * stopped scan.
8277 	 */
8278 	pm_scan_init(dip);
8279 
8280 	(void) pm_rescan(dip);
8281 	return (DDI_WALK_CONTINUE);
8282 }
8283 
8284 static dev_info_t *
8285 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8286 {
8287 	dev_info_t *wdip, *pdip;
8288 
8289 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8290 		pdip = ddi_get_parent(wdip);
8291 		if (pdip == dip)
8292 			return (wdip);
8293 	}
8294 	return (NULL);
8295 }
8296 
8297 int
8298 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8299     void *arg, void *result)
8300 {
8301 	PMD_FUNC(pmf, "bp_bus_power")
8302 	dev_info_t	*cdip;
8303 	pm_info_t	*cinfo;
8304 	pm_bp_child_pwrchg_t	*bpc;
8305 	pm_sp_misc_t		*pspm;
8306 	pm_bp_nexus_pwrup_t *bpn;
8307 	pm_bp_child_pwrchg_t new_bpc;
8308 	pm_bp_noinvol_t *bpi;
8309 	dev_info_t *tdip;
8310 	char *pathbuf;
8311 	int		ret = DDI_SUCCESS;
8312 	int		errno = 0;
8313 	pm_component_t *cp;
8314 
8315 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8316 	    pm_decode_op(op)))
8317 	switch (op) {
8318 	case BUS_POWER_CHILD_PWRCHG:
8319 		bpc = (pm_bp_child_pwrchg_t *)arg;
8320 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8321 		tdip = bpc->bpc_dip;
8322 		cdip = pm_get_next_descendent(dip, tdip);
8323 		cinfo = PM_GET_PM_INFO(cdip);
8324 		if (cdip != tdip) {
8325 			/*
8326 			 * If the node is an involved parent, it needs to
8327 			 * power up the node as it is needed.  There is nothing
8328 			 * else the framework can do here.
8329 			 */
8330 			if (PM_WANTS_NOTIFICATION(cdip)) {
8331 				PMD(PMD_SET, ("%s: call bus_power for "
8332 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8333 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8334 				    impl_arg, op, arg, result));
8335 			}
8336 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8337 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8338 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8339 			/*
8340 			 * we presume that the parent needs to be up in
8341 			 * order for the child to change state (either
8342 			 * because it must already be on if the child is on
8343 			 * (and the pm_all_to_normal_nexus() will be a nop)
8344 			 * or because it will need to be on for the child
8345 			 * to come on; so we make the call regardless
8346 			 */
8347 			pm_hold_power(cdip);
8348 			if (cinfo) {
8349 				pm_canblock_t canblock = pspm->pspm_canblock;
8350 				ret = pm_all_to_normal_nexus(cdip, canblock);
8351 				if (ret != DDI_SUCCESS) {
8352 					pm_rele_power(cdip);
8353 					return (ret);
8354 				}
8355 			}
8356 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8357 			    PM_DEVICE(cdip)))
8358 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8359 			    result);
8360 			pm_rele_power(cdip);
8361 		} else {
8362 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8363 			    result);
8364 		}
8365 		return (ret);
8366 
8367 	case BUS_POWER_NEXUS_PWRUP:
8368 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8369 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8370 
8371 		if (!e_pm_valid_info(dip, NULL) ||
8372 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8373 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8374 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8375 			    pmf, PM_DEVICE(dip)))
8376 			*pspm->pspm_errnop = EIO;
8377 			*(int *)result = DDI_FAILURE;
8378 			return (DDI_FAILURE);
8379 		}
8380 
8381 		ASSERT(bpn->bpn_dip == dip);
8382 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8383 		    PM_DEVICE(dip)))
8384 		new_bpc.bpc_dip = dip;
8385 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8386 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8387 		new_bpc.bpc_comp = bpn->bpn_comp;
8388 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8389 		new_bpc.bpc_nlevel = bpn->bpn_level;
8390 		new_bpc.bpc_private = bpn->bpn_private;
8391 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8392 		    PM_LEVEL_UPONLY;
8393 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8394 		    &errno;
8395 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8396 		    (void *)&new_bpc, result);
8397 		kmem_free(pathbuf, MAXPATHLEN);
8398 		return (ret);
8399 
8400 	case BUS_POWER_NOINVOL:
8401 		bpi = (pm_bp_noinvol_t *)arg;
8402 		tdip = bpi->bpni_dip;
8403 		cdip = pm_get_next_descendent(dip, tdip);
8404 
8405 		/* In case of rem_drv, the leaf node has been removed */
8406 		if (cdip == NULL)
8407 			return (DDI_SUCCESS);
8408 
8409 		cinfo = PM_GET_PM_INFO(cdip);
8410 		if (cdip != tdip) {
8411 			if (PM_WANTS_NOTIFICATION(cdip)) {
8412 				PMD(PMD_NOINVOL,
8413 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8414 				    pmf, PM_DEVICE(cdip)))
8415 				ret = (*PM_BUS_POWER_FUNC(cdip))
8416 				    (cdip, NULL, op, arg, result);
8417 				if ((cinfo) && (ret == DDI_SUCCESS))
8418 					(void) pm_noinvol_update_node(cdip,
8419 					    bpi);
8420 				return (ret);
8421 			} else {
8422 				PMD(PMD_NOINVOL,
8423 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8424 				    PM_DEVICE(cdip)))
8425 				ret = pm_busop_bus_power(cdip, NULL, op,
8426 				    arg, result);
8427 				/*
8428 				 * Update the current node.
8429 				 */
8430 				if ((cinfo) && (ret == DDI_SUCCESS))
8431 					(void) pm_noinvol_update_node(cdip,
8432 					    bpi);
8433 				return (ret);
8434 			}
8435 		} else {
8436 			/*
8437 			 * For attach, detach, power up:
8438 			 * Do nothing for leaf node since its
8439 			 * counts are already updated.
8440 			 * For CFB and driver removal, since the
8441 			 * path and the target dip passed in is up to and incl.
8442 			 * the immediate ancestor, need to do the update.
8443 			 */
8444 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8445 			    "reached\n", pmf, PM_DEVICE(cdip)))
8446 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8447 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8448 				(void) pm_noinvol_update_node(cdip, bpi);
8449 			return (DDI_SUCCESS);
8450 		}
8451 
8452 	default:
8453 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8454 		return (DDI_FAILURE);
8455 	}
8456 }
8457 
8458 static int
8459 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8460     void *arg, void *resultp)
8461 {
8462 	_NOTE(ARGUNUSED(impl_arg))
8463 	PMD_FUNC(pmf, "bp_set_power")
8464 	pm_ppm_devlist_t *devl;
8465 	int clevel, circ;
8466 #ifdef	DEBUG
8467 	int circ_db, ccirc_db;
8468 #endif
8469 	int ret = DDI_SUCCESS;
8470 	dev_info_t *cdip;
8471 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8472 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8473 	pm_canblock_t canblock = pspm->pspm_canblock;
8474 	int scan = pspm->pspm_scan;
8475 	int comp = bpc->bpc_comp;
8476 	int olevel = bpc->bpc_olevel;
8477 	int nlevel = bpc->bpc_nlevel;
8478 	int comps_off_incr = 0;
8479 	dev_info_t *pdip = ddi_get_parent(dip);
8480 	int dodeps;
8481 	int direction = pspm->pspm_direction;
8482 	int *errnop = pspm->pspm_errnop;
8483 	char *dir = pm_decode_direction(direction);
8484 	int *iresp = (int *)resultp;
8485 	time_t	idletime, thresh;
8486 	pm_component_t *cp = PM_CP(dip, comp);
8487 	int work_type;
8488 
8489 	*iresp = DDI_SUCCESS;
8490 	*errnop = 0;
8491 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8492 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8493 	    pm_decode_op(op)))
8494 
8495 	/*
8496 	 * The following set of conditions indicate we are here to handle a
8497 	 * driver's pm_[raise|lower]_power request, but the device is being
8498 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8499 	 * we want to pm_block and pass a status back to the caller based
8500 	 * on whether the controlling process's next activity on the device
8501 	 * matches the current request or not.  This distinction tells
8502 	 * downstream functions to avoid calling into a driver or changing
8503 	 * the framework's power state.  To actually block, we need:
8504 	 *
8505 	 * PM_ISDIRECT(dip)
8506 	 *	no reason to block unless a process is directly controlling dev
8507 	 * direction != PM_LEVEL_EXACT
8508 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8509 	 * !pm_processes_stopped
8510 	 *	don't block if controlling proc already be stopped for cpr
8511 	 * canblock != PM_CANBLOCK_BYPASS
8512 	 *	our caller must not have explicitly prevented blocking
8513 	 */
8514 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8515 		PM_LOCK_DIP(dip);
8516 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8517 			/* releases dip lock */
8518 			ret = pm_busop_match_request(dip, bpc);
8519 			if (ret == EAGAIN) {
8520 				PM_LOCK_DIP(dip);
8521 				continue;
8522 			}
8523 			return (*iresp = ret);
8524 		}
8525 		PM_UNLOCK_DIP(dip);
8526 	}
8527 	/* BC device is never scanned, so power will stick until we are done */
8528 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8529 	    direction != PM_LEVEL_DOWNONLY) {
8530 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8531 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8532 		    canblock, 0, resultp) != DDI_SUCCESS) {
8533 			/* *resultp set by pm_set_power */
8534 			return (DDI_FAILURE);
8535 		}
8536 	}
8537 	if (PM_WANTS_NOTIFICATION(pdip)) {
8538 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8539 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8540 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8541 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8542 		if (ret != DDI_SUCCESS) {
8543 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8544 			    pmf, PM_DEVICE(pdip)))
8545 			return (DDI_FAILURE);
8546 		}
8547 	} else {
8548 		/*
8549 		 * Since we don't know what the actual power level is,
8550 		 * we place a power hold on the parent no matter what
8551 		 * component and level is changing.
8552 		 */
8553 		pm_hold_power(pdip);
8554 	}
8555 	PM_LOCK_POWER(dip, &circ);
8556 	clevel = PM_CURPOWER(dip, comp);
8557 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8558 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8559 	    clevel, dir))
8560 	switch (direction) {
8561 	case PM_LEVEL_UPONLY:
8562 		/* Powering up */
8563 		if (clevel >= nlevel) {
8564 			PMD(PMD_SET, ("%s: current level is already "
8565 			    "at or above the requested level.\n", pmf))
8566 			*iresp = DDI_SUCCESS;
8567 			ret = DDI_SUCCESS;
8568 			goto post_notify;
8569 		}
8570 		break;
8571 	case PM_LEVEL_EXACT:
8572 		/* specific level request */
8573 		if (clevel == nlevel && !PM_ISBC(dip)) {
8574 			PMD(PMD_SET, ("%s: current level is already "
8575 			    "at the requested level.\n", pmf))
8576 			*iresp = DDI_SUCCESS;
8577 			ret = DDI_SUCCESS;
8578 			goto post_notify;
8579 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8580 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8581 			if (!pm_cfb_enabled) {
8582 				PMD(PMD_ERROR | PMD_CFB,
8583 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8584 				*errnop = EINVAL;
8585 				*iresp = DDI_FAILURE;
8586 				ret = DDI_FAILURE;
8587 				goto post_notify;
8588 			}
8589 			mutex_enter(&pm_cfb_lock);
8590 			while (cfb_inuse) {
8591 				mutex_exit(&pm_cfb_lock);
8592 				if (delay_sig(1) == EINTR) {
8593 					ret = DDI_FAILURE;
8594 					*iresp = DDI_FAILURE;
8595 					*errnop = EINTR;
8596 					goto post_notify;
8597 				}
8598 				mutex_enter(&pm_cfb_lock);
8599 			}
8600 			mutex_exit(&pm_cfb_lock);
8601 		}
8602 		break;
8603 	case PM_LEVEL_DOWNONLY:
8604 		/* Powering down */
8605 		thresh = cur_threshold(dip, comp);
8606 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8607 		if (scan && ((PM_KUC(dip) != 0) ||
8608 		    (cp->pmc_busycount > 0) ||
8609 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8610 #ifdef	DEBUG
8611 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8612 				PMD(PMD_SET, ("%s: scan failed: "
8613 				    "kidsupcnt != 0\n", pmf))
8614 			if (cp->pmc_busycount > 0)
8615 				PMD(PMD_SET, ("%s: scan failed: "
8616 				    "device become busy\n", pmf))
8617 			if (idletime < thresh)
8618 				PMD(PMD_SET, ("%s: scan failed: device "
8619 				    "hasn't been idle long enough\n", pmf))
8620 #endif
8621 			*iresp = DDI_FAILURE;
8622 			*errnop = EBUSY;
8623 			ret = DDI_FAILURE;
8624 			goto post_notify;
8625 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8626 			PMD(PMD_SET, ("%s: current level is already at "
8627 			    "or below the requested level.\n", pmf))
8628 			*iresp = DDI_SUCCESS;
8629 			ret = DDI_SUCCESS;
8630 			goto post_notify;
8631 		}
8632 		break;
8633 	}
8634 
8635 	if (PM_IS_CFB(dip) && (comps_off_incr =
8636 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8637 		/*
8638 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8639 		 * component from full power.  Remember that we tried to
8640 		 * lower power in case it fails and we need to back out
8641 		 * the adjustment.
8642 		 */
8643 		update_comps_off(comps_off_incr, dip);
8644 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8645 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8646 		    pm_cfb_comps_off))
8647 	}
8648 
8649 	if ((*iresp = power_dev(dip,
8650 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8651 #ifdef DEBUG
8652 		/*
8653 		 * All descendents of this node should already be powered off.
8654 		 */
8655 		if (PM_CURPOWER(dip, comp) == 0) {
8656 			pm_desc_pwrchk_t pdpchk;
8657 			pdpchk.pdpc_dip = dip;
8658 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8659 			ndi_devi_enter(dip, &circ_db);
8660 			for (cdip = ddi_get_child(dip); cdip != NULL;
8661 			    cdip = ddi_get_next_sibling(cdip)) {
8662 				ndi_devi_enter(cdip, &ccirc_db);
8663 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8664 				    (void *)&pdpchk);
8665 				ndi_devi_exit(cdip, ccirc_db);
8666 			}
8667 			ndi_devi_exit(dip, circ_db);
8668 		}
8669 #endif
8670 		/*
8671 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8672 		 * back up to full power.
8673 		 */
8674 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8675 			update_comps_off(comps_off_incr, dip);
8676 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8677 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8678 			    comp, clevel, nlevel, pm_cfb_comps_off))
8679 		}
8680 		dodeps = 0;
8681 		if (POWERING_OFF(clevel, nlevel)) {
8682 			if (PM_ISBC(dip)) {
8683 				dodeps = (comp == 0);
8684 			} else {
8685 				int i;
8686 				dodeps = 1;
8687 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8688 					/* if some component still on */
8689 					if (PM_CURPOWER(dip, i)) {
8690 						dodeps = 0;
8691 						break;
8692 					}
8693 				}
8694 			}
8695 			if (dodeps)
8696 				work_type = PM_DEP_WK_POWER_OFF;
8697 		} else if (POWERING_ON(clevel, nlevel)) {
8698 			if (PM_ISBC(dip)) {
8699 				dodeps = (comp == 0);
8700 			} else {
8701 				int i;
8702 				dodeps = 1;
8703 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8704 					if (i == comp)
8705 						continue;
8706 					if (PM_CURPOWER(dip, i) > 0) {
8707 						dodeps = 0;
8708 						break;
8709 					}
8710 				}
8711 			}
8712 			if (dodeps)
8713 				work_type = PM_DEP_WK_POWER_ON;
8714 		}
8715 
8716 		if (dodeps) {
8717 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8718 
8719 			(void) ddi_pathname(dip, pathbuf);
8720 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8721 			    PM_DEP_NOWAIT, NULL, 0);
8722 			kmem_free(pathbuf, MAXPATHLEN);
8723 		}
8724 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8725 			int old;
8726 
8727 			/* If old power cached during deadlock, use it. */
8728 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8729 			    cp->pmc_phc_pwr : olevel);
8730 			mutex_enter(&pm_rsvp_lock);
8731 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8732 			    old, canblock);
8733 			pm_enqueue_notify_others(&devl, canblock);
8734 			mutex_exit(&pm_rsvp_lock);
8735 		}
8736 
8737 		/*
8738 		 * If we are coming from a scan, don't do it again,
8739 		 * else we can have infinite loops.
8740 		 */
8741 		if (!scan)
8742 			pm_rescan(dip);
8743 	} else {
8744 		/* if we incremented pm_comps_off_count, but failed */
8745 		if (comps_off_incr > 0) {
8746 			update_comps_off(-comps_off_incr, dip);
8747 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8748 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8749 			    comp, clevel, nlevel, pm_cfb_comps_off))
8750 		}
8751 		*errnop = EIO;
8752 	}
8753 
8754 post_notify:
8755 	/*
8756 	 * This thread may have been in deadlock with pm_power_has_changed.
8757 	 * Before releasing power lock, clear the flag which marks this
8758 	 * condition.
8759 	 */
8760 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8761 
8762 	/*
8763 	 * Update the old power level in the bus power structure with the
8764 	 * actual power level before the transition was made to the new level.
8765 	 * Some involved parents depend on this information to keep track of
8766 	 * their children's power transition.
8767 	 */
8768 	if (*iresp != DDI_FAILURE)
8769 		bpc->bpc_olevel = clevel;
8770 
8771 	if (PM_WANTS_NOTIFICATION(pdip)) {
8772 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8773 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8774 		PM_UNLOCK_POWER(dip, circ);
8775 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8776 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8777 		    PM_DEVICE(dip), ret))
8778 	} else {
8779 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8780 		PM_UNLOCK_POWER(dip, circ);
8781 		/*
8782 		 * Now that we know what power transition has occurred
8783 		 * (if any), release the power hold.  Leave the hold
8784 		 * in effect in the case of OFF->ON transition.
8785 		 */
8786 		if (!(clevel == 0 && nlevel > 0 &&
8787 		    (!PM_ISBC(dip) || comp == 0)))
8788 			pm_rele_power(pdip);
8789 		/*
8790 		 * If the power transition was an ON->OFF transition,
8791 		 * remove the power hold from the parent.
8792 		 */
8793 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8794 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8795 			pm_rele_power(pdip);
8796 	}
8797 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8798 		return (DDI_FAILURE);
8799 	else
8800 		return (DDI_SUCCESS);
8801 }
8802 
8803 /*
8804  * If an app (SunVTS or Xsun) has taken control, then block until it
8805  * gives it up or makes the requested power level change, unless
8806  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8807  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8808  */
8809 static int
8810 pm_busop_match_request(dev_info_t *dip, void *arg)
8811 {
8812 	PMD_FUNC(pmf, "bp_match_request")
8813 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8814 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8815 	int comp = bpc->bpc_comp;
8816 	int nlevel = bpc->bpc_nlevel;
8817 	pm_canblock_t canblock = pspm->pspm_canblock;
8818 	int direction = pspm->pspm_direction;
8819 	int clevel, circ;
8820 
8821 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8822 	PM_LOCK_POWER(dip, &circ);
8823 	clevel = PM_CURPOWER(dip, comp);
8824 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8825 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8826 	if (direction == PM_LEVEL_UPONLY) {
8827 		if (clevel >= nlevel) {
8828 			PM_UNLOCK_POWER(dip, circ);
8829 			PM_UNLOCK_DIP(dip);
8830 			return (DDI_SUCCESS);
8831 		}
8832 	} else if (clevel == nlevel) {
8833 		PM_UNLOCK_POWER(dip, circ);
8834 		PM_UNLOCK_DIP(dip);
8835 		return (DDI_SUCCESS);
8836 	}
8837 	if (canblock == PM_CANBLOCK_FAIL) {
8838 		PM_UNLOCK_POWER(dip, circ);
8839 		PM_UNLOCK_DIP(dip);
8840 		return (DDI_FAILURE);
8841 	}
8842 	if (canblock == PM_CANBLOCK_BLOCK) {
8843 		/*
8844 		 * To avoid a deadlock, we must not hold the
8845 		 * power lock when we pm_block.
8846 		 */
8847 		PM_UNLOCK_POWER(dip, circ);
8848 		PMD(PMD_SET, ("%s: blocking\n", pmf))
8849 		    /* pm_block releases dip lock */
8850 		    switch (pm_block(dip, comp, nlevel, clevel)) {
8851 		    case PMP_RELEASE:
8852 				return (EAGAIN);
8853 		    case PMP_SUCCEED:
8854 				return (DDI_SUCCESS);
8855 		    case PMP_FAIL:
8856 				return (DDI_FAILURE);
8857 		    }
8858 	} else {
8859 		ASSERT(0);
8860 	}
8861 	_NOTE(NOTREACHED);
8862 	return (DDI_FAILURE);	/* keep gcc happy */
8863 }
8864 
8865 static int
8866 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
8867 {
8868 	PMD_FUNC(pmf, "all_to_normal_nexus")
8869 	int		*normal;
8870 	int		i, ncomps;
8871 	size_t		size;
8872 	int		changefailed = 0;
8873 	int		ret, result = DDI_SUCCESS;
8874 	pm_bp_nexus_pwrup_t	bpn;
8875 	pm_sp_misc_t	pspm;
8876 
8877 	ASSERT(PM_GET_PM_INFO(dip));
8878 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8879 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
8880 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
8881 		return (DDI_FAILURE);
8882 	}
8883 	ncomps = PM_NUMCMPTS(dip);
8884 	for (i = 0; i < ncomps; i++) {
8885 		bpn.bpn_dip = dip;
8886 		bpn.bpn_comp = i;
8887 		bpn.bpn_level = normal[i];
8888 		pspm.pspm_canblock = canblock;
8889 		pspm.pspm_scan = 0;
8890 		bpn.bpn_private = &pspm;
8891 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
8892 		    (void *)&bpn, (void *)&result);
8893 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
8894 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
8895 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
8896 			    i, normal[i], result))
8897 			changefailed++;
8898 		}
8899 	}
8900 	kmem_free(normal, size);
8901 	if (changefailed) {
8902 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
8903 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
8904 		return (DDI_FAILURE);
8905 	}
8906 	return (DDI_SUCCESS);
8907 }
8908 
8909 int
8910 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
8911     dev_info_t *tdip)
8912 {
8913 	PMD_FUNC(pmf, "noinvol_update")
8914 	pm_bp_noinvol_t args;
8915 	int ret;
8916 	int result = DDI_SUCCESS;
8917 
8918 	args.bpni_path = path;
8919 	args.bpni_dip = tdip;
8920 	args.bpni_cmd = subcmd;
8921 	args.bpni_wasvolpmd = wasvolpmd;
8922 	args.bpni_volpmd = volpmd;
8923 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
8924 	    "volpmd %d wasvolpmd %d\n", pmf,
8925 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
8926 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
8927 	    &args, &result);
8928 	return (ret);
8929 }
8930 
8931 void
8932 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
8933 {
8934 	PMD_FUNC(pmf, "noinvol_update_node")
8935 
8936 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8937 	switch (req->bpni_cmd) {
8938 	case PM_BP_NOINVOL_ATTACH:
8939 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
8940 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8941 		    DEVI(dip)->devi_pm_noinvolpm,
8942 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8943 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8944 		PM_LOCK_DIP(dip);
8945 		DEVI(dip)->devi_pm_noinvolpm--;
8946 		if (req->bpni_wasvolpmd) {
8947 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
8948 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8949 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8950 			    DEVI(dip)->devi_pm_volpmd - 1))
8951 			if (DEVI(dip)->devi_pm_volpmd)
8952 				DEVI(dip)->devi_pm_volpmd--;
8953 		}
8954 		PM_UNLOCK_DIP(dip);
8955 		break;
8956 
8957 	case PM_BP_NOINVOL_DETACH:
8958 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
8959 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
8960 		    DEVI(dip)->devi_pm_noinvolpm,
8961 		    DEVI(dip)->devi_pm_noinvolpm + 1))
8962 		PM_LOCK_DIP(dip);
8963 		DEVI(dip)->devi_pm_noinvolpm++;
8964 		if (req->bpni_wasvolpmd) {
8965 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
8966 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8967 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8968 			    DEVI(dip)->devi_pm_volpmd + 1))
8969 			DEVI(dip)->devi_pm_volpmd++;
8970 		}
8971 		PM_UNLOCK_DIP(dip);
8972 		break;
8973 
8974 	case PM_BP_NOINVOL_REMDRV:
8975 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8976 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8977 		    DEVI(dip)->devi_pm_noinvolpm,
8978 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8979 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8980 		PM_LOCK_DIP(dip);
8981 		DEVI(dip)->devi_pm_noinvolpm--;
8982 		if (req->bpni_wasvolpmd) {
8983 			PMD(PMD_NOINVOL,
8984 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8985 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
8986 			    DEVI(dip)->devi_pm_volpmd,
8987 			    DEVI(dip)->devi_pm_volpmd - 1))
8988 			/*
8989 			 * A power up could come in between and
8990 			 * clear the volpmd, if that's the case,
8991 			 * volpmd would be clear.
8992 			 */
8993 			if (DEVI(dip)->devi_pm_volpmd)
8994 				DEVI(dip)->devi_pm_volpmd--;
8995 		}
8996 		PM_UNLOCK_DIP(dip);
8997 		break;
8998 
8999 	case PM_BP_NOINVOL_CFB:
9000 		PMD(PMD_NOINVOL,
9001 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9002 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9003 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9004 		PM_LOCK_DIP(dip);
9005 		DEVI(dip)->devi_pm_noinvolpm++;
9006 		PM_UNLOCK_DIP(dip);
9007 		break;
9008 
9009 	case PM_BP_NOINVOL_POWER:
9010 		PMD(PMD_NOINVOL,
9011 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9012 		    pmf, PM_DEVICE(dip),
9013 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9014 		    req->bpni_volpmd))
9015 		PM_LOCK_DIP(dip);
9016 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9017 		PM_UNLOCK_DIP(dip);
9018 		break;
9019 
9020 	default:
9021 		break;
9022 	}
9023 
9024 }
9025 
#ifdef DEBUG
/*
 * Tree walk callback (DEBUG kernels only).  arg is a pm_desc_pwrchk_t
 * naming the device that is being powered off (pdpc_dip).  Nodes without
 * pm info are passed over.  For every component of dip whose current power
 * is non-zero a message is issued through cmn_err(): at CE_PANIC when
 * pdpc_par_involved is zero, at CE_WARN otherwise.
 *
 * Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	pm_info_t *info = PM_GET_PM_INFO(dip);
	int comp, level, severity;

	if (info == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			/* Fatal unless the parent is known to be involved. */
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;

			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while "
			    "desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), comp,
			    level))
			cmn_err(severity, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9054 
9055 /*
9056  * Record the fact that one thread is borrowing the lock on a device node.
9057  * Use is restricted to the case where the lending thread will block until
9058  * the borrowing thread (always curthread) completes.
9059  */
9060 void
9061 pm_borrow_lock(kthread_t *lender)
9062 {
9063 	lock_loan_t *prev = &lock_loan_head;
9064 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9065 
9066 	cur->pmlk_borrower = curthread;
9067 	cur->pmlk_lender = lender;
9068 	mutex_enter(&pm_loan_lock);
9069 	cur->pmlk_next = prev->pmlk_next;
9070 	prev->pmlk_next = cur;
9071 	mutex_exit(&pm_loan_lock);
9072 }
9073 
9074 /*
9075  * Return the borrowed lock.  A thread can borrow only one.
9076  */
9077 void
9078 pm_return_lock(void)
9079 {
9080 	lock_loan_t *cur;
9081 	lock_loan_t *prev = &lock_loan_head;
9082 
9083 	mutex_enter(&pm_loan_lock);
9084 	ASSERT(prev->pmlk_next != NULL);
9085 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9086 		if (cur->pmlk_borrower == curthread)
9087 			break;
9088 
9089 	ASSERT(cur != NULL);
9090 	prev->pmlk_next = cur->pmlk_next;
9091 	mutex_exit(&pm_loan_lock);
9092 	kmem_free(cur, sizeof (*cur));
9093 }
9094