xref: /titanic_44/usr/src/uts/common/os/sunpm.c (revision 28cdc3d776761766afeb198769d1b70ed7e0f2e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sunpm.c builds sunpm.o	"power management framework"
30  *	kernel-resident power management code.  Implements power management
31  *	policy
32  *	Assumes: all backwards compat. device components wake up on &
33  *		 the pm_info pointer in dev_info is initially NULL
34  *
35  * PM - (device) Power Management
36  *
37  * Each device may have 0 or more components.  If a device has no components,
38  * then it can't be power managed.  Each component has 2 or more
39  * power states.
40  *
41  * "Backwards Compatible" (bc) devices:
42  * There are two different types of devices from the point of view of this
43  * code.  The original type, left over from the original PM implementation on
44  * the voyager platform are known in this code as "backwards compatible"
45  * devices (PM_ISBC(dip) returns true).
46  * They are recognized by the pm code by the lack of a pm-components property
47  * and a call made by the driver to pm_create_components(9F).
48  * For these devices, component 0 is special, and represents the power state
49  * of the device.  If component 0 is to be set to power level 0 (off), then
50  * the framework must first call into the driver's detach(9E) routine with
51  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
52  * After setting component 0 from 0 to a non-zero power level, a call must be
53  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
54  *
55  * Currently, the only way to get a bc device power managed is via a set of
56  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
57  *
58  * For non-bc devices, the driver describes the components by exporting a
59  * pm-components(9P) property that tells how many components there are,
60  * tells what each component's power state values are, and provides human
61  * readable strings (currently unused) for each component name and power state.
62  * Devices which export pm-components(9P) are automatically power managed
63  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
64  * after parsing power.conf(4)).
65  * For these devices, all components are considered independent of each other,
66  * and it is up to the driver to decide when a transition requires saving or
67  * restoring hardware state.
68  *
69  * Each device component also has a threshold time associated with each power
70  * transition (see power.conf(4)), and a busy/idle state maintained by the
71  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
72  * Components are created idle.
73  *
74  * The PM framework provides several functions:
75  * -implement PM policy as described in power.conf(4)
76  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
77  *  Policies consist of:
78  *    -set threshold values (defaults if none provided by pmconfig)
79  *    -set dependencies among devices
80  *    -enable/disable autopm
81  *    -turn down idle components based on thresholds (if autopm is enabled)
82  *     (aka scanning)
83  *    -maintain power states based on dependencies among devices
84  *    -upon request, or when the frame buffer powers off, attempt to turn off
85  *     all components that are idle or become idle over the next (10 sec)
86  *     period in an attempt to get down to an EnergyStar compliant state
87  *    -prevent powering off of a device which exported the
88  *     pm-no-involuntary-power-cycles property without active involvement of
89  *     the device's driver (so no removing power when the device driver is
90  *     not attached)
91  * -provide a mechanism for a device driver to request that a device's component
92  *  be brought back to the power level necessary for the use of the device
93  * -allow a process to directly control the power levels of device components
94  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
95  * -ensure that the console frame buffer is powered up before being referenced
96  *  via prom_printf() or other prom calls that might generate console output
97  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
98  * -provide "backwards compatible" behavior for devices without pm-components
99  *  property
100  *
101  * Scanning:
102  * Whenever autopm is enabled, the framework attempts to bring each component
103  * of each device to its lowest power based on the threshold of idleness
104  * associated with each transition and the busy/idle state of the component.
105  *
106  * The actual work of this is done by pm_scan_dev(), which cycles through each
107  * component of a device, checking its idleness against its current threshold,
108  * and calling pm_set_power() as appropriate to change the power level.
109  * This function also indicates when it would next be profitable to scan the
110  * device again, and a new scan is scheduled after that time.
111  *
112  * Dependencies:
113  * It is possible to establish a dependency between the power states of two
114  * otherwise unrelated devices.  This is currently done to ensure that the
115  * cdrom is always up whenever the console framebuffer is up, so that the user
116  * can insert a cdrom and see a popup as a result.
117  *
118  * The dependency terminology used in power.conf(4) is not easy to understand,
119  * so we've adopted a different terminology in the implementation.  We write
120  * of a "keeps up" and a "kept up" device.  A relationship can be established
121  * where one device keeps up another.  That means that if the keepsup device
122  * has any component that is at a non-zero power level, all components of the
123  * "kept up" device must be brought to full power.  This relationship is
124  * asynchronous.  When the keeping device is powered up, a request is queued
125  * to a worker thread to bring up the kept device.  The caller does not wait.
126  * Scan will not turn down a kept up device.
127  *
128  * Direct PM:
129  * A device may be directly power managed by a process.  If a device is
130  * directly pm'd, then it will not be scanned, and dependencies will not be
130  * enforced.  If a directly pm'd device's driver requests a power change (via
132  * pm_raise_power(9F)), then the request is blocked and notification is sent
133  * to the controlling process, which must issue the requested power change for
134  * the driver to proceed.
135  *
136  */
137 
138 #include <sys/types.h>
139 #include <sys/errno.h>
140 #include <sys/callb.h>		/* callback registration during CPR */
141 #include <sys/conf.h>		/* driver flags and functions */
142 #include <sys/open.h>		/* OTYP_CHR definition */
143 #include <sys/stat.h>		/* S_IFCHR definition */
144 #include <sys/pathname.h>	/* name -> dev_info xlation */
145 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
146 #include <sys/kmem.h>		/* memory alloc stuff */
147 #include <sys/debug.h>
148 #include <sys/archsystm.h>
149 #include <sys/pm.h>
150 #include <sys/ddi.h>
151 #include <sys/sunddi.h>
152 #include <sys/sunndi.h>
153 #include <sys/sunpm.h>
154 #include <sys/epm.h>
155 #include <sys/vfs.h>
156 #include <sys/mode.h>
157 #include <sys/mkdev.h>
158 #include <sys/promif.h>
159 #include <sys/consdev.h>
160 #include <sys/esunddi.h>
161 #include <sys/modctl.h>
162 #include <sys/fs/ufs_fs.h>
163 #include <sys/note.h>
164 #include <sys/taskq.h>
165 #include <sys/bootconf.h>
166 #include <sys/reboot.h>
167 #include <sys/spl.h>
168 #include <sys/disp.h>
169 #include <sys/sobject.h>
170 #include <sys/sunmdi.h>
171 
172 
173 /*
174  * PM LOCKING
175  *	The list of locks:
176  * Global pm mutex locks.
177  *
178  * pm_scan_lock:
179  *		It protects the timeout id of the scan thread, and the value
180  *		of autopm_enabled.  This lock is not held concurrently with
181  *		any other PM locks.
182  *
183  * pm_clone_lock:	Protects the clone list and count of poll events
184  *		pending for the pm driver.
185  *		Lock ordering:
186  *			pm_clone_lock -> pm_pscc_interest_rwlock,
187  *			pm_clone_lock -> pm_pscc_direct_rwlock.
188  *
189  * pm_rsvp_lock:
190  *		Used to synchronize the data structures used for processes
191  *		to rendezvous with state change information when doing
192  *		direct PM.
193  *		Lock ordering:
194  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
195  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
196  *			pm_rsvp_lock -> pm_clone_lock.
197  *
198  * ppm_lock:	protects the list of registered ppm drivers
199  *		Lock ordering:
200  *			ppm_lock -> ppm driver unit_lock
201  *
202  * pm_compcnt_lock:
203  *		Protects count of components that are not at their lowest
204  *		power level.
205  *		Lock ordering:
206  *			pm_compcnt_lock -> ppm_lock.
207  *
208  * pm_dep_thread_lock:
209  *		Protects work list for pm_dep_thread.  Not taken concurrently
210  *		with any other pm lock.
211  *
212  * pm_remdrv_lock:
213  *		Serializes the operation of removing noinvol data structure
214  *		entries for a branch of the tree when a driver has been
215  *		removed from the system (modctl_rem_major).
216  *		Lock ordering:
217  *			pm_remdrv_lock -> pm_noinvol_rwlock.
218  *
219  * pm_cfb_lock: (High level spin lock)
220  *		Protects the count of how many components of the console
221  *		frame buffer are off (so we know if we have to bring up the
222  *		console as a result of a prom_printf, etc.)
223  *		No other locks are taken while holding this lock.
224  *
225  * pm_loan_lock:
226  *		Protects the lock_loan list.  List is used to record that one
227  *		thread has acquired a power lock but has launched another thread
228  *		to complete its processing.  An entry in the list indicates that
229  *		the worker thread can borrow the lock held by the other thread,
230  *		which must block on the completion of the worker.  Use is
231  *		specific to module loading.
232  *		No other locks are taken while holding this lock.
233  *
234  * Global PM rwlocks
235  *
236  * pm_thresh_rwlock:
237  *		Protects the list of thresholds recorded for future use (when
238  *		devices attach).
239  *		Lock ordering:
240  *			pm_thresh_rwlock -> devi_pm_lock
241  *
242  * pm_noinvol_rwlock:
243  *		Protects list of detached nodes that had noinvol registered.
244  *		No other PM locks are taken while holding pm_noinvol_rwlock.
245  *
246  * pm_pscc_direct_rwlock:
247  *		Protects the list that maps devices being directly power
248  *		managed to the processes that manage them.
249  *		Lock ordering:
250  *			pm_pscc_direct_rwlock -> psce_lock
251  *
252  * pm_pscc_interest_rwlock:
253  *		Protects the list that maps state change events to processes
254  *		that want to know about them.
255  *		Lock ordering:
256  *			pm_pscc_interest_rwlock -> psce_lock
257  *
258  * per-dip locks:
259  *
260  * Each node has these per-dip locks, which are only used if the device is
261  * a candidate for power management (e.g. has pm components)
262  *
263  * devi_pm_lock:
264  *		Protects all power management state of the node except for
265  *		power level, which is protected by ndi_devi_enter().
266  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
267  *		Lock ordering:
268  *			devi_pm_lock -> pm_rsvp_lock,
269  *			devi_pm_lock -> pm_dep_thread_lock,
270  *			devi_pm_lock -> pm_noinvol_rwlock,
271  *			devi_pm_lock -> power lock
272  *
273  * power lock (ndi_devi_enter()):
274  *		Since changing power level is possibly a slow operation (30
275  *		seconds to spin up a disk drive), this is locked separately.
276  *		Since a call into the driver to change the power level of one
277  *		component may result in a call back into the framework to change
278  *		the power level of another, this lock allows re-entrancy by
279  *		the same thread (ndi_devi_enter is used for this because
280  *		the USB framework uses ndi_devi_enter in its power entry point,
281  *		and use of any other lock would produce a deadlock).
282  *
283  * devi_pm_busy_lock:
284  *		This lock protects the integrity of the busy count.  It is
285  *		only taken by pm_busy_component() and pm_idle_component and
286  *		some code that adjust the busy time after the timer gets set
287  *		up or after a CPR operation.  It is per-dip to keep from
288  *		single-threading all the disk drivers on a system.
289  *		It could be per component instead, but most devices have
290  *		only one component.
291  *		No other PM locks are taken while holding this lock.
292  *
293  */
294 
295 static int stdout_is_framebuffer;
296 static kmutex_t	e_pm_power_lock;
297 static kmutex_t pm_loan_lock;	/* protects the lock_loan list */
298 kmutex_t	pm_scan_lock;	/* scan timeout id and autopm_enabled */
299 callb_id_t	pm_cpr_cb_id;	/* callb_add() id for pm_cpr_callb */
300 callb_id_t	pm_panic_cb_id;	/* callb_add() id for pm_panic_callb */
301 callb_id_t	pm_halt_cb_id;	/* callb_add() id for pm_halt_callb */
302 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
303 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
304 
/*
 * Scan intervals.  pm_min_scan is multiplied by hz when pm_rescan() arms its
 * timeout, so it is expressed in seconds; pm_id_ticks is handed to timeout()
 * unscaled and is used instead when the scan's ps_idle_down flag is set.
 */
305 clock_t pm_min_scan = PM_MIN_SCAN;
306 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
307 
/* Forward declarations of file-local (static) routines. */
308 static int pm_busop_set_power(dev_info_t *,
309     void *, pm_bus_power_op_t, void *, void *);
310 static int pm_busop_match_request(dev_info_t *, void *);
311 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
312 
313 /*
314  * Dependency processing is done through a separate thread (pm_dep_thread,
 * created by pm_init()).  pm_dep_thread_lock protects that thread's work
 * list; pm_dep_thread_workq and pm_dep_thread_tail are presumably the head
 * and tail of that list (their users are outside this part of the file).
315  */
316 kmutex_t	pm_dep_thread_lock;
317 kcondvar_t	pm_dep_thread_cv;
318 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
319 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
320 
321 /*
322  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
323  * power managing things in single user mode that have been suppressed via
324  * power.conf entries.  Protected by pm_scan_lock.
 * pm_cpr_callb() saves and clears this flag at checkpoint time and restores
 * it on resume.
325  */
326 int		autopm_enabled;
327 
328 /*
329  * This flag is true while processes are stopped for a checkpoint/resume.
330  * Controlling processes of direct pm'd devices are not available to
331  * participate in power level changes, so we bypass them when this is set.
332  */
333 static int	pm_processes_stopped;
334 
335 #ifdef	DEBUG
336 
337 /*
338  * see common/sys/epm.h for PMD_* values
 * pm_debug is a bit mask; code tests it against individual PMD_* flags
 * (for example "pm_debug & PMD_FAIL" in e_pm_valid_power()).
339  */
340 uint_t		pm_debug = 0;
341 
342 /*
343  * If pm_divertdebug is set, then no prom_printf calls will be made by
344  * PMD(), which will prevent debug output from bringing up the console
345  * frame buffer.  Clearing this variable before setting pm_debug will result
346  * in PMD output going to the console.
347  *
348  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
349  * deadlocks and decremented at the end of pm_set_power()
350  */
351 uint_t		pm_divertdebug = 1;
352 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
353 
/* DEBUG-only routine; NOTE(review): definition is not visible here. */
354 void prdeps(char *);
355 #endif
356 
357 /* Globals */
358 
359 /*
360  * List of recorded thresholds and dependencies
 * pm_thresh_rwlock protects the list of thresholds recorded for future use
 * (when devices attach); see the "PM LOCKING" comment earlier in this file.
361  */
362 pm_thresh_rec_t *pm_thresh_head;
363 krwlock_t pm_thresh_rwlock;
364 
365 pm_pdr_t *pm_dep_head;
366 static int pm_unresolved_deps = 0;
367 static int pm_prop_deps = 0;
368 
369 /*
370  * List of devices that exported no-involuntary-power-cycles property
371  */
372 pm_noinvol_t *pm_noinvol_head;
373 
374 /*
375  * Locks used in noinvol processing
 * pm_noinvol_rwlock protects the list of detached nodes that had noinvol
 * registered; pm_remdrv_lock serializes removal of noinvol entries when a
 * driver is removed.  Lock ordering: pm_remdrv_lock -> pm_noinvol_rwlock.
376  */
377 krwlock_t pm_noinvol_rwlock;
378 kmutex_t pm_remdrv_lock;
379 
380 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
381 int pm_system_idle_threshold;	/* set from the default in pm_init() */
382 /*
383  * By default nexus has 0 threshold, and depends on its children to keep it up
384  */
385 int pm_default_nexus_threshold = 0;
386 
387 /*
388  * Data structures shared with common/io/pm.c
 * pm_clone_lock protects the clone list and the count of poll events
 * pending for the pm driver.
389  */
390 kmutex_t	pm_clone_lock;
391 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
392 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
393 unsigned char	pm_interest[PM_MAX_CLONE];
394 struct pollhead	pm_pollhead;
395 
/* Symbols defined outside this file. */
396 extern int	hz;
397 extern char	*platform_module_list[];
398 
399 /*
400  * Wrappers for use in ddi_walk_devs
 * Each takes the node being visited and an opaque argument.
401  */
402 
403 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
404 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
405 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
406 static int		pm_discard_dep_walk(dev_info_t *, void *);
407 #ifdef DEBUG
408 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
409 #endif
410 
411 /*
412  * Routines for managing noinvol devices
413  */
414 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
415 void			pm_noinvol_update_node(dev_info_t *,
416 			    pm_bp_noinvol_t *req);
417 
/*
 * Global locks described in the "PM LOCKING" comment earlier in this file:
 * pm_rsvp_lock synchronizes the direct-PM rendezvous data structures,
 * pm_compcnt_lock protects pm_comps_notlowest, and the two pscc rwlocks
 * protect the pm_pscc_direct and pm_pscc_interest lists respectively.
 */
418 kmutex_t pm_rsvp_lock;
419 kmutex_t pm_compcnt_lock;
420 krwlock_t pm_pscc_direct_rwlock;
421 krwlock_t pm_pscc_interest_rwlock;
422 
423 #define	PSC_INTEREST	0	/* belongs to interest psc list */
424 #define	PSC_DIRECT	1	/* belongs to direct psc list */
425 
426 pscc_t *pm_pscc_interest;	/* state change events -> interested procs */
427 pscc_t *pm_pscc_direct;		/* direct pm'd devices -> managing procs */
428 
/*
 * Small helpers:
 *	PM_MAJOR	major number looked up from the node's binding name
 *	PM_IS_NEXUS	NEXUS_DRV() applied to the dev_ops of that major
 *	POWERING_ON	true for a transition from level 0 to a non-zero level
 *	POWERING_OFF	true for a transition from a non-zero level to level 0
 *	PPM		the node's devi_pm_ppm field viewed as a dev_info_t *
 */
429 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
430 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
431 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
432 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
433 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
434 
/*
 * PM_INCR_NOTLOWEST/PM_DECR_NOTLOWEST adjust pm_comps_notlowest while holding
 * pm_compcnt_lock.  The count is only touched when dip is not a nexus, or
 * when it has PMC_DEV_THRESH or PMC_COMP_THRESH set in devi_pm_flags.
 * pm_ppm_notify_all_lowest() is called with PM_NOT_ALL_LOWEST when the count
 * leaves zero and with PM_ALL_LOWEST when it returns to zero.
 *
 * Both expand to a brace-enclosed block (not a do/while(0) statement), and
 * their PMD() calls reference "pmf", so the calling function must have
 * declared it via PMD_FUNC().
 */
435 #define	PM_INCR_NOTLOWEST(dip) {					\
436 	mutex_enter(&pm_compcnt_lock);					\
437 	if (!PM_IS_NEXUS(dip) ||					\
438 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
439 		if (pm_comps_notlowest == 0)				\
440 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
441 		pm_comps_notlowest++;					\
442 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
443 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
444 	}								\
445 	mutex_exit(&pm_compcnt_lock);					\
446 }
447 #define	PM_DECR_NOTLOWEST(dip) {					\
448 	mutex_enter(&pm_compcnt_lock);					\
449 	if (!PM_IS_NEXUS(dip) ||					\
450 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
451 		ASSERT(pm_comps_notlowest);				\
452 		pm_comps_notlowest--;					\
453 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
454 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
455 		if (pm_comps_notlowest == 0)				\
456 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
457 	}								\
458 	mutex_exit(&pm_compcnt_lock);					\
459 }
460 
461 /*
462  * console frame-buffer power-management is not enabled when
463  * debugging services are present.  to override, set pm_cfb_override
464  * to non-zero.
 *
 * pm_cfb_lock is a high level spin lock protecting the count of console
 * frame buffer components that are off; no other locks are taken while it
 * is held.
465  */
466 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
467 kmutex_t pm_cfb_lock;
468 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
469 #ifdef DEBUG
470 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
471 #else
472 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
473 #endif
474 
475 static dev_info_t *cfb_dip = 0;
476 static dev_info_t *cfb_dip_detaching = 0;
477 uint_t cfb_inuse = 0;
478 static ddi_softintr_t pm_soft_id;
479 static clock_t pm_soft_pending;
/*
 * Set by pm_cpr_callb() at CPR checkpoint and cleared again on resume;
 * pm_rescan() returns without scheduling anything while it is non-zero.
 */
480 int	pm_scans_disabled = 0;
481 
482 /*
483  * A structure to record the fact that one thread has borrowed a lock held
484  * by another thread.  The context requires that the lender block on the
485  * completion of the borrower.
 * Entries are kept on a singly linked list (pmlk_next) that is protected by
 * pm_loan_lock.
486  */
487 typedef struct lock_loan {
488 	struct lock_loan	*pmlk_next;	/* next entry on the list */
489 	kthread_t		*pmlk_borrower;	/* thread borrowing the lock */
490 	kthread_t		*pmlk_lender;	/* thread holding the lock */
491 	dev_info_t		*pmlk_dip;	/* presumably the locked node */
492 } lock_loan_t;
493 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
494 
/*
 * PMD_FUNC(func, name) declares a local "char *func" initialized to the
 * string name in DEBUG kernels, for use as the function-name argument in
 * PMD() output.  In non-DEBUG kernels it expands to nothing.  The expansion
 * supplies its own semicolon, so invocations are written without one.
 */
495 #ifdef	DEBUG
496 #define	PMD_FUNC(func, name)	char *(func) = (name);
497 #else
498 #define	PMD_FUNC(func, name)
499 #endif
500 
501 
502 /*
503  * Must be called before first device (including pseudo) attach
504  */
505 void
506 pm_init_locks(void)
507 {
508 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
509 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
510 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
511 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
512 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
513 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
514 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
515 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
516 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
517 }
518 
519 static boolean_t
520 pm_cpr_callb(void *arg, int code)
521 {
522 	_NOTE(ARGUNUSED(arg))
523 	static int auto_save;
524 	static int pm_reset_timestamps(dev_info_t *, void *);
525 
526 	switch (code) {
527 	case CB_CODE_CPR_CHKPT:
528 		/*
529 		 * Cancel scan or wait for scan in progress to finish
530 		 * Other threads may be trying to restart the scan, so we
531 		 * have to keep at it unil it sticks
532 		 */
533 		mutex_enter(&pm_scan_lock);
534 		ASSERT(!pm_scans_disabled);
535 		pm_scans_disabled = 1;
536 		auto_save = autopm_enabled;
537 		autopm_enabled = 0;
538 		mutex_exit(&pm_scan_lock);
539 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
540 		break;
541 
542 	case CB_CODE_CPR_RESUME:
543 		ASSERT(!autopm_enabled);
544 		ASSERT(pm_scans_disabled);
545 		pm_scans_disabled = 0;
546 		/*
547 		 * Call pm_reset_timestamps to reset timestamps of each
548 		 * device to the time when the system is resumed so that their
549 		 * idleness can be re-calculated. That's to avoid devices from
550 		 * being powered down right after resume if the system was in
551 		 * suspended mode long enough.
552 		 */
553 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
554 
555 		autopm_enabled = auto_save;
556 		/*
557 		 * If there is any auto-pm device, get the scanning
558 		 * going. Otherwise don't bother.
559 		 */
560 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
561 		break;
562 	}
563 	return (B_TRUE);
564 }
565 
566 /*
567  * This callback routine is called when there is a system panic.  This function
568  * exists for prototype matching.
569  */
570 static boolean_t
571 pm_panic_callb(void *arg, int code)
572 {
573 	_NOTE(ARGUNUSED(arg, code))
574 	void pm_cfb_check_and_powerup(void);
575 	PMD(PMD_CFB, ("pm_panic_callb\n"))
576 	pm_cfb_check_and_powerup();
577 	return (B_TRUE);
578 }
579 
580 static boolean_t
581 pm_halt_callb(void *arg, int code)
582 {
583 	_NOTE(ARGUNUSED(arg, code))
584 	return (B_TRUE);	/* XXX for now */
585 }
586 
587 /*
588  * This needs to be called after the root and platform drivers are loaded
589  * and be single-threaded with respect to driver attach/detach
590  */
591 void
592 pm_init(void)
593 {
594 	PMD_FUNC(pmf, "pm_init")
595 	char **mod;
596 	extern pri_t minclsyspri;
597 	static void pm_dep_thread(void);
598 
599 	pm_comps_notlowest = 0;
600 	pm_system_idle_threshold = pm_default_idle_threshold;
601 
602 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
603 	    CB_CL_CPR_PM, "pm_cpr");
604 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
605 		    CB_CL_PANIC, "pm_panic");
606 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
607 		    CB_CL_HALT, "pm_halt");
608 
609 	/*
610 	 * Create a thread to do dependency processing.
611 	 */
612 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
613 	    TS_RUN, minclsyspri);
614 
615 	/*
616 	 * loadrootmodules already loaded these ppm drivers, now get them
617 	 * attached so they can claim the root drivers as they attach
618 	 */
619 	for (mod = platform_module_list; *mod; mod++) {
620 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
621 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
622 			    *mod);
623 		} else {
624 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
625 			    ddi_major_to_name(ddi_name_to_major(*mod))))
626 		}
627 	}
628 }
629 
630 /*
631  * pm_scan_init - create pm scan data structure.  Called (if autopm enabled)
632  * when device becomes power managed or after a failed detach and when autopm
633  * is started via PM_START_PM ioctl, and after a CPR resume to get all the
634  * devices scanning again.
635  */
636 void
637 pm_scan_init(dev_info_t *dip)
638 {
639 	PMD_FUNC(pmf, "scan_init")
640 	pm_scan_t	*scanp;
641 
642 	ASSERT(!PM_ISBC(dip));
643 
644 	PM_LOCK_DIP(dip);
645 	scanp = PM_GET_PM_SCAN(dip);
646 	if (!scanp) {
647 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
648 		    pmf, PM_DEVICE(dip)))
649 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
650 		DEVI(dip)->devi_pm_scan = scanp;
651 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
652 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
653 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
654 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
655 	}
656 	PM_UNLOCK_DIP(dip);
657 }
658 
659 /*
660  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
661  */
662 void
663 pm_scan_fini(dev_info_t *dip)
664 {
665 	PMD_FUNC(pmf, "scan_fini")
666 	pm_scan_t	*scanp;
667 
668 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
669 	ASSERT(!PM_ISBC(dip));
670 	PM_LOCK_DIP(dip);
671 	scanp = PM_GET_PM_SCAN(dip);
672 	if (!scanp) {
673 		PM_UNLOCK_DIP(dip);
674 		return;
675 	}
676 
677 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
678 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
679 
680 	kmem_free(scanp, sizeof (pm_scan_t));
681 	DEVI(dip)->devi_pm_scan = NULL;
682 	PM_UNLOCK_DIP(dip);
683 }
684 
685 /*
686  * Given a pointer to a component struct, return the current power level
687  * (struct contains index unless it is a continuous level).
688  * Located here in hopes of getting both this and dev_is_needed into the
689  * cache together
690  */
691 static int
692 cur_power(pm_component_t *cp)
693 {
694 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
695 		return (cp->pmc_cur_pwr);
696 
697 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
698 }
699 
700 static char *
701 pm_decode_direction(int direction)
702 {
703 	switch (direction) {
704 	case PM_LEVEL_UPONLY:
705 		return ("up");
706 
707 	case PM_LEVEL_EXACT:
708 		return ("exact");
709 
710 	case PM_LEVEL_DOWNONLY:
711 		return ("down");
712 
713 	default:
714 		return ("INVALID DIRECTION");
715 	}
716 }
717 
718 char *
719 pm_decode_op(pm_bus_power_op_t op)
720 {
721 	switch (op) {
722 	case BUS_POWER_CHILD_PWRCHG:
723 		return ("CHILD_PWRCHG");
724 	case BUS_POWER_NEXUS_PWRUP:
725 		return ("NEXUS_PWRUP");
726 	case BUS_POWER_PRE_NOTIFICATION:
727 		return ("PRE_NOTIFICATION");
728 	case BUS_POWER_POST_NOTIFICATION:
729 		return ("POST_NOTIFICATION");
730 	case BUS_POWER_HAS_CHANGED:
731 		return ("HAS_CHANGED");
732 	case BUS_POWER_NOINVOL:
733 		return ("NOINVOL");
734 	default:
735 		return ("UNKNOWN OP");
736 	}
737 }
738 
739 /*
740  * Returns true if level is a possible (valid) power level for component
741  */
742 int
743 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
744 {
745 	PMD_FUNC(pmf, "e_pm_valid_power")
746 	pm_component_t *cp = PM_CP(dip, cmpt);
747 	int i;
748 	int *ip = cp->pmc_comp.pmc_lvals;
749 	int limit = cp->pmc_comp.pmc_numlevels;
750 
751 	if (level < 0)
752 		return (0);
753 	for (i = 0; i < limit; i++) {
754 		if (level == *ip++)
755 			return (1);
756 	}
757 #ifdef DEBUG
758 	if (pm_debug & PMD_FAIL) {
759 		ip = cp->pmc_comp.pmc_lvals;
760 
761 		for (i = 0; i < limit; i++)
762 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
763 			    pmf, i, *ip++))
764 	}
765 #endif
766 	return (0);
767 }
768 
769 /*
770  * Returns true if device is pm'd (after calling pm_start if need be)
771  */
772 int
773 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
774 {
775 	pm_info_t *info;
776 	static int pm_start(dev_info_t *dip);
777 
778 	/*
779 	 * Check if the device is power managed if not.
780 	 * To make the common case (device is power managed already)
781 	 * fast, we check without the lock.  If device is not already
782 	 * power managed, then we take the lock and the long route through
783 	 * go get it managed.  Devices never go unmanaged until they
784 	 * detach.
785 	 */
786 	info = PM_GET_PM_INFO(dip);
787 	if (!info) {
788 		if (!DEVI_IS_ATTACHING(dip)) {
789 			return (0);
790 		}
791 		if (pm_start(dip) != DDI_SUCCESS) {
792 			return (0);
793 		}
794 		info = PM_GET_PM_INFO(dip);
795 	}
796 	ASSERT(info);
797 	if (infop != NULL)
798 		*infop = info;
799 	return (1);
800 }
801 
802 int
803 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
804 {
805 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
806 		if (cpp != NULL)
807 			*cpp = PM_CP(dip, cmpt);
808 		return (1);
809 	} else {
810 		return (0);
811 	}
812 }
813 
814 /*
815  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
816  */
817 static int
818 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
819 {
820 	PMD_FUNC(pmf, "din")
821 	pm_component_t *cp;
822 	char *pathbuf;
823 	int result;
824 
825 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
826 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
827 	    !e_pm_valid_power(dip, cmpt, level))
828 		return (DDI_FAILURE);
829 
830 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
831 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
832 	    level, cur_power(cp)))
833 
834 	if (pm_set_power(dip, cmpt, level,  direction,
835 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
836 		if (direction == PM_LEVEL_UPONLY) {
837 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
838 			(void) ddi_pathname(dip, pathbuf);
839 			cmn_err(CE_WARN, "Device %s failed to power up.",
840 			    pathbuf);
841 			kmem_free(pathbuf, MAXPATHLEN);
842 		}
843 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
844 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
845 		    pm_decode_direction(direction), level, result))
846 		return (DDI_FAILURE);
847 	}
848 
849 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
850 	    PM_DEVICE(dip)))
851 	pm_rescan(dip);
852 	return (DDI_SUCCESS);
853 }
854 
/*
 * pm_rescan - request a (re)scan of a device.
 *
 * We can get multiple pm_rescan() threads.  If one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t * of the device to scan; it is passed as a void *
 * because this function is also installed as a timeout() callback (see the
 * timeout() call below).
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	/*
	 * Nothing to do if scanning is globally off, the node has no pm
	 * info or scan state, or the scan is being stopped.
	 */
	if (pm_scans_disabled || !autopm_enabled || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is in progress; ask it to go around once more */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it.  The id is cleared
		 * under the lock and the lock is dropped around untimeout().
		 * NOTE(review): presumably because untimeout() can wait for a
		 * running pm_rescan() callback, which takes this same lock --
		 * confirm.
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		if (scanp->ps_scan_id) {
			/*
			 * The lock was dropped above, so another thread may
			 * have armed a timeout in the meantime; cancel it.
			 */
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			/* lost the race again: let the other thread's stand */
			if (scanp->ps_scan_id) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/*
		 * Retry later: after pm_id_ticks if idle-down is in effect,
		 * otherwise after pm_min_scan seconds (converted to ticks).
		 */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (pm_min_scan * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* taskq_dispatch() succeeded; remember a pm_scan is queued */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
943 
/*
 * pm_scan - run a scan of one device.
 *
 * arg is the dev_info_t * of the device, passed as a void *.  The function
 * calls pm_scan_dev() repeatedly for as long as PM_SCAN_AGAIN gets set while
 * a pass is running, then arms a pm_rescan() timeout for the interval
 * returned by the last pass (unless that interval is LONG_MAX).
 * PM_SCANNING is held for the duration so that only one thread at a time
 * is inside pm_scan_dev() for this device.
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/*
	 * Scanning turned off or being stopped: drop the request, clearing
	 * the flags that say a scan is wanted or queued.
	 */
	if (pm_scans_disabled || !autopm_enabled ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* hand the work to the thread that is already scanning */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	/* we are the scanner now; the dispatched request has been consumed */
	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	do {
		/*
		 * Clear PM_SCAN_AGAIN before each pass, with the lock held, so
		 * that a request arriving during the pass triggers another one.
		 * The lock is not held across pm_scan_dev().
		 */
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot exceed LONG_MAX */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* someone armed a timeout while we scanned; keep it */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			/* nextscan is in seconds; timeout() is given ticks */
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1023 
1024 void
1025 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1026 {
1027 	int components = PM_NUMCMPTS(dip);
1028 	int i;
1029 
1030 	ASSERT(components > 0);
1031 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1032 	for (i = 0; i < components; i++) {
1033 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1034 	}
1035 	PM_UNLOCK_BUSY(dip);
1036 }
1037 
1038 /*
1039  * Returns true if device needs to be kept up because it exported the
1040  * "no-involuntary-power-cycles" property or we're pretending it did (console
1041  * fb case) or it is an ancestor of such a device and has used up the "one
1042  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1043  * upon detach
1044  */
1045 int
1046 pm_noinvol(dev_info_t *dip)
1047 {
1048 	PMD_FUNC(pmf, "noinvol")
1049 
1050 	/*
1051 	 * This doesn't change over the life of a driver, so no locking needed
1052 	 */
1053 	if (PM_IS_CFB(dip)) {
1054 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1055 		    pmf, PM_DEVICE(dip)))
1056 		return (1);
1057 	}
1058 	/*
1059 	 * Not an issue if no such kids
1060 	 */
1061 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1062 #ifdef DEBUG
1063 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1064 			dev_info_t *pdip = dip;
1065 			do {
1066 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1067 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1068 				    DEVI(pdip)->devi_pm_noinvolpm,
1069 				    DEVI(pdip)->devi_pm_volpmd))
1070 				pdip = ddi_get_parent(pdip);
1071 			} while (pdip);
1072 		}
1073 #endif
1074 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1075 		return (0);
1076 	}
1077 
1078 	/*
1079 	 * Since we now maintain the counts correct at every node, we no longer
1080 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1081 	 * without the children getting their counts adjusted.
1082 	 */
1083 
1084 #ifdef	DEBUG
1085 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1086 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1087 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1088 		    PM_DEVICE(dip)))
1089 #endif
1090 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1091 }
1092 
1093 /*
1094  * This function performs the actual scanning of the device.
1095  * It attempts to power off the indicated device's components if they have
1096  * been idle and other restrictions are met.
1097  * pm_scan_dev calculates and returns when the next scan should happen for
1098  * this device.
1099  */
1100 time_t
1101 pm_scan_dev(dev_info_t *dip)
1102 {
1103 	PMD_FUNC(pmf, "scan_dev")
1104 	pm_scan_t	*scanp;
1105 	time_t		*timestamp, idletime, now, thresh;
1106 	time_t		timeleft = 0;
1107 	int		i, nxtpwr, curpwr, pwrndx, unused;
1108 	size_t		size;
1109 	pm_component_t	 *cp;
1110 	dev_info_t	*pdip = ddi_get_parent(dip);
1111 	int		circ;
1112 	static int	cur_threshold(dev_info_t *, int);
1113 	static int	pm_next_lower_power(pm_component_t *, int);
1114 
1115 	/*
1116 	 * skip attaching device
1117 	 */
1118 	if (DEVI_IS_ATTACHING(dip)) {
1119 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1120 		    pmf, PM_DEVICE(dip), pm_min_scan))
1121 		return (pm_min_scan);
1122 	}
1123 
1124 	PM_LOCK_DIP(dip);
1125 	scanp = PM_GET_PM_SCAN(dip);
1126 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1127 
1128 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1129 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1130 	    PM_KUC(dip)))
1131 
1132 	/* no scan under the following conditions */
1133 	if (pm_scans_disabled || !autopm_enabled ||
1134 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1135 	    (PM_KUC(dip) != 0) ||
1136 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1137 		PM_UNLOCK_DIP(dip);
1138 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1139 		    "scan_disabled(%d), apm_enabled(%d), kuc(%d), "
1140 		    "%s directpm, %s pm_noinvol\n", pmf, PM_DEVICE(dip),
1141 		    pm_scans_disabled, autopm_enabled, PM_KUC(dip),
1142 		    PM_ISDIRECT(dip) ? "is" : "is not",
1143 		    pm_noinvol(dip) ? "is" : "is not"))
1144 		return (LONG_MAX);
1145 	}
1146 	PM_UNLOCK_DIP(dip);
1147 
1148 	if (!ndi_devi_tryenter(pdip, &circ)) {
1149 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1150 		    pmf, PM_DEVICE(pdip)))
1151 		return ((time_t)1);
1152 	}
1153 	now = gethrestime_sec();
1154 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1155 	timestamp = kmem_alloc(size, KM_SLEEP);
1156 	pm_get_timestamps(dip, timestamp);
1157 
1158 	/*
1159 	 * Since we removed support for backwards compatible devices,
1160 	 * (see big comment at top of file)
1161 	 * it is no longer required to deal with component 0 last.
1162 	 */
1163 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1164 		/*
1165 		 * If already off (an optimization, perhaps)
1166 		 */
1167 		cp = PM_CP(dip, i);
1168 		pwrndx = cp->pmc_cur_pwr;
1169 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1170 		    PM_LEVEL_UNKNOWN :
1171 		    cp->pmc_comp.pmc_lvals[pwrndx];
1172 
1173 		if (pwrndx == 0) {
1174 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1175 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1176 			/* skip device if off or at its lowest */
1177 			continue;
1178 		}
1179 
1180 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1181 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1182 			/* were busy or newly became busy by another thread */
1183 			if (timeleft == 0)
1184 				timeleft = max(thresh, pm_min_scan);
1185 			else
1186 				timeleft = min(
1187 				    timeleft, max(thresh, pm_min_scan));
1188 			continue;
1189 		}
1190 
1191 		idletime = now - timestamp[i];		/* idle time */
1192 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1193 		    pmf, PM_DEVICE(dip), i, idletime))
1194 		if (idletime >= thresh || PM_IS_PID(dip)) {
1195 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1196 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1197 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1198 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1199 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1200 			    PM_CURPOWER(dip, i) != nxtpwr) {
1201 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1202 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1203 				    i, curpwr, nxtpwr))
1204 				timeleft = pm_min_scan;
1205 				continue;
1206 			} else {
1207 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1208 				    "%d->%d, GOOD curpwr %d\n", pmf,
1209 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1210 				    cur_power(cp)))
1211 
1212 				if (nxtpwr == 0)	/* component went off */
1213 					continue;
1214 
1215 				/*
1216 				 * scan to next lower level
1217 				 */
1218 				if (timeleft == 0)
1219 					timeleft = max(
1220 					    1, cur_threshold(dip, i));
1221 				else
1222 					timeleft = min(timeleft,
1223 					    max(1, cur_threshold(dip, i)));
1224 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1225 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1226 				    i, timeleft))
1227 			}
1228 		} else {	/* comp not idle long enough */
1229 			if (timeleft == 0)
1230 				timeleft = thresh - idletime;
1231 			else
1232 				timeleft = min(timeleft, (thresh - idletime));
1233 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1234 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1235 		}
1236 	}
1237 	ndi_devi_exit(pdip, circ);
1238 	kmem_free(timestamp, size);
1239 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1240 	    PM_DEVICE(dip), timeleft))
1241 
1242 	/*
1243 	 * if components are already at lowest level, timeleft is left 0
1244 	 */
1245 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1246 }
1247 
1248 /*
1249  * pm_scan_stop - cancel scheduled pm_rescan,
1250  *                wait for termination of dispatched pm_scan thread
1251  *                     and active pm_scan_dev thread.
1252  */
1253 void
1254 pm_scan_stop(dev_info_t *dip)
1255 {
1256 	PMD_FUNC(pmf, "scan_stop")
1257 	pm_scan_t	*scanp;
1258 	timeout_id_t	scanid;
1259 
1260 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1261 	PM_LOCK_DIP(dip);
1262 	scanp = PM_GET_PM_SCAN(dip);
1263 	if (!scanp) {
1264 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1265 		    pmf, PM_DEVICE(dip)))
1266 		PM_UNLOCK_DIP(dip);
1267 		return;
1268 	}
1269 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1270 
1271 	/* cancel scheduled scan taskq */
1272 	while (scanp->ps_scan_id) {
1273 		scanid = scanp->ps_scan_id;
1274 		scanp->ps_scan_id = 0;
1275 		PM_UNLOCK_DIP(dip);
1276 		(void) untimeout(scanid);
1277 		PM_LOCK_DIP(dip);
1278 	}
1279 
1280 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1281 		PM_UNLOCK_DIP(dip);
1282 		delay(1);
1283 		PM_LOCK_DIP(dip);
1284 	}
1285 	PM_UNLOCK_DIP(dip);
1286 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1287 }
1288 
1289 int
1290 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1291 {
1292 	_NOTE(ARGUNUSED(arg))
1293 
1294 	if (!PM_GET_PM_SCAN(dip))
1295 		return (DDI_WALK_CONTINUE);
1296 	ASSERT(!PM_ISBC(dip));
1297 	pm_scan_stop(dip);
1298 	return (DDI_WALK_CONTINUE);
1299 }
1300 
1301 /*
1302  * Converts a power level value to its index
1303  */
1304 static int
1305 power_val_to_index(pm_component_t *cp, int val)
1306 {
1307 	int limit, i, *ip;
1308 
1309 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1310 	    val != PM_LEVEL_EXACT);
1311 	/*  convert power value into index (i) */
1312 	limit = cp->pmc_comp.pmc_numlevels;
1313 	ip = cp->pmc_comp.pmc_lvals;
1314 	for (i = 0; i < limit; i++)
1315 		if (val == *ip++)
1316 			return (i);
1317 	return (-1);
1318 }
1319 
1320 /*
1321  * Converts a numeric power level to a printable string
1322  */
1323 static char *
1324 power_val_to_string(pm_component_t *cp, int val)
1325 {
1326 	int index;
1327 
1328 	if (val == PM_LEVEL_UPONLY)
1329 		return ("<UPONLY>");
1330 
1331 	if (val == PM_LEVEL_UNKNOWN ||
1332 	    (index = power_val_to_index(cp, val)) == -1)
1333 		return ("<LEVEL_UNKNOWN>");
1334 
1335 	return (cp->pmc_comp.pmc_lnames[index]);
1336 }
1337 
1338 /*
1339  * Return true if this node has been claimed by a ppm.
1340  */
1341 static int
1342 pm_ppm_claimed(dev_info_t *dip)
1343 {
1344 	return (PPM(dip) != NULL);
1345 }
1346 
1347 /*
1348  * A node which was voluntarily power managed has just used up its "free cycle"
1349  * and need is volpmd field cleared, and the same done to all its descendents
1350  */
1351 static void
1352 pm_clear_volpm_dip(dev_info_t *dip)
1353 {
1354 	PMD_FUNC(pmf, "clear_volpm_dip")
1355 
1356 	if (dip == NULL)
1357 		return;
1358 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1359 	    PM_DEVICE(dip)))
1360 	DEVI(dip)->devi_pm_volpmd = 0;
1361 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1362 		pm_clear_volpm_dip(dip);
1363 	}
1364 }
1365 
1366 /*
1367  * A node which was voluntarily power managed has used up the "free cycles"
1368  * for the subtree that it is the root of.  Scan through the list of detached
1369  * nodes and adjust the counts of any that are descendents of the node.
1370  */
1371 static void
1372 pm_clear_volpm_list(dev_info_t *dip)
1373 {
1374 	PMD_FUNC(pmf, "clear_volpm_list")
1375 	char	*pathbuf;
1376 	size_t	len;
1377 	pm_noinvol_t *ip;
1378 
1379 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1380 	(void) ddi_pathname(dip, pathbuf);
1381 	len = strlen(pathbuf);
1382 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1383 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1384 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1385 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1386 		    ip->ni_path))
1387 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1388 		    ip->ni_path[len] == '/') {
1389 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1390 			    ip->ni_path))
1391 			ip->ni_volpmd = 0;
1392 			ip->ni_wasvolpmd = 0;
1393 		}
1394 	}
1395 	kmem_free(pathbuf, MAXPATHLEN);
1396 	rw_exit(&pm_noinvol_rwlock);
1397 }
1398 
1399 /*
1400  * Powers a device, suspending or resuming the driver if it is a backward
1401  * compatible device, calling into ppm to change power level.
1402  * Called with the component's power lock held.
1403  */
1404 static int
1405 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1406     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1407 {
1408 	PMD_FUNC(pmf, "power_dev")
1409 	power_req_t power_req;
1410 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1411 	int		resume_needed = 0;
1412 	int		suspended = 0;
1413 	int		result;
1414 	struct pm_component *cp = PM_CP(dip, comp);
1415 	int		bc = PM_ISBC(dip);
1416 	int pm_all_components_off(dev_info_t *);
1417 	int		clearvolpmd = 0;
1418 	char		pathbuf[MAXNAMELEN];
1419 #ifdef DEBUG
1420 	char *ppmname, *ppmaddr;
1421 #endif
1422 	/*
1423 	 * If this is comp 0 of a backwards compat device and we are
1424 	 * going to take the power away, we need to detach it with
1425 	 * DDI_PM_SUSPEND command.
1426 	 */
1427 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1428 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1429 			/* We could not suspend before turning cmpt zero off */
1430 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1431 			    pmf, PM_DEVICE(dip)))
1432 			return (DDI_FAILURE);
1433 		} else {
1434 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1435 			suspended++;
1436 		}
1437 	}
1438 	power_req.request_type = PMR_PPM_SET_POWER;
1439 	power_req.req.ppm_set_power_req.who = dip;
1440 	power_req.req.ppm_set_power_req.cmpt = comp;
1441 	power_req.req.ppm_set_power_req.old_level = old_level;
1442 	power_req.req.ppm_set_power_req.new_level = level;
1443 	power_req.req.ppm_set_power_req.canblock = canblock;
1444 	power_req.req.ppm_set_power_req.cookie = NULL;
1445 #ifdef DEBUG
1446 	if (pm_ppm_claimed(dip)) {
1447 		ppmname = PM_NAME(PPM(dip));
1448 		ppmaddr = PM_ADDR(PPM(dip));
1449 
1450 	} else {
1451 		ppmname = "noppm";
1452 		ppmaddr = "0";
1453 	}
1454 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1455 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1456 	    power_val_to_string(cp, old_level), old_level,
1457 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1458 #endif
1459 	/*
1460 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1461 	 * bc device comp 0 is powering on, then we count it as a power cycle
1462 	 * against its voluntary count.
1463 	 */
1464 	if (DEVI(dip)->devi_pm_volpmd &&
1465 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1466 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1467 		clearvolpmd = 1;
1468 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1469 	    &power_req, &result)) == DDI_SUCCESS) {
1470 		/*
1471 		 * Now do involuntary pm accounting;  If we've just cycled power
1472 		 * on a voluntarily pm'd node, and by inference on its entire
1473 		 * subtree, we need to set the subtree (including those nodes
1474 		 * already detached) volpmd counts to 0, and subtract out the
1475 		 * value of the current node's volpmd count from the ancestors
1476 		 */
1477 		if (clearvolpmd) {
1478 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1479 			pm_clear_volpm_dip(dip);
1480 			pm_clear_volpm_list(dip);
1481 			if (volpmd) {
1482 				(void) ddi_pathname(dip, pathbuf);
1483 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1484 				    volpmd, 0, pathbuf, dip);
1485 			}
1486 		}
1487 	} else {
1488 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1489 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1490 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1491 	}
1492 	/*
1493 	 * If some other devices were also powered up (e.g. other cpus in
1494 	 * the same domain) return a pointer to that list
1495 	 */
1496 	if (devlist) {
1497 		*devlist = (pm_ppm_devlist_t *)
1498 		    power_req.req.ppm_set_power_req.cookie;
1499 	}
1500 	/*
1501 	 * We will have to resume the device if the device is backwards compat
1502 	 * device and either of the following is true:
1503 	 * -This is comp 0 and we have successfully powered it up
1504 	 * -This is comp 0 and we have failed to power it down. Resume is
1505 	 *  needed because we have suspended it above
1506 	 */
1507 
1508 	if (bc && comp == 0) {
1509 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1510 		if (power_op_ret == DDI_SUCCESS) {
1511 			if (POWERING_ON(old_level, level)) {
1512 				/*
1513 				 * It must be either suspended or resumed
1514 				 * via pm_power_has_changed path
1515 				 */
1516 				ASSERT((DEVI(dip)->devi_pm_flags &
1517 				    PMC_SUSPENDED) ||
1518 				    (PM_CP(dip, comp)->pmc_flags &
1519 				    PM_PHC_WHILE_SET_POWER));
1520 
1521 					resume_needed = suspended;
1522 			}
1523 		} else {
1524 			if (POWERING_OFF(old_level, level)) {
1525 				/*
1526 				 * It must be either suspended or resumed
1527 				 * via pm_power_has_changed path
1528 				 */
1529 				ASSERT((DEVI(dip)->devi_pm_flags &
1530 				    PMC_SUSPENDED) ||
1531 				    (PM_CP(dip, comp)->pmc_flags &
1532 				    PM_PHC_WHILE_SET_POWER));
1533 
1534 					resume_needed = suspended;
1535 			}
1536 		}
1537 	}
1538 	if (resume_needed) {
1539 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1540 		/* ppm is not interested in DDI_PM_RESUME */
1541 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1542 		    DDI_SUCCESS) {
1543 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1544 		} else
1545 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1546 			    PM_DEVICE(dip));
1547 	}
1548 	return (power_op_ret);
1549 }
1550 
1551 /*
1552  * Return true if we are the owner or a borrower of the devi lock.  See
1553  * pm_lock_power_single() about borrowing the lock.
1554  */
1555 static int
1556 pm_devi_lock_held(dev_info_t *dip)
1557 {
1558 	lock_loan_t *cur;
1559 
1560 	if (DEVI_BUSY_OWNED(dip))
1561 	    return (1);
1562 
1563 	/* return false if no locks borrowed */
1564 	if (lock_loan_head.pmlk_next == NULL)
1565 		return (0);
1566 
1567 	mutex_enter(&pm_loan_lock);
1568 	/* see if our thread is registered as a lock borrower. */
1569 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1570 		if (cur->pmlk_borrower == curthread)
1571 			break;
1572 	mutex_exit(&pm_loan_lock);
1573 
1574 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1575 }
1576 
1577 /*
1578  * pm_set_power: adjusts power level of device.	 Assumes device is power
1579  * manageable & component exists.
1580  *
1581  * Cases which require us to bring up devices we keep up ("wekeepups") for
1582  * backwards compatible devices:
1583  *	component 0 is off and we're bringing it up from 0
1584  *		bring up wekeepup first
1585  *	and recursively when component 0 is off and we bring some other
1586  *	component up from 0
1587  * For devices which are not backward compatible, our dependency notion is much
1588  * simpler.  Unless all components are off, then wekeeps must be on.
1589  * We don't treat component 0 differently.
1590  * Canblock tells how to deal with a direct pm'd device.
1591  * Scan arg tells us if we were called from scan, in which case we don't need
1592  * to go back to the root node and walk down to change power.
1593  */
1594 int
1595 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1596     pm_canblock_t canblock, int scan, int *retp)
1597 {
1598 	PMD_FUNC(pmf, "set_power")
1599 	char		*pathbuf;
1600 	pm_bp_child_pwrchg_t bpc;
1601 	pm_sp_misc_t	pspm;
1602 	int		ret = DDI_SUCCESS;
1603 	int		unused = DDI_SUCCESS;
1604 	dev_info_t	*pdip = ddi_get_parent(dip);
1605 
1606 #ifdef DEBUG
1607 	int		diverted = 0;
1608 
1609 	/*
1610 	 * This prevents operations on the console from calling prom_printf and
1611 	 * either deadlocking or bringing up the console because of debug
1612 	 * output
1613 	 */
1614 	if (dip == cfb_dip) {
1615 		diverted++;
1616 		mutex_enter(&pm_debug_lock);
1617 		pm_divertdebug++;
1618 		mutex_exit(&pm_debug_lock);
1619 	}
1620 #endif
1621 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1622 	    direction == PM_LEVEL_EXACT);
1623 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1624 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1625 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1626 	(void) ddi_pathname(dip, pathbuf);
1627 	bpc.bpc_dip = dip;
1628 	bpc.bpc_path = pathbuf;
1629 	bpc.bpc_comp = comp;
1630 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1631 	bpc.bpc_nlevel = level;
1632 	pspm.pspm_direction = direction;
1633 	pspm.pspm_errnop = retp;
1634 	pspm.pspm_canblock = canblock;
1635 	pspm.pspm_scan = scan;
1636 	bpc.bpc_private = &pspm;
1637 
1638 	/*
1639 	 * If a config operation is being done (we've locked the parent) or
1640 	 * we already hold the power lock (we've locked the node)
1641 	 * then we can operate directly on the node because we have already
1642 	 * brought up all the ancestors, otherwise, we have to go back to the
1643 	 * top of the tree.
1644 	 */
1645 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1646 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1647 		    (void *)&bpc, (void *)&unused);
1648 	else
1649 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1650 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1651 #ifdef DEBUG
1652 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1653 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1654 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1655 	}
1656 	if (diverted) {
1657 		mutex_enter(&pm_debug_lock);
1658 		pm_divertdebug--;
1659 		mutex_exit(&pm_debug_lock);
1660 	}
1661 #endif
1662 	kmem_free(pathbuf, MAXPATHLEN);
1663 	return (ret);
1664 }
1665 
1666 static dev_info_t *
1667 find_dip(dev_info_t *dip, char *dev_name, int holddip)
1668 {
1669 	PMD_FUNC(pmf, "find_dip")
1670 	dev_info_t	*cdip;
1671 	char		*child_dev, *addr;
1672 	char		*device;	/* writeable copy of path */
1673 	int		dev_len = strlen(dev_name)+1;
1674 	int		circ;
1675 
1676 	device = kmem_zalloc(dev_len, KM_SLEEP);
1677 	(void) strcpy(device, dev_name);
1678 	addr = strchr(device, '@');
1679 	child_dev = strchr(device, '/');
1680 	if ((addr != NULL) && (child_dev == NULL || addr < child_dev)) {
1681 		/*
1682 		 * We have device = "name@addr..." form
1683 		 */
1684 		*addr++ = '\0';			/* for strcmp (and skip '@') */
1685 		if (child_dev != NULL)
1686 			*child_dev++ = '\0';	/* for strcmp (and skip '/') */
1687 	} else {
1688 		/*
1689 		 * We have device = "name/..." or "name"
1690 		 */
1691 		addr = "";
1692 		if (child_dev != NULL)
1693 			*child_dev++ = '\0';	/* for strcmp (and skip '/') */
1694 	}
1695 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
1696 		if (strcmp(ddi_node_name(dip), device) == 0) {
1697 			/* If the driver isn't loaded, we prune the search */
1698 			if (!i_ddi_devi_attached(dip)) {
1699 				continue;
1700 			}
1701 			if (strcmp(ddi_get_name_addr(dip), addr) == 0) {
1702 				PMD(PMD_NAMETODIP, ("%s: matched %s@%s"
1703 				    "(%s#%d)\n", pmf, PM_DEVICE(dip)))
1704 				if (child_dev != NULL) {
1705 					PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): "
1706 					    "held, call find_dip %s\n", pmf,
1707 					    PM_DEVICE(dip), child_dev))
1708 					ndi_devi_enter(dip, &circ);
1709 					cdip = dip;
1710 					dip = find_dip(ddi_get_child(dip),
1711 					    child_dev, holddip);
1712 					ndi_devi_exit(cdip, circ);
1713 					PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): "
1714 					    "release, find_dip rets %s\n", pmf,
1715 					    PM_DEVICE(cdip), child_dev))
1716 				} else {
1717 					if (holddip) {
1718 						e_ddi_hold_devi(dip);
1719 						PMD(PMD_DHR | PMD_NAMETODIP,
1720 						    ("%s: held %s@%s(%s#%d), "
1721 						    "refcnt=%d\n", pmf,
1722 						    PM_DEVICE(dip),
1723 						    e_ddi_devi_holdcnt(dip)))
1724 					}
1725 				}
1726 				kmem_free(device, dev_len);
1727 				return (dip);
1728 			}
1729 		}
1730 	}
1731 	kmem_free(device, dev_len);
1732 	return (dip);
1733 }
1734 
1735 /*
1736  * If holddip is set, then if a dip is found we return with the node held
1737  */
1738 dev_info_t *
1739 pm_name_to_dip(char *pathname, int holddip)
1740 {
1741 	PMD_FUNC(pmf, "name_to_dip")
1742 	dev_info_t	*dip = NULL;
1743 	char		dev_name[MAXNAMELEN];
1744 	dev_info_t	*first_child;
1745 	int		circular;
1746 
1747 	if (!pathname)
1748 		return (NULL);
1749 
1750 	(void) strncpy(dev_name, pathname, MAXNAMELEN);
1751 
1752 	PMD(PMD_NAMETODIP, ("%s: devname: %s\n", pmf, dev_name))
1753 
1754 	/*
1755 	 * First we attempt to match the node in the tree.  If we succeed
1756 	 * we hold the driver and look up the dip again.
1757 	 * No need to hold the root as that node is always held.
1758 	 */
1759 	if (dev_name[0] == '/') {
1760 		ndi_devi_enter(ddi_root_node(), &circular);
1761 		first_child = ddi_get_child(ddi_root_node());
1762 		dip = find_dip(first_child, dev_name + 1, holddip);
1763 		ndi_devi_exit(ddi_root_node(), circular);
1764 
1765 	} else {
1766 		PMD(PMD_NAMETODIP, ("%s: physpath with unrooted "
1767 		    "search\n", pmf))
1768 		return (NULL);
1769 	}
1770 
1771 	ASSERT(!dip ||
1772 	    (ddi_name_to_major(ddi_binding_name(dip)) != (major_t)-1));
1773 
1774 	return (dip);
1775 }
1776 
1777 /*
1778  * Search for a dependency and mark it unsatisfied
1779  */
1780 static void
1781 pm_unsatisfy(char *keeper, char *kept)
1782 {
1783 	PMD_FUNC(pmf, "unsatisfy")
1784 	pm_pdr_t *dp;
1785 
1786 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1787 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1788 		if (!dp->pdr_isprop) {
1789 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1790 			    (dp->pdr_kept_count > 0) &&
1791 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1792 				if (dp->pdr_satisfied) {
1793 					dp->pdr_satisfied = 0;
1794 					pm_unresolved_deps++;
1795 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1796 					    "pm_unresolved_deps now %d\n", pmf,
1797 					    pm_unresolved_deps))
1798 				}
1799 			}
1800 		}
1801 	}
1802 }
1803 
1804 /*
1805  * Device dip is being un power managed, it keeps up count other devices.
1806  * We need to release any hold we have on the kept devices, and also
1807  * mark the dependency no longer satisfied.
1808  */
1809 static void
1810 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1811 {
1812 	PMD_FUNC(pmf, "unkeeps")
1813 	int i, j;
1814 	dev_info_t *kept;
1815 	dev_info_t *dip;
1816 	struct pm_component *cp;
1817 	int keeper_on = 0, circ;
1818 
1819 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1820 	    keeper, (void *)keptpaths))
1821 	/*
1822 	 * Try to grab keeper. Keeper may have gone away by now,
1823 	 * in this case, used the passed in value pwr
1824 	 */
1825 	dip = pm_name_to_dip(keeper, 1);
1826 	for (i = 0; i < count; i++) {
1827 		/* Release power hold */
1828 		kept = pm_name_to_dip(keptpaths[i], 1);
1829 		if (kept) {
1830 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1831 			    PM_DEVICE(kept), i))
1832 			/*
1833 			 * We need to check if we skipped a bringup here
1834 			 * because we could have failed the bringup
1835 			 * (ie DIRECT PM device) and have
1836 			 * not increment the count.
1837 			 */
1838 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1839 				keeper_on = 0;
1840 				PM_LOCK_POWER(dip, &circ);
1841 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1842 				    cp = &DEVI(dip)->devi_pm_components[j];
1843 					if (cur_power(cp)) {
1844 						keeper_on++;
1845 						break;
1846 					}
1847 				}
1848 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1849 					pm_rele_power(kept);
1850 					DEVI(kept)->devi_pm_flags
1851 						&= ~PMC_SKIP_BRINGUP;
1852 				}
1853 				PM_UNLOCK_POWER(dip, circ);
1854 			} else if (pwr) {
1855 				if (PM_SKBU(kept) == 0) {
1856 					pm_rele_power(kept);
1857 					DEVI(kept)->devi_pm_flags
1858 					    &= ~PMC_SKIP_BRINGUP;
1859 				}
1860 			}
1861 			ddi_release_devi(kept);
1862 		}
1863 		/*
1864 		 * mark this dependency not satisfied
1865 		 */
1866 		pm_unsatisfy(keeper, keptpaths[i]);
1867 	}
1868 	if (dip)
1869 		ddi_release_devi(dip);
1870 }
1871 
1872 /*
1873  * Device kept is being un power managed, it is kept up by keeper.
1874  * We need to mark the dependency no longer satisfied.
1875  */
1876 static void
1877 pm_unkepts(char *kept, char *keeper)
1878 {
1879 	PMD_FUNC(pmf, "unkepts")
1880 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1881 	ASSERT(keeper != NULL);
1882 	/*
1883 	 * mark this dependency not satisfied
1884 	 */
1885 	pm_unsatisfy(keeper, kept);
1886 }
1887 
1888 /*
1889  * Removes dependency information and hold on the kepts, if the path is a
1890  * path of a keeper.
1891  */
1892 static void
1893 pm_free_keeper(char *path, int pwr)
1894 {
1895 	pm_pdr_t *dp;
1896 	int i;
1897 	size_t length;
1898 
1899 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1900 		if (strcmp(dp->pdr_keeper, path) != 0)
1901 			continue;
1902 		/*
1903 		 * Remove all our kept holds and the dependency records,
1904 		 * then free up the kept lists.
1905 		 */
1906 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1907 		if (dp->pdr_kept_count)  {
1908 			for (i = 0; i < dp->pdr_kept_count; i++) {
1909 				length = strlen(dp->pdr_kept_paths[i]);
1910 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1911 			}
1912 			kmem_free(dp->pdr_kept_paths,
1913 			    dp->pdr_kept_count * sizeof (char **));
1914 			dp->pdr_kept_paths = NULL;
1915 			dp->pdr_kept_count = 0;
1916 		}
1917 	}
1918 }
1919 
1920 /*
1921  * Removes the device represented by path from the list of kepts, if the
1922  * path is a path of a kept
1923  */
1924 static void
1925 pm_free_kept(char *path)
1926 {
1927 	pm_pdr_t *dp;
1928 	int i;
1929 	int j, count;
1930 	size_t length;
1931 	char **paths;
1932 
1933 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1934 		if (dp->pdr_kept_count == 0)
1935 			continue;
1936 		count = dp->pdr_kept_count;
1937 		/* Remove this device from the kept path lists */
1938 		for (i = 0; i < count; i++) {
1939 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1940 				pm_unkepts(path, dp->pdr_keeper);
1941 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1942 				kmem_free(dp->pdr_kept_paths[i], length);
1943 				dp->pdr_kept_paths[i] = NULL;
1944 				dp->pdr_kept_count--;
1945 			}
1946 		}
1947 		/* Compact the kept paths array */
1948 		if (dp->pdr_kept_count) {
1949 			length = dp->pdr_kept_count * sizeof (char **);
1950 			paths = kmem_zalloc(length, KM_SLEEP);
1951 			j = 0;
1952 			for (i = 0; i < count; i++) {
1953 				if (dp->pdr_kept_paths[i] != NULL) {
1954 					paths[j] = dp->pdr_kept_paths[i];
1955 					j++;
1956 				}
1957 			}
1958 			ASSERT(j == dp->pdr_kept_count);
1959 		}
1960 		/* Now free the old array and point to the new one */
1961 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1962 		if (dp->pdr_kept_count)
1963 			dp->pdr_kept_paths = paths;
1964 		else
1965 			dp->pdr_kept_paths = NULL;
1966 	}
1967 }
1968 
1969 /*
1970  * Free the dependency information for a device.
1971  */
1972 void
1973 pm_free_keeps(char *path, int pwr)
1974 {
1975 	PMD_FUNC(pmf, "free_keeps")
1976 
1977 #ifdef DEBUG
1978 	int doprdeps = 0;
1979 	void prdeps(char *);
1980 
1981 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
1982 	if (pm_debug & PMD_KEEPS) {
1983 		doprdeps = 1;
1984 		prdeps("pm_free_keeps before");
1985 	}
1986 #endif
1987 	/*
1988 	 * First assume we are a keeper and remove all our kepts.
1989 	 */
1990 	pm_free_keeper(path, pwr);
1991 	/*
1992 	 * Now assume we a kept device, and remove all our records.
1993 	 */
1994 	pm_free_kept(path);
1995 #ifdef	DEBUG
1996 	if (doprdeps) {
1997 		prdeps("pm_free_keeps after");
1998 	}
1999 #endif
2000 }
2001 
2002 static int
2003 pm_is_kept(char *path)
2004 {
2005 	pm_pdr_t *dp;
2006 	int i;
2007 
2008 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2009 		if (dp->pdr_kept_count == 0)
2010 			continue;
2011 		for (i = 0; i < dp->pdr_kept_count; i++) {
2012 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2013 				return (1);
2014 		}
2015 	}
2016 	return (0);
2017 }
2018 
2019 static void
2020 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2021 {
2022 	PMD_FUNC(pmf, "hold_rele_power")
2023 	int circ;
2024 
2025 	if ((dip == NULL) ||
2026 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2027 		return;
2028 
2029 	PM_LOCK_POWER(dip, &circ);
2030 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2031 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2032 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2033 
2034 	PM_KUC(dip) += cnt;
2035 
2036 	ASSERT(PM_KUC(dip) >= 0);
2037 	PM_UNLOCK_POWER(dip, circ);
2038 
2039 	if (cnt < 0 && PM_KUC(dip) == 0)
2040 		pm_rescan(dip);
2041 }
2042 
2043 #define	MAX_PPM_HANDLERS	4
2044 
2045 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2046 
2047 struct	ppm_callbacks {
2048 	int (*ppmc_func)(dev_info_t *);
2049 	dev_info_t	*ppmc_dip;
2050 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2051 
2052 
2053 /*
2054  * This routine calls into all the registered ppms to notify them
2055  * that either all components of power-managed devices are at their
2056  * lowest levels or no longer all are at their lowest levels.
2057  */
2058 static void
2059 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2060 {
2061 	struct ppm_callbacks *ppmcp;
2062 	power_req_t power_req;
2063 	int result = 0;
2064 
2065 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2066 	power_req.req.ppm_all_lowest_req.mode = mode;
2067 	mutex_enter(&ppm_lock);
2068 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2069 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2070 		    DDI_CTLOPS_POWER, &power_req, &result);
2071 	mutex_exit(&ppm_lock);
2072 }
2073 
2074 static void
2075 pm_set_pm_info(dev_info_t *dip, void *value)
2076 {
2077 	DEVI(dip)->devi_pm_info = value;
2078 }
2079 
2080 pm_rsvp_t *pm_blocked_list;
2081 
2082 /*
2083  * Look up an entry in the blocked list by dip and component
2084  */
2085 static pm_rsvp_t *
2086 pm_rsvp_lookup(dev_info_t *dip, int comp)
2087 {
2088 	pm_rsvp_t *p;
2089 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2090 	for (p = pm_blocked_list; p; p = p->pr_next)
2091 		if (p->pr_dip == dip && p->pr_comp == comp) {
2092 			return (p);
2093 		}
2094 	return (NULL);
2095 }
2096 
2097 /*
2098  * Called when a device which is direct power managed (or the parent or
2099  * dependent of such a device) changes power, or when a pm clone is closed
2100  * that was direct power managing a device.  This call results in pm_blocked()
2101  * (below) returning.
2102  */
2103 void
2104 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2105 {
2106 	PMD_FUNC(pmf, "proceed")
2107 	pm_rsvp_t *found = NULL;
2108 	pm_rsvp_t *p;
2109 
2110 	mutex_enter(&pm_rsvp_lock);
2111 	switch (cmd) {
2112 	/*
2113 	 * we're giving up control, let any pending op continue
2114 	 */
2115 	case PMP_RELEASE:
2116 		for (p = pm_blocked_list; p; p = p->pr_next) {
2117 			if (dip == p->pr_dip) {
2118 				p->pr_retval = PMP_RELEASE;
2119 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2120 				    pmf, PM_DEVICE(dip)))
2121 				cv_signal(&p->pr_cv);
2122 			}
2123 		}
2124 		break;
2125 
2126 	/*
2127 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2128 	 * succeed and a non-matching request for the same device fail
2129 	 */
2130 	case PMP_SETPOWER:
2131 		found = pm_rsvp_lookup(dip, comp);
2132 		if (!found)	/* if driver not waiting */
2133 			break;
2134 		/*
2135 		 * This cannot be pm_lower_power, since that can only happen
2136 		 * during detach or probe
2137 		 */
2138 		if (found->pr_newlevel <= newlevel) {
2139 			found->pr_retval = PMP_SUCCEED;
2140 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2141 			    PM_DEVICE(dip)))
2142 		} else {
2143 			found->pr_retval = PMP_FAIL;
2144 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2145 			    PM_DEVICE(dip)))
2146 		}
2147 		cv_signal(&found->pr_cv);
2148 		break;
2149 
2150 	default:
2151 		panic("pm_proceed unknown cmd %d", cmd);
2152 	}
2153 	mutex_exit(&pm_rsvp_lock);
2154 }
2155 
2156 /*
2157  * This routine dispatches new work to the dependency thread. Caller must
2158  * be prepared to block for memory if necessary.
2159  */
2160 void
2161 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2162     int *res, int cached_pwr)
2163 {
2164 	pm_dep_wk_t	*new_work;
2165 
2166 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2167 	new_work->pdw_type = cmd;
2168 	new_work->pdw_wait = wait;
2169 	new_work->pdw_done = 0;
2170 	new_work->pdw_ret = 0;
2171 	new_work->pdw_pwr = cached_pwr;
2172 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2173 	if (keeper != NULL) {
2174 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2175 		    KM_SLEEP);
2176 		(void) strcpy(new_work->pdw_keeper, keeper);
2177 	}
2178 	if (kept != NULL) {
2179 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2180 		(void) strcpy(new_work->pdw_kept, kept);
2181 	}
2182 	mutex_enter(&pm_dep_thread_lock);
2183 	if (pm_dep_thread_workq == NULL) {
2184 		pm_dep_thread_workq = new_work;
2185 		pm_dep_thread_tail = new_work;
2186 		new_work->pdw_next = NULL;
2187 	} else {
2188 		pm_dep_thread_tail->pdw_next = new_work;
2189 		pm_dep_thread_tail = new_work;
2190 		new_work->pdw_next = NULL;
2191 	}
2192 	cv_signal(&pm_dep_thread_cv);
2193 	/* If caller asked for it, wait till it is done. */
2194 	if (wait)  {
2195 		while (!new_work->pdw_done)
2196 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2197 		/*
2198 		 * Pass return status, if any, back.
2199 		 */
2200 		if (res != NULL)
2201 			*res = new_work->pdw_ret;
2202 		/*
2203 		 * If we asked to wait, it is our job to free the request
2204 		 * structure.
2205 		 */
2206 		if (new_work->pdw_keeper)
2207 			kmem_free(new_work->pdw_keeper,
2208 			    strlen(new_work->pdw_keeper) + 1);
2209 		if (new_work->pdw_kept)
2210 			kmem_free(new_work->pdw_kept,
2211 			    strlen(new_work->pdw_kept) + 1);
2212 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2213 	}
2214 	mutex_exit(&pm_dep_thread_lock);
2215 }
2216 
2217 /*
2218  * Release the pm resource for this device.
2219  */
2220 void
2221 pm_rem_info(dev_info_t *dip)
2222 {
2223 	PMD_FUNC(pmf, "rem_info")
2224 	int		i, count = 0;
2225 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2226 	dev_info_t	*pdip = ddi_get_parent(dip);
2227 	char		*pathbuf;
2228 	int		work_type = PM_DEP_WK_DETACH;
2229 
2230 	ASSERT(info);
2231 
2232 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2233 	if (PM_ISDIRECT(dip)) {
2234 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2235 		ASSERT(info->pmi_clone);
2236 		info->pmi_clone = 0;
2237 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2238 	}
2239 	ASSERT(!PM_GET_PM_SCAN(dip));
2240 
2241 	/*
2242 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2243 	 * Others we check all components.  BC node that has already
2244 	 * called pm_destroy_components() has zero component count.
2245 	 * Parents that get notification are not adjusted because their
2246 	 * kidsupcnt is always 0 (or 1 during configuration).
2247 	 */
2248 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2249 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2250 
2251 	/* node is detached, so we can examine power without locking */
2252 	if (PM_ISBC(dip)) {
2253 		count = (PM_CURPOWER(dip, 0) != 0);
2254 	} else {
2255 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2256 			count += (PM_CURPOWER(dip, i) != 0);
2257 	}
2258 
2259 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2260 		e_pm_hold_rele_power(pdip, -count);
2261 
2262 	/* Schedule a request to clean up dependency records */
2263 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2264 	(void) ddi_pathname(dip, pathbuf);
2265 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2266 	    PM_DEP_NOWAIT, NULL, (count > 0));
2267 	kmem_free(pathbuf, MAXPATHLEN);
2268 
2269 	/*
2270 	 * Adjust the pm_comps_notlowest count since this device is
2271 	 * not being power-managed anymore.
2272 	 */
2273 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2274 		if (PM_CURPOWER(dip, i) != 0)
2275 			PM_DECR_NOTLOWEST(dip);
2276 	}
2277 	/*
2278 	 * Once we clear the info pointer, it looks like it is not power
2279 	 * managed to everybody else.
2280 	 */
2281 	pm_set_pm_info(dip, NULL);
2282 	kmem_free(info, sizeof (pm_info_t));
2283 }
2284 
2285 int
2286 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2287 {
2288 	int components = PM_NUMCMPTS(dip);
2289 	int *bufp;
2290 	size_t size;
2291 	int i;
2292 
2293 	if (components <= 0) {
2294 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2295 		    "can't get normal power values\n", PM_DEVICE(dip));
2296 		return (DDI_FAILURE);
2297 	} else {
2298 		size = components * sizeof (int);
2299 		bufp = kmem_alloc(size, KM_SLEEP);
2300 		for (i = 0; i < components; i++) {
2301 			bufp[i] = pm_get_normal_power(dip, i);
2302 		}
2303 	}
2304 	*length = size;
2305 	*valuep = bufp;
2306 	return (DDI_SUCCESS);
2307 }
2308 
2309 static int
2310 pm_reset_timestamps(dev_info_t *dip, void *arg)
2311 {
2312 	_NOTE(ARGUNUSED(arg))
2313 
2314 	int components;
2315 	int	i;
2316 
2317 	if (!PM_GET_PM_INFO(dip))
2318 		return (DDI_WALK_CONTINUE);
2319 	components = PM_NUMCMPTS(dip);
2320 	ASSERT(components > 0);
2321 	PM_LOCK_BUSY(dip);
2322 	for (i = 0; i < components; i++) {
2323 		struct pm_component *cp;
2324 		/*
2325 		 * If the component was not marked as busy,
2326 		 * reset its timestamp to now.
2327 		 */
2328 		cp = PM_CP(dip, i);
2329 		if (cp->pmc_timestamp)
2330 			cp->pmc_timestamp = gethrestime_sec();
2331 	}
2332 	PM_UNLOCK_BUSY(dip);
2333 	return (DDI_WALK_CONTINUE);
2334 }
2335 
2336 /*
2337  * Convert a power level to an index into the levels array (or
2338  * just PM_LEVEL_UNKNOWN in that special case).
2339  */
2340 static int
2341 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2342 {
2343 	PMD_FUNC(pmf, "level_to_index")
2344 	int i;
2345 	int limit = cp->pmc_comp.pmc_numlevels;
2346 	int *ip = cp->pmc_comp.pmc_lvals;
2347 
2348 	if (level == PM_LEVEL_UNKNOWN)
2349 		return (level);
2350 
2351 	for (i = 0; i < limit; i++) {
2352 		if (level == *ip++) {
2353 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2354 			    pmf, PM_DEVICE(dip),
2355 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2356 			return (i);
2357 		}
2358 	}
2359 	panic("pm_level_to_index: level %d not found for device "
2360 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2361 	/*NOTREACHED*/
2362 }
2363 
2364 /*
2365  * Internal function to set current power level
2366  */
2367 static void
2368 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2369 {
2370 	PMD_FUNC(pmf, "set_cur_pwr")
2371 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2372 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2373 
2374 	/*
2375 	 * Nothing to adjust if current & new levels are the same.
2376 	 */
2377 	if (curpwr != PM_LEVEL_UNKNOWN &&
2378 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2379 		return;
2380 
2381 	/*
2382 	 * Keep the count for comps doing transition to/from lowest
2383 	 * level.
2384 	 */
2385 	if (curpwr == 0) {
2386 		PM_INCR_NOTLOWEST(dip);
2387 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2388 		PM_DECR_NOTLOWEST(dip);
2389 	}
2390 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2391 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2392 }
2393 
2394 /*
2395  * This is the default method of setting the power of a device if no ppm
2396  * driver has claimed it.
2397  */
2398 int
2399 pm_power(dev_info_t *dip, int comp, int level)
2400 {
2401 	PMD_FUNC(pmf, "power")
2402 	struct dev_ops	*ops;
2403 	int		(*fn)(dev_info_t *, int, int);
2404 	struct pm_component *cp = PM_CP(dip, comp);
2405 	int retval;
2406 	pm_info_t *info = PM_GET_PM_INFO(dip);
2407 	static int pm_phc_impl(dev_info_t *, int, int, int);
2408 
2409 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2410 	    PM_DEVICE(dip), comp, level))
2411 	if (!(ops = ddi_get_driver(dip))) {
2412 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2413 		    PM_DEVICE(dip)))
2414 		return (DDI_FAILURE);
2415 	}
2416 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2417 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2418 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2419 		    (!fn ? " devo_power NULL" : "")))
2420 		return (DDI_FAILURE);
2421 	}
2422 	cp->pmc_flags |= PM_POWER_OP;
2423 	retval = (*fn)(dip, comp, level);
2424 	cp->pmc_flags &= ~PM_POWER_OP;
2425 	if (retval == DDI_SUCCESS) {
2426 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2427 		return (DDI_SUCCESS);
2428 	}
2429 
2430 	/*
2431 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2432 	 * updated only the power level of the component.  If our attempt to
2433 	 * set the device new to a power level above has failed we sync the
2434 	 * total power state via phc code now.
2435 	 */
2436 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2437 		int phc_lvl =
2438 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2439 
2440 		ASSERT(info);
2441 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2442 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2443 			pmf, PM_DEVICE(dip), comp, phc_lvl))
2444 	}
2445 
2446 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2447 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2448 	    level, power_val_to_string(cp, level)));
2449 	return (DDI_FAILURE);
2450 }
2451 
2452 int
2453 pm_unmanage(dev_info_t *dip)
2454 {
2455 	PMD_FUNC(pmf, "unmanage")
2456 	power_req_t power_req;
2457 	int result, retval = 0;
2458 
2459 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2460 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2461 	    PM_DEVICE(dip)))
2462 	power_req.request_type = PMR_PPM_UNMANAGE;
2463 	power_req.req.ppm_config_req.who = dip;
2464 	if (pm_ppm_claimed(dip))
2465 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2466 		    &power_req, &result);
2467 #ifdef DEBUG
2468 	else
2469 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2470 		    &power_req, &result);
2471 #endif
2472 	ASSERT(retval == DDI_SUCCESS);
2473 	pm_rem_info(dip);
2474 	return (retval);
2475 }
2476 
2477 int
2478 pm_raise_power(dev_info_t *dip, int comp, int level)
2479 {
2480 	if (level < 0)
2481 		return (DDI_FAILURE);
2482 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2483 	    !e_pm_valid_power(dip, comp, level))
2484 		return (DDI_FAILURE);
2485 
2486 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2487 }
2488 
2489 int
2490 pm_lower_power(dev_info_t *dip, int comp, int level)
2491 {
2492 	PMD_FUNC(pmf, "pm_lower_power")
2493 
2494 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2495 	    !e_pm_valid_power(dip, comp, level)) {
2496 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2497 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2498 		return (DDI_FAILURE);
2499 	}
2500 
2501 	if (!DEVI_IS_DETACHING(dip)) {
2502 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2503 		    pmf, PM_DEVICE(dip)))
2504 		return (DDI_FAILURE);
2505 	}
2506 
2507 	/*
2508 	 * If we don't care about saving power, or we're treating this node
2509 	 * specially, then this is a no-op
2510 	 */
2511 	if (!autopm_enabled || pm_noinvol(dip)) {
2512 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s\n", pmf, PM_DEVICE(dip),
2513 		    !autopm_enabled ? "!autopm_enabled " : "",
2514 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2515 		return (DDI_SUCCESS);
2516 	}
2517 
2518 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2519 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2520 		    PM_DEVICE(dip)))
2521 		return (DDI_FAILURE);
2522 	}
2523 	return (DDI_SUCCESS);
2524 }
2525 
2526 /*
2527  * Find the entries struct for a given dip in the blocked list, return it locked
2528  */
2529 static psce_t *
2530 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2531 {
2532 	pscc_t *p;
2533 	psce_t *psce;
2534 
2535 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2536 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2537 		if (p->pscc_dip == dip) {
2538 			*psccp = p;
2539 			psce = p->pscc_entries;
2540 			mutex_enter(&psce->psce_lock);
2541 			ASSERT(psce);
2542 			rw_exit(&pm_pscc_direct_rwlock);
2543 			return (psce);
2544 		}
2545 	}
2546 	rw_exit(&pm_pscc_direct_rwlock);
2547 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2548 	/*NOTREACHED*/
2549 }
2550 
2551 /*
2552  * Write an entry indicating a power level change (to be passed to a process
2553  * later) in the given psce.
2554  * If we were called in the path that brings up the console fb in the
2555  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2556  * we create a record that has a size of -1, a physaddr of NULL, and that
2557  * has the overflow flag set.
2558  */
2559 static int
2560 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2561     int old, int which, pm_canblock_t canblock)
2562 {
2563 	char	buf[MAXNAMELEN];
2564 	pm_state_change_t *p;
2565 	size_t	size;
2566 	caddr_t physpath = NULL;
2567 	int	overrun = 0;
2568 
2569 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2570 	(void) ddi_pathname(dip, buf);
2571 	size = strlen(buf) + 1;
2572 	p = psce->psce_in;
2573 	if (canblock == PM_CANBLOCK_BYPASS) {
2574 		physpath = kmem_alloc(size, KM_NOSLEEP);
2575 		if (physpath == NULL) {
2576 			/*
2577 			 * mark current entry as overrun
2578 			 */
2579 			p->flags |= PSC_EVENT_LOST;
2580 			size = (size_t)-1;
2581 		}
2582 	} else
2583 		physpath = kmem_alloc(size, KM_SLEEP);
2584 	if (p->size) {	/* overflow; mark the next entry */
2585 		if (p->size != (size_t)-1)
2586 			kmem_free(p->physpath, p->size);
2587 		ASSERT(psce->psce_out == p);
2588 		if (p == psce->psce_last) {
2589 			psce->psce_first->flags |= PSC_EVENT_LOST;
2590 			psce->psce_out = psce->psce_first;
2591 		} else {
2592 			(p + 1)->flags |= PSC_EVENT_LOST;
2593 			psce->psce_out = (p + 1);
2594 		}
2595 		overrun++;
2596 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2597 		p->flags |= PSC_EVENT_LOST;
2598 		p->size = 0;
2599 		p->physpath = NULL;
2600 	}
2601 	if (which == PSC_INTEREST) {
2602 		mutex_enter(&pm_compcnt_lock);
2603 		if (pm_comps_notlowest == 0)
2604 			p->flags |= PSC_ALL_LOWEST;
2605 		else
2606 			p->flags &= ~PSC_ALL_LOWEST;
2607 		mutex_exit(&pm_compcnt_lock);
2608 	}
2609 	p->event = event;
2610 	p->timestamp = gethrestime_sec();
2611 	p->component = comp;
2612 	p->old_level = old;
2613 	p->new_level = new;
2614 	p->physpath = physpath;
2615 	p->size = size;
2616 	if (physpath != NULL)
2617 		(void) strcpy(p->physpath, buf);
2618 	if (p == psce->psce_last)
2619 		psce->psce_in = psce->psce_first;
2620 	else
2621 		psce->psce_in = ++p;
2622 	mutex_exit(&psce->psce_lock);
2623 	return (overrun);
2624 }
2625 
2626 /*
2627  * Find the next entry on the interest list.  We keep a pointer to the item we
2628  * last returned in the user's cooke.  Returns a locked entries struct.
2629  */
2630 static psce_t *
2631 psc_interest(void **cookie, pscc_t **psccp)
2632 {
2633 	pscc_t *pscc;
2634 	pscc_t **cookiep = (pscc_t **)cookie;
2635 
2636 	if (*cookiep == NULL)
2637 		pscc = pm_pscc_interest;
2638 	else
2639 		pscc = (*cookiep)->pscc_next;
2640 	if (pscc) {
2641 		*cookiep = pscc;
2642 		*psccp = pscc;
2643 		mutex_enter(&pscc->pscc_entries->psce_lock);
2644 		return (pscc->pscc_entries);
2645 	} else {
2646 		return (NULL);
2647 	}
2648 }
2649 
2650 /*
2651  * Create an entry for a process to pick up indicating a power level change.
2652  */
2653 static void
2654 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2655     int newlevel, int oldlevel, pm_canblock_t canblock)
2656 {
2657 	PMD_FUNC(pmf, "enqueue_notify")
2658 	pscc_t	*pscc;
2659 	psce_t	*psce;
2660 	void		*cookie = NULL;
2661 	int	overrun;
2662 
2663 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2664 	switch (cmd) {
2665 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2666 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2667 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2668 		psce = pm_psc_dip_to_direct(dip, &pscc);
2669 		ASSERT(psce);
2670 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2671 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2672 		    pm_poll_cnt[pscc->pscc_clone]))
2673 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2674 		    PSC_DIRECT, canblock);
2675 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2676 		mutex_enter(&pm_clone_lock);
2677 		if (!overrun)
2678 			pm_poll_cnt[pscc->pscc_clone]++;
2679 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2680 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2681 		mutex_exit(&pm_clone_lock);
2682 		break;
2683 	case PSC_HAS_CHANGED:
2684 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2685 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2686 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2687 			psce = pm_psc_dip_to_direct(dip, &pscc);
2688 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2689 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2690 			    pm_poll_cnt[pscc->pscc_clone]))
2691 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2692 			    oldlevel, PSC_DIRECT, canblock);
2693 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2694 			mutex_enter(&pm_clone_lock);
2695 			if (!overrun)
2696 				pm_poll_cnt[pscc->pscc_clone]++;
2697 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2698 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2699 			mutex_exit(&pm_clone_lock);
2700 		}
2701 		mutex_enter(&pm_clone_lock);
2702 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2703 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2704 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2705 			    oldlevel, PSC_INTEREST, canblock);
2706 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2707 		}
2708 		rw_exit(&pm_pscc_interest_rwlock);
2709 		mutex_exit(&pm_clone_lock);
2710 		break;
2711 #ifdef DEBUG
2712 	default:
2713 		ASSERT(0);
2714 #endif
2715 	}
2716 }
2717 
2718 static void
2719 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2720 {
2721 	if (listp) {
2722 		pm_ppm_devlist_t *p, *next = NULL;
2723 
2724 		for (p = *listp; p; p = next) {
2725 			next = p->ppd_next;
2726 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2727 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2728 			    canblock);
2729 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2730 		}
2731 		*listp = NULL;
2732 	}
2733 }
2734 
2735 /*
2736  * Try to get the power locks of the parent node and target (child)
2737  * node.  Return true if successful (with both locks held) or false
2738  * (with no locks held).
2739  */
2740 static int
2741 pm_try_parent_child_locks(dev_info_t *pdip,
2742     dev_info_t *dip, int *pcircp, int *circp)
2743 {
2744 	if (ndi_devi_tryenter(pdip, pcircp))
2745 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2746 			return (1);
2747 		} else {
2748 			ndi_devi_exit(pdip, *pcircp);
2749 		}
2750 	return (0);
2751 }
2752 
2753 /*
2754  * Determine if the power lock owner is blocked by current thread.
2755  * returns :
2756  * 	1 - If the thread owning the effective power lock (the first lock on
2757  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2758  *          a mutex held by the current thread.
2759  *
2760  *	0 - otherwise
2761  *
2762  * Note : This function is called by pm_power_has_changed to determine whether
2763  * it is executing in parallel with pm_set_power.
2764  */
2765 static int
2766 pm_blocked_by_us(dev_info_t *dip)
2767 {
2768 	power_req_t power_req;
2769 	kthread_t *owner;
2770 	int result;
2771 	kmutex_t *mp;
2772 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2773 
2774 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2775 	power_req.req.ppm_power_lock_owner_req.who = dip;
2776 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2777 	    DDI_SUCCESS) {
2778 		/*
2779 		 * It is assumed that if the device is claimed by ppm, ppm
2780 		 * will always implement this request type and it'll always
2781 		 * return success. We panic here, if it fails.
2782 		 */
2783 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2784 		    PM_DEVICE(dip));
2785 		/*NOTREACHED*/
2786 	}
2787 
2788 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2789 	    owner->t_state == TS_SLEEP &&
2790 	    owner->t_sobj_ops &&
2791 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2792 	    (mp = (kmutex_t *)owner->t_wchan) &&
2793 	    mutex_owner(mp) == curthread)
2794 		return (1);
2795 
2796 	return (0);
2797 }
2798 
2799 /*
2800  * Notify parent which wants to hear about a child's power changes.
2801  */
2802 static void
2803 pm_notify_parent(dev_info_t *dip,
2804     dev_info_t *pdip, int comp, int old_level, int level)
2805 {
2806 	pm_bp_has_changed_t bphc;
2807 	pm_sp_misc_t pspm;
2808 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2809 	int result = DDI_SUCCESS;
2810 
2811 	bphc.bphc_dip = dip;
2812 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2813 	bphc.bphc_comp = comp;
2814 	bphc.bphc_olevel = old_level;
2815 	bphc.bphc_nlevel = level;
2816 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2817 	pspm.pspm_scan = 0;
2818 	bphc.bphc_private = &pspm;
2819 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2820 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2821 	kmem_free(pathbuf, MAXPATHLEN);
2822 }
2823 
2824 /*
2825  * Check if we need to resume a BC device, and make the attach call as required.
2826  */
2827 static int
2828 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2829 {
2830 	int ret = DDI_SUCCESS;
2831 
2832 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2833 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2834 		/* ppm is not interested in DDI_PM_RESUME */
2835 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2836 			/* XXX Should we mark it resumed, */
2837 			/* even though it failed? */
2838 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2839 			    PM_NAME(dip), PM_ADDR(dip));
2840 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2841 	}
2842 
2843 	return (ret);
2844 }
2845 
2846 /*
2847  * Tests outside the lock to see if we should bother to enqueue an entry
2848  * for any watching process.  If yes, then caller will take the lock and
2849  * do the full protocol
2850  */
2851 static int
2852 pm_watchers()
2853 {
2854 	if (pm_processes_stopped)
2855 		return (0);
2856 	return (pm_pscc_direct || pm_pscc_interest);
2857 }
2858 
2859 /*
2860  * A driver is reporting that the power of one of its device's components
2861  * has changed.  Update the power state accordingly.
2862  */
2863 int
2864 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2865 {
2866 	PMD_FUNC(pmf, "pm_power_has_changed")
2867 	int ret;
2868 	dev_info_t *pdip = ddi_get_parent(dip);
2869 	struct pm_component *cp;
2870 	int blocked, circ, pcirc, old_level;
2871 	static int pm_phc_impl(dev_info_t *, int, int, int);
2872 
2873 	if (level < 0) {
2874 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2875 		    PM_DEVICE(dip), level))
2876 		return (DDI_FAILURE);
2877 	}
2878 
2879 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2880 	    PM_DEVICE(dip), comp, level))
2881 
2882 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2883 	    !e_pm_valid_power(dip, comp, level))
2884 		return (DDI_FAILURE);
2885 
2886 	/*
2887 	 * A driver thread calling pm_power_has_changed and another thread
2888 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2889 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2890 	 * this specific deadlock.  If we can't get the lock immediately
2891 	 * and we are deadlocked, just update the component's level, do
2892 	 * notifications, and return.  We intend to update the total power
2893 	 * state later (if the other thread fails to set power to the
2894 	 * desired level).  If we were called because of a power change on a
2895 	 * component that isn't involved in a set_power op, update all state
2896 	 * immediately.
2897 	 */
2898 	cp = PM_CP(dip, comp);
2899 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2900 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2901 		    (cp->pmc_flags & PM_POWER_OP)) {
2902 			if (pm_watchers()) {
2903 				mutex_enter(&pm_rsvp_lock);
2904 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2905 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2906 				mutex_exit(&pm_rsvp_lock);
2907 			}
2908 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2909 				pm_notify_parent(dip,
2910 				    pdip, comp, cur_power(cp), level);
2911 			(void) pm_check_and_resume(dip,
2912 			    comp, cur_power(cp), level);
2913 
2914 			/*
2915 			 * Stash the old power index, update curpwr, and flag
2916 			 * that the total power state needs to be synched.
2917 			 */
2918 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2919 			/*
2920 			 * Several pm_power_has_changed calls could arrive
2921 			 * while the set power path remains blocked.  Keep the
2922 			 * oldest old power and the newest new power of any
2923 			 * sequence of phc calls which arrive during deadlock.
2924 			 */
2925 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2926 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2927 			cp->pmc_cur_pwr =
2928 			    pm_level_to_index(dip, cp, level);
2929 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
2930 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2931 			return (DDI_SUCCESS);
2932 		} else
2933 			if (blocked) {	/* blocked, but different cmpt? */
2934 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
2935 					cmn_err(CE_NOTE,
2936 					    "!pm: parent kuc not updated due "
2937 					    "to possible deadlock.\n");
2938 					return (pm_phc_impl(dip,
2939 						    comp, level, 1));
2940 				}
2941 				old_level = cur_power(cp);
2942 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2943 				    (!PM_ISBC(dip) || comp == 0) &&
2944 				    POWERING_ON(old_level, level))
2945 					pm_hold_power(pdip);
2946 				ret = pm_phc_impl(dip, comp, level, 1);
2947 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
2948 					if ((!PM_ISBC(dip) ||
2949 					    comp == 0) && level == 0 &&
2950 					    old_level != PM_LEVEL_UNKNOWN)
2951 						pm_rele_power(pdip);
2952 				}
2953 				ndi_devi_exit(pdip, pcirc);
2954 				/* child lock not held: deadlock */
2955 				return (ret);
2956 			}
2957 		delay(1);
2958 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
2959 	}
2960 
2961 	/* non-deadlock case */
2962 	old_level = cur_power(cp);
2963 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2964 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
2965 		pm_hold_power(pdip);
2966 	ret = pm_phc_impl(dip, comp, level, 1);
2967 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
2968 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
2969 		    old_level != PM_LEVEL_UNKNOWN)
2970 			pm_rele_power(pdip);
2971 	}
2972 	PM_UNLOCK_POWER(dip, circ);
2973 	ndi_devi_exit(pdip, pcirc);
2974 	return (ret);
2975 }
2976 
2977 /*
2978  * Account for power changes to a component of the the console frame buffer.
2979  * If lowering power from full (or "unkown", which is treatd as full)
2980  * we will increment the "components off" count of the fb device.
2981  * Subsequent lowering of the same component doesn't affect the count.  If
2982  * raising a component back to full power, we will decrement the count.
2983  *
2984  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
2985  */
2986 static int
2987 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
2988 {
2989 	struct pm_component *cp = PM_CP(dip, cmpt);
2990 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
2991 	int want_normal = (new == cp->pmc_norm_pwr);
2992 	int incr = 0;
2993 
2994 	if (on && !want_normal)
2995 		incr = 1;
2996 	else if (!on && want_normal)
2997 		incr = -1;
2998 	return (incr);
2999 }
3000 
3001 /*
3002  * Adjust the count of console frame buffer components < full power.
3003  */
3004 static void
3005 update_comps_off(int incr, dev_info_t *dip)
3006 {
3007 		mutex_enter(&pm_cfb_lock);
3008 		pm_cfb_comps_off += incr;
3009 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3010 		mutex_exit(&pm_cfb_lock);
3011 }
3012 
3013 /*
3014  * Update the power state in the framework (via the ppm).  The 'notify'
3015  * argument tells whether to notify watchers.  Power lock is already held.
3016  */
3017 static int
3018 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3019 {
3020 	PMD_FUNC(pmf, "phc_impl")
3021 	power_req_t power_req;
3022 	int i, dodeps = 0;
3023 	dev_info_t *pdip = ddi_get_parent(dip);
3024 	int result;
3025 	int old_level;
3026 	struct pm_component *cp;
3027 	int incr = 0;
3028 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3029 	int work_type = 0;
3030 	char *pathbuf;
3031 
3032 	/* Must use "official" power level for this test. */
3033 	cp = PM_CP(dip, comp);
3034 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3035 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3036 	if (old_level != PM_LEVEL_UNKNOWN)
3037 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3038 
3039 	if (level == old_level) {
3040 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3041 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3042 		return (DDI_SUCCESS);
3043 	}
3044 
3045 	/*
3046 	 * Tell ppm about this.
3047 	 */
3048 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3049 	power_req.req.ppm_notify_level_req.who = dip;
3050 	power_req.req.ppm_notify_level_req.cmpt = comp;
3051 	power_req.req.ppm_notify_level_req.new_level = level;
3052 	power_req.req.ppm_notify_level_req.old_level = old_level;
3053 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3054 	    &result) == DDI_FAILURE) {
3055 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3056 		    pmf, PM_DEVICE(dip), level))
3057 		return (DDI_FAILURE);
3058 	}
3059 
3060 	if (PM_IS_CFB(dip)) {
3061 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3062 
3063 		if (incr) {
3064 			update_comps_off(incr, dip);
3065 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3066 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3067 			    comp, old_level, level, pm_cfb_comps_off))
3068 		}
3069 	}
3070 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3071 	result = DDI_SUCCESS;
3072 
3073 	if (notify) {
3074 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3075 			pm_notify_parent(dip, pdip, comp, old_level, level);
3076 		(void) pm_check_and_resume(dip, comp, old_level, level);
3077 	}
3078 
3079 	/*
3080 	 * Decrement the dependency kidsup count if we turn a device
3081 	 * off.
3082 	 */
3083 	if (POWERING_OFF(old_level, level)) {
3084 		dodeps = 1;
3085 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3086 			cp = PM_CP(dip, i);
3087 			if (cur_power(cp)) {
3088 				dodeps = 0;
3089 				break;
3090 			}
3091 		}
3092 		if (dodeps)
3093 			work_type = PM_DEP_WK_POWER_OFF;
3094 	}
3095 
3096 	/*
3097 	 * Increment if we turn it on. Check to see
3098 	 * if other comps are already on, if so,
3099 	 * dont increment.
3100 	 */
3101 	if (POWERING_ON(old_level, level)) {
3102 		dodeps = 1;
3103 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3104 			cp = PM_CP(dip, i);
3105 			if (comp == i)
3106 				continue;
3107 			/* -1 also treated as 0 in this case */
3108 			if (cur_power(cp) > 0) {
3109 				dodeps = 0;
3110 				break;
3111 			}
3112 		}
3113 		if (dodeps)
3114 			work_type = PM_DEP_WK_POWER_ON;
3115 	}
3116 
3117 	if (dodeps) {
3118 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3119 		(void) ddi_pathname(dip, pathbuf);
3120 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3121 		    PM_DEP_NOWAIT, NULL, 0);
3122 		kmem_free(pathbuf, MAXPATHLEN);
3123 	}
3124 
3125 	if (notify && (level != old_level) && pm_watchers()) {
3126 		mutex_enter(&pm_rsvp_lock);
3127 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3128 		    PM_CANBLOCK_BLOCK);
3129 		mutex_exit(&pm_rsvp_lock);
3130 	}
3131 
3132 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3133 	pm_rescan(dip);
3134 	return (DDI_SUCCESS);
3135 }
3136 
3137 /*
3138  * This function is called at startup time to notify pm of the existence
3139  * of any platform power managers for this platform.  As a result of
3140  * this registration, each function provided will be called each time
3141  * a device node is attached, until one returns true, and it must claim the
3142  * device node (by returning non-zero) if it wants to be involved in the
3143  * node's power management.  If it does claim the node, then it will
3144  * subsequently be notified of attach and detach events.
3145  *
3146  */
3147 
3148 int
3149 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3150 {
3151 	PMD_FUNC(pmf, "register_ppm")
3152 	struct ppm_callbacks *ppmcp;
3153 	pm_component_t *cp;
3154 	int i, pwr, result, circ;
3155 	power_req_t power_req;
3156 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3157 	void pm_ppm_claim(dev_info_t *);
3158 
3159 	mutex_enter(&ppm_lock);
3160 	ppmcp = ppm_callbacks;
3161 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3162 		if (ppmcp->ppmc_func == NULL) {
3163 			ppmcp->ppmc_func = func;
3164 			ppmcp->ppmc_dip = dip;
3165 			break;
3166 		}
3167 	}
3168 	mutex_exit(&ppm_lock);
3169 
3170 	if (i >= MAX_PPM_HANDLERS)
3171 		return (DDI_FAILURE);
3172 	while ((dip = ddi_get_parent(dip)) != NULL) {
3173 		if (PM_GET_PM_INFO(dip) == NULL)
3174 			continue;
3175 		pm_ppm_claim(dip);
3176 		if (pm_ppm_claimed(dip)) {
3177 			/*
3178 			 * Tell ppm about this.
3179 			 */
3180 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3181 			p->old_level = PM_LEVEL_UNKNOWN;
3182 			p->who = dip;
3183 			PM_LOCK_POWER(dip, &circ);
3184 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3185 				cp = PM_CP(dip, i);
3186 				pwr = cp->pmc_cur_pwr;
3187 				if (pwr != PM_LEVEL_UNKNOWN) {
3188 					p->cmpt = i;
3189 					p->new_level = cur_power(cp);
3190 					p->old_level = PM_LEVEL_UNKNOWN;
3191 					if (pm_ctlops(PPM(dip), dip,
3192 					    DDI_CTLOPS_POWER, &power_req,
3193 					    &result) == DDI_FAILURE) {
3194 						PMD(PMD_FAIL, ("%s: pc "
3195 						    "%s@%s(%s#%d) to %d "
3196 						    "fails\n", pmf,
3197 						    PM_DEVICE(dip), pwr))
3198 					}
3199 				}
3200 			}
3201 			PM_UNLOCK_POWER(dip, circ);
3202 		}
3203 	}
3204 	return (DDI_SUCCESS);
3205 }
3206 
3207 /*
3208  * Call the ppm's that have registered and adjust the devinfo struct as
3209  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3210  * by each ppm are assumed to be disjoint.
3211  */
3212 void
3213 pm_ppm_claim(dev_info_t *dip)
3214 {
3215 	struct ppm_callbacks *ppmcp;
3216 
3217 	if (PPM(dip)) {
3218 		return;
3219 	}
3220 	mutex_enter(&ppm_lock);
3221 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3222 		if ((*ppmcp->ppmc_func)(dip)) {
3223 			DEVI(dip)->devi_pm_ppm =
3224 			    (struct dev_info *)ppmcp->ppmc_dip;
3225 			mutex_exit(&ppm_lock);
3226 			return;
3227 		}
3228 	}
3229 	mutex_exit(&ppm_lock);
3230 }
3231 
3232 /*
3233  * Node is being detached so stop autopm until we see if it succeeds, in which
3234  * case pm_stop will be called.  For backwards compatible devices we bring the
3235  * device up to full power on the assumption the detach will succeed.
3236  */
3237 void
3238 pm_detaching(dev_info_t *dip)
3239 {
3240 	PMD_FUNC(pmf, "detaching")
3241 	pm_info_t *info = PM_GET_PM_INFO(dip);
3242 	int iscons;
3243 
3244 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3245 	    PM_NUMCMPTS(dip)))
3246 	if (info == NULL)
3247 		return;
3248 	ASSERT(DEVI_IS_DETACHING(dip));
3249 	PM_LOCK_DIP(dip);
3250 	info->pmi_dev_pm_state |= PM_DETACHING;
3251 	PM_UNLOCK_DIP(dip);
3252 	if (!PM_ISBC(dip))
3253 		pm_scan_stop(dip);
3254 
3255 	/*
3256 	 * console and old-style devices get brought up when detaching.
3257 	 */
3258 	iscons = PM_IS_CFB(dip);
3259 	if (iscons || PM_ISBC(dip)) {
3260 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3261 		if (iscons) {
3262 			mutex_enter(&pm_cfb_lock);
3263 			while (cfb_inuse) {
3264 				mutex_exit(&pm_cfb_lock);
3265 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3266 				delay(1);
3267 				mutex_enter(&pm_cfb_lock);
3268 			}
3269 			ASSERT(cfb_dip_detaching == NULL);
3270 			ASSERT(cfb_dip);
3271 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3272 			cfb_dip = NULL;
3273 			mutex_exit(&pm_cfb_lock);
3274 		}
3275 	}
3276 }
3277 
3278 /*
3279  * Node failed to detach.  If it used to be autopm'd, make it so again.
3280  */
3281 void
3282 pm_detach_failed(dev_info_t *dip)
3283 {
3284 	PMD_FUNC(pmf, "detach_failed")
3285 	pm_info_t *info = PM_GET_PM_INFO(dip);
3286 	int pm_all_at_normal(dev_info_t *);
3287 
3288 	if (info == NULL)
3289 		return;
3290 	ASSERT(DEVI_IS_DETACHING(dip));
3291 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3292 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3293 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3294 			/* Make sure the operation is still needed */
3295 			if (!pm_all_at_normal(dip)) {
3296 				if (pm_all_to_normal(dip,
3297 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3298 					PMD(PMD_ERROR, ("%s: could not bring "
3299 					    "%s@%s(%s#%d) to normal\n", pmf,
3300 					    PM_DEVICE(dip)))
3301 				}
3302 			}
3303 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3304 		}
3305 	}
3306 	if (!PM_ISBC(dip)) {
3307 		mutex_enter(&pm_scan_lock);
3308 		if (autopm_enabled)
3309 			pm_scan_init(dip);
3310 		mutex_exit(&pm_scan_lock);
3311 		pm_rescan(dip);
3312 	}
3313 }
3314 
/*
 * Generic Backwards Compatible (BC) component.
 *
 * bc_comp is the template that e_pm_default_components() copies, by struct
 * assignment, into every component of a BC device; the per-component level
 * and threshold arrays are then allocated separately.  bc_names supplies the
 * two level names ("off", "on") shared by all such components.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3319 
3320 static void
3321 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3322 {
3323 	pm_comp_t *pmc;
3324 	pmc = &cp->pmc_comp;
3325 	pmc->pmc_numlevels = 2;
3326 	pmc->pmc_lvals[0] = 0;
3327 	pmc->pmc_lvals[1] = norm;
3328 	e_pm_set_cur_pwr(dip, cp, norm);
3329 }
3330 
3331 static void
3332 e_pm_default_components(dev_info_t *dip, int cmpts)
3333 {
3334 	int i;
3335 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3336 
3337 	p = DEVI(dip)->devi_pm_components;
3338 	for (i = 0; i < cmpts; i++, p++) {
3339 		p->pmc_comp = bc_comp;	/* struct assignment */
3340 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3341 		    KM_SLEEP);
3342 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3343 		    KM_SLEEP);
3344 		p->pmc_comp.pmc_numlevels = 2;
3345 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3346 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3347 	}
3348 }
3349 
3350 /*
3351  * Called from functions that require components to exist already to allow
3352  * for their creation by parsing the pm-components property.
3353  * Device will not be power managed as a result of this call
3354  * No locking needed because we're single threaded by the ndi_devi_enter
3355  * done while attaching, and the device isn't visible until after it has
3356  * attached
3357  */
3358 int
3359 pm_premanage(dev_info_t *dip, int style)
3360 {
3361 	PMD_FUNC(pmf, "premanage")
3362 	pm_comp_t	*pcp, *compp;
3363 	int		cmpts, i, norm, error;
3364 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3365 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3366 
3367 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3368 	/*
3369 	 * If this dip has already been processed, don't mess with it
3370 	 */
3371 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3372 		return (DDI_SUCCESS);
3373 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3374 		return (DDI_FAILURE);
3375 	}
3376 	/*
3377 	 * Look up pm-components property and create components accordingly
3378 	 * If that fails, fall back to backwards compatibility
3379 	 */
3380 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3381 		/*
3382 		 * If error is set, the property existed but was not well formed
3383 		 */
3384 		if (error || (style == PM_STYLE_NEW)) {
3385 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3386 			return (DDI_FAILURE);
3387 		}
3388 		/*
3389 		 * If they don't have the pm-components property, then we
3390 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3391 		 * behavior driver must have called pm_create_components, and
3392 		 * we need to flesh out dummy components
3393 		 */
3394 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3395 			/*
3396 			 * Not really failure, but we don't want the
3397 			 * caller to treat it as success
3398 			 */
3399 			return (DDI_FAILURE);
3400 		}
3401 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3402 		e_pm_default_components(dip, cmpts);
3403 		for (i = 0; i < cmpts; i++) {
3404 			/*
3405 			 * if normal power not set yet, we don't really know
3406 			 * what *ANY* of the power values are.  If normal
3407 			 * power is set, then we assume for this backwards
3408 			 * compatible case that the values are 0, normal power.
3409 			 */
3410 			norm = pm_get_normal_power(dip, i);
3411 			if (norm == (uint_t)-1) {
3412 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3413 				    PM_DEVICE(dip), i))
3414 				return (DDI_FAILURE);
3415 			}
3416 			/*
3417 			 * Components of BC devices start at their normal power,
3418 			 * so count them to be not at their lowest power.
3419 			 */
3420 			PM_INCR_NOTLOWEST(dip);
3421 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3422 		}
3423 	} else {
3424 		/*
3425 		 * e_pm_create_components was called from pm_autoconfig(), it
3426 		 * creates components with no descriptions (or known levels)
3427 		 */
3428 		cmpts = PM_NUMCMPTS(dip);
3429 		ASSERT(cmpts != 0);
3430 		pcp = compp;
3431 		p = DEVI(dip)->devi_pm_components;
3432 		for (i = 0; i < cmpts; i++, p++) {
3433 			p->pmc_comp = *pcp++;   /* struct assignment */
3434 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3435 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3436 		}
3437 		pm_set_device_threshold(dip, pm_system_idle_threshold,
3438 		    PMC_DEF_THRESH);
3439 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3440 	}
3441 	return (DDI_SUCCESS);
3442 }
3443 
3444 /*
3445  * Called from during or after the device's attach to let us know it is ready
3446  * to play autopm.   Look up the pm model and manage the device accordingly.
3447  * Returns system call errno value.
3448  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3449  * a little cleaner
3450  *
3451  * Called with dip lock held, return with dip lock unheld.
3452  */
3453 
3454 int
3455 e_pm_manage(dev_info_t *dip, int style)
3456 {
3457 	PMD_FUNC(pmf, "e_manage")
3458 	pm_info_t	*info;
3459 	dev_info_t	*pdip = ddi_get_parent(dip);
3460 	int	pm_thresh_specd(dev_info_t *);
3461 	int	count;
3462 	char	*pathbuf;
3463 
3464 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3465 		return (DDI_FAILURE);
3466 	}
3467 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3468 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3469 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3470 
3471 	/*
3472 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3473 	 * out at their normal power, so they are "up", others start out
3474 	 * unknown, which is effectively "up".  Parent which want notification
3475 	 * get kidsupcnt of 0 always.
3476 	 */
3477 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3478 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3479 		e_pm_hold_rele_power(pdip, count);
3480 
3481 	pm_set_pm_info(dip, info);
3482 	/*
3483 	 * Apply any recorded thresholds
3484 	 */
3485 	(void) pm_thresh_specd(dip);
3486 
3487 	/*
3488 	 * Do dependency processing.
3489 	 */
3490 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3491 	(void) ddi_pathname(dip, pathbuf);
3492 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3493 	    PM_DEP_NOWAIT, NULL, 0);
3494 	kmem_free(pathbuf, MAXPATHLEN);
3495 
3496 	if (!PM_ISBC(dip)) {
3497 		mutex_enter(&pm_scan_lock);
3498 		if (autopm_enabled) {
3499 			pm_scan_init(dip);
3500 			mutex_exit(&pm_scan_lock);
3501 			pm_rescan(dip);
3502 		} else {
3503 			mutex_exit(&pm_scan_lock);
3504 		}
3505 	}
3506 	return (0);
3507 }
3508 
3509 /*
3510  * This is the obsolete exported interface for a driver to find out its
3511  * "normal" (max) power.
3512  * We only get components destroyed while no power management is
3513  * going on (and the device is detached), so we don't need a mutex here
3514  */
3515 int
3516 pm_get_normal_power(dev_info_t *dip, int comp)
3517 {
3518 
3519 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3520 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3521 	}
3522 	return (DDI_FAILURE);
3523 }
3524 
3525 /*
3526  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3527  */
3528 int
3529 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3530 {
3531 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3532 		*levelp = PM_CURPOWER(dip, comp);
3533 		return (DDI_SUCCESS);
3534 	}
3535 	return (DDI_FAILURE);
3536 }
3537 
3538 /*
3539  * Returns current threshold of indicated component
3540  */
3541 static int
3542 cur_threshold(dev_info_t *dip, int comp)
3543 {
3544 	pm_component_t *cp = PM_CP(dip, comp);
3545 	int pwr;
3546 
3547 	if (PM_ISBC(dip)) {
3548 		/*
3549 		 * backwards compatible nodes only have one threshold
3550 		 */
3551 		return (cp->pmc_comp.pmc_thresh[1]);
3552 	}
3553 	pwr = cp->pmc_cur_pwr;
3554 	if (pwr == PM_LEVEL_UNKNOWN) {
3555 		int thresh;
3556 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3557 			thresh = pm_default_nexus_threshold;
3558 		else
3559 			thresh = pm_system_idle_threshold;
3560 		return (thresh);
3561 	}
3562 	ASSERT(cp->pmc_comp.pmc_thresh);
3563 	return (cp->pmc_comp.pmc_thresh[pwr]);
3564 }
3565 
3566 /*
3567  * Compute next lower component power level given power index.
3568  */
3569 static int
3570 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3571 {
3572 	int nxt_pwr;
3573 
3574 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3575 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3576 	} else {
3577 		pwrndx--;
3578 		ASSERT(pwrndx >= 0);
3579 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3580 	}
3581 	return (nxt_pwr);
3582 }
3583 
3584 /*
3585  * Bring all components of device to normal power
3586  */
3587 int
3588 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3589 {
3590 	PMD_FUNC(pmf, "all_to_normal")
3591 	int		*normal;
3592 	int		i, ncomps, result;
3593 	size_t		size;
3594 	int		changefailed = 0;
3595 
3596 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3597 	ASSERT(PM_GET_PM_INFO(dip));
3598 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3599 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3600 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3601 		return (DDI_FAILURE);
3602 	}
3603 	ncomps = PM_NUMCMPTS(dip);
3604 	for (i = 0; i < ncomps; i++) {
3605 		if (pm_set_power(dip, i, normal[i],
3606 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3607 			changefailed++;
3608 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3609 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3610 			    PM_DEVICE(dip), i, normal[i], result))
3611 		}
3612 	}
3613 	kmem_free(normal, size);
3614 	if (changefailed) {
3615 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3616 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3617 		return (DDI_FAILURE);
3618 	}
3619 	return (DDI_SUCCESS);
3620 }
3621 
3622 /*
3623  * Returns true if all components of device are at normal power
3624  */
3625 int
3626 pm_all_at_normal(dev_info_t *dip)
3627 {
3628 	PMD_FUNC(pmf, "all_at_normal")
3629 	int		*normal;
3630 	int		i;
3631 	size_t		size;
3632 
3633 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3634 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3635 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3636 		return (DDI_FAILURE);
3637 	}
3638 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3639 		int current = PM_CURPOWER(dip, i);
3640 		if (normal[i] > current) {
3641 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3642 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3643 			    normal[i], current))
3644 			break;
3645 		}
3646 	}
3647 	kmem_free(normal, size);
3648 	if (i != PM_NUMCMPTS(dip)) {
3649 		return (0);
3650 	}
3651 	return (1);
3652 }
3653 
3654 static void
3655 bring_wekeeps_up(char *keeper)
3656 {
3657 	PMD_FUNC(pmf, "bring_wekeeps_up")
3658 	int i;
3659 	pm_pdr_t *dp;
3660 	pm_info_t *wku_info;
3661 	char *kept_path;
3662 	dev_info_t *kept;
3663 	static void bring_pmdep_up(dev_info_t *, int);
3664 
3665 	if (panicstr) {
3666 		return;
3667 	}
3668 	/*
3669 	 * We process the request even if the keeper detaches because
3670 	 * detach processing expects this to increment kidsupcnt of kept.
3671 	 */
3672 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3673 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3674 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3675 			continue;
3676 		for (i = 0; i < dp->pdr_kept_count; i++) {
3677 			kept_path = dp->pdr_kept_paths[i];
3678 			if (kept_path == NULL)
3679 				continue;
3680 			ASSERT(kept_path[0] != '\0');
3681 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3682 				continue;
3683 			wku_info = PM_GET_PM_INFO(kept);
3684 			if (wku_info == NULL) {
3685 				if (kept)
3686 					ddi_release_devi(kept);
3687 				continue;
3688 			}
3689 			/*
3690 			 * Don't mess with it if it is being detached, it isn't
3691 			 * safe to call its power entry point
3692 			 */
3693 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3694 				if (kept)
3695 					ddi_release_devi(kept);
3696 				continue;
3697 			}
3698 			bring_pmdep_up(kept, 1);
3699 			ddi_release_devi(kept);
3700 		}
3701 	}
3702 }
3703 
3704 /*
3705  * Bring up the 'kept' device passed as argument
3706  */
3707 static void
3708 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3709 {
3710 	PMD_FUNC(pmf, "bring_pmdep_up")
3711 	int is_all_at_normal = 0;
3712 
3713 	/*
3714 	 * If the kept device has been unmanaged, do nothing.
3715 	 */
3716 	if (!PM_GET_PM_INFO(kept_dip))
3717 		return;
3718 
3719 	/* Just ignore DIRECT PM device till they are released. */
3720 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3721 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3722 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3723 		    "controlling process did something else\n", pmf,
3724 		    PM_DEVICE(kept_dip)))
3725 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3726 		return;
3727 	}
3728 	/* if we got here the keeper had a transition from OFF->ON */
3729 	if (hold)
3730 		pm_hold_power(kept_dip);
3731 
3732 	if (!is_all_at_normal)
3733 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3734 }
3735 
/*
 * A bunch of stuff that belongs only to the next routine (or two):
 * string constants, a list node type, and character classification macros.
 */

/* "NAME=" prefix and its length excluding the terminating NUL. */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* Name of the pm-components property. */
static const char pmcompstr[] = "pm-components";

/* Singly linked list node carrying one component description. */
struct pm_comp_pkg {
	pm_comp_t		*comp;
	struct pm_comp_pkg	*next;
};

/*
 * ASCII-only character tests.  Note that both macros evaluate their
 * argument more than once, so it must be free of side effects.
 */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))
3753 
/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * FINISH_COMP completes the component description pointed to by compp.
 * It is not self-contained: it reads and writes the caller's locals compp,
 * size, level, tp, j, lvals, lnames and lszs.  It allocates one buffer of
 * 'size' bytes for all level names plus 'level'-entry arrays for the name
 * pointers, level values and thresholds, then copies each level's value and
 * name in, advancing tp by lszs[j] after each name.  Every threshold is
 * initialised to INT_MAX.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3777 
3778 /*
3779  * Create (empty) component data structures.
3780  */
3781 static void
3782 e_pm_create_components(dev_info_t *dip, int num_components)
3783 {
3784 	struct pm_component *compp, *ocompp;
3785 	int i, size = 0;
3786 
3787 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3788 	ASSERT(!DEVI(dip)->devi_pm_components);
3789 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3790 	size = sizeof (struct pm_component) * num_components;
3791 
3792 	compp = kmem_zalloc(size, KM_SLEEP);
3793 	ocompp = compp;
3794 	DEVI(dip)->devi_pm_comp_size = size;
3795 	DEVI(dip)->devi_pm_num_components = num_components;
3796 	PM_LOCK_BUSY(dip);
3797 	for (i = 0; i < num_components;  i++) {
3798 		compp->pmc_timestamp = gethrestime_sec();
3799 		compp->pmc_norm_pwr = (uint_t)-1;
3800 		compp++;
3801 	}
3802 	PM_UNLOCK_BUSY(dip);
3803 	DEVI(dip)->devi_pm_components = ocompp;
3804 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3805 }
3806 
/*
 * Parse hex or decimal value from char string.
 *
 *	cp	start of the text to parse
 *	valp	where the parsed value is stored on success
 *
 * The text must begin with a decimal digit.  A leading "0x" or "0X"
 * introduces hexadecimal digits; any other leading '0' is simply skipped
 * and the digits that follow are read as decimal (there is no octal).
 * Parsing stops at the first character that is not a digit of the base.
 *
 * Returns a pointer to that first unconsumed character, or NULL (with
 * *valp untouched) if the text does not begin with a digit.
 *
 * The value is accumulated directly from the input, so digit strings of
 * any length are handled without an intermediate buffer.  Accumulation is
 * done in unsigned arithmetic, so an over-long number wraps instead of
 * invoking signed-overflow undefined behavior.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	unsigned int value = 0;
	unsigned int base = 10;
	int ch = *cp;

	if (ch < '0' || ch > '9')
		return (NULL);
	cp++;

	if (ch == '0') {
		/* "0x"/"0X" selects hex; otherwise the zero is dropped. */
		if (*cp == 'x' || *cp == 'X') {
			base = 16;
			cp++;
		}
	} else {
		value = (unsigned int)(ch - '0');
	}

	for (;; cp++) {
		unsigned int digit;

		ch = *cp;
		if (ch >= '0' && ch <= '9')
			digit = (unsigned int)(ch - '0');
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = (unsigned int)(ch - 'a') + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = (unsigned int)(ch - 'A') + 10;
		else
			break;
		value = value * base + digit;
	}

	*valp = (int)value;
	return (cp);
}
3869 
3870 /*
3871  * Set max (previously documented as "normal") power.
3872  */
3873 static void
3874 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3875 {
3876 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3877 }
3878 
3879 /*
3880  * Internal routine for destroying components
3881  * It is called even when there might not be any, so it must be forgiving.
3882  */
3883 static void
3884 e_pm_destroy_components(dev_info_t *dip)
3885 {
3886 	int i;
3887 	struct pm_component *cp;
3888 
3889 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3890 	if (PM_NUMCMPTS(dip) == 0)
3891 		return;
3892 	cp = DEVI(dip)->devi_pm_components;
3893 	ASSERT(cp);
3894 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3895 		int nlevels = cp->pmc_comp.pmc_numlevels;
3896 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3897 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3898 		/*
3899 		 * For BC nodes, the rest is static in bc_comp, so skip it
3900 		 */
3901 		if (PM_ISBC(dip))
3902 			continue;
3903 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
3904 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
3905 		kmem_free(cp->pmc_comp.pmc_lname_buf,
3906 				cp->pmc_comp.pmc_lnames_sz);
3907 	}
3908 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
3909 	DEVI(dip)->devi_pm_components = NULL;
3910 	DEVI(dip)->devi_pm_num_components = 0;
3911 	DEVI(dip)->devi_pm_flags &=
3912 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
3913 }
3914 
3915 /*
3916  * Read the pm-components property (if there is one) and use it to set up
3917  * components.  Returns a pointer to an array of component structures if
3918  * pm-components found and successfully parsed, else returns NULL.
3919  * Sets error return *errp to true to indicate a failure (as opposed to no
3920  * property being present).
3921  */
3922 pm_comp_t *
3923 pm_autoconfig(dev_info_t *dip, int *errp)
3924 {
3925 	PMD_FUNC(pmf, "autoconfig")
3926 	uint_t nelems;
3927 	char **pp;
3928 	pm_comp_t *compp = NULL;
3929 	int i, j, level, components = 0;
3930 	size_t size = 0;
3931 	struct pm_comp_pkg *p, *ptail;
3932 	struct pm_comp_pkg *phead = NULL;
3933 	int *lvals = NULL;
3934 	int *lszs = NULL;
3935 	int *np = NULL;
3936 	int npi = 0;
3937 	char **lnames = NULL;
3938 	char *cp, *tp;
3939 	pm_comp_t *ret = NULL;
3940 
3941 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3942 	*errp = 0;	/* assume success */
3943 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3944 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
3945 		return (NULL);
3946 	}
3947 
3948 	if (nelems < 3) {	/* need at least one name and two levels */
3949 		goto errout;
3950 	}
3951 
3952 	/*
3953 	 * pm_create_components is no longer allowed
3954 	 */
3955 	if (PM_NUMCMPTS(dip) != 0) {
3956 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
3957 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
3958 		goto errout;
3959 	}
3960 
3961 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3962 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3963 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
3964 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3965 
3966 	level = 0;
3967 	phead = NULL;
3968 	for (i = 0; i < nelems; i++) {
3969 		cp = pp[i];
3970 		if (!isdigit(*cp)) {	/*  must be name */
3971 			if (strncmp(cp, namestr, nameln) != 0) {
3972 				goto errout;
3973 			}
3974 			if (i != 0) {
3975 				if (level == 0) {	/* no level spec'd */
3976 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
3977 					    pmf))
3978 					goto errout;
3979 				}
3980 				np[npi++] = lvals[level - 1];
3981 				/* finish up previous component levels */
3982 				FINISH_COMP;
3983 			}
3984 			cp += nameln;
3985 			if (!*cp) {
3986 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
3987 				goto errout;
3988 			}
3989 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
3990 			if (phead == NULL) {
3991 				phead = ptail = p;
3992 			} else {
3993 				ptail->next = p;
3994 				ptail = p;
3995 			}
3996 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
3997 			    KM_SLEEP);
3998 			compp->pmc_name_sz = strlen(cp) + 1;
3999 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4000 			    KM_SLEEP);
4001 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4002 			components++;
4003 			level = 0;
4004 		} else {	/* better be power level <num>=<name> */
4005 #ifdef DEBUG
4006 			tp = cp;
4007 #endif
4008 			if (i == 0 ||
4009 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4010 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4011 				goto errout;
4012 			}
4013 #ifdef DEBUG
4014 			tp = cp;
4015 #endif
4016 			if (*cp++ != '=' || !*cp) {
4017 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4018 				goto errout;
4019 			}
4020 
4021 			lszs[level] = strlen(cp) + 1;
4022 			size += lszs[level];
4023 			lnames[level] = cp;	/* points into prop string */
4024 			level++;
4025 		}
4026 	}
4027 	np[npi++] = lvals[level - 1];
4028 	if (level == 0) {	/* ended with a name */
4029 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4030 		goto errout;
4031 	}
4032 	FINISH_COMP;
4033 
4034 
4035 	/*
4036 	 * Now we have a list of components--we have to return instead an
4037 	 * array of them, but we can just copy the top level and leave
4038 	 * the rest as is
4039 	 */
4040 	(void) e_pm_create_components(dip, components);
4041 	for (i = 0; i < components; i++)
4042 		e_pm_set_max_power(dip, i, np[i]);
4043 
4044 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4045 	for (i = 0, p = phead; i < components; i++) {
4046 		ASSERT(p);
4047 		/*
4048 		 * Now sanity-check values:  levels must be monotonically
4049 		 * increasing
4050 		 */
4051 		if (p->comp->pmc_numlevels < 2) {
4052 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4053 			    "levels\n", pmf,
4054 			    p->comp->pmc_name, PM_DEVICE(dip),
4055 			    p->comp->pmc_numlevels))
4056 			goto errout;
4057 		}
4058 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4059 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4060 			    (p->comp->pmc_lvals[j] <=
4061 			    p->comp->pmc_lvals[j - 1]))) {
4062 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4063 				    "not mono. incr, %d follows %d\n", pmf,
4064 				    p->comp->pmc_name, PM_DEVICE(dip),
4065 				    p->comp->pmc_lvals[j],
4066 				    p->comp->pmc_lvals[j - 1]))
4067 				goto errout;
4068 			}
4069 		}
4070 		ret[i] = *p->comp;	/* struct assignment */
4071 		for (j = 0; j < i; j++) {
4072 			/*
4073 			 * Test for unique component names
4074 			 */
4075 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4076 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4077 				    "unique\n", pmf, ret[j].pmc_name,
4078 				    PM_DEVICE(dip)))
4079 				goto errout;
4080 			}
4081 		}
4082 		ptail = p;
4083 		p = p->next;
4084 		phead = p;	/* errout depends on phead making sense */
4085 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4086 		kmem_free(ptail, sizeof (*ptail));
4087 	}
4088 out:
4089 	ddi_prop_free(pp);
4090 	if (lvals)
4091 		kmem_free(lvals, nelems * sizeof (int));
4092 	if (lszs)
4093 		kmem_free(lszs, nelems * sizeof (int));
4094 	if (lnames)
4095 		kmem_free(lnames, nelems * sizeof (char *));
4096 	if (np)
4097 		kmem_free(np, nelems * sizeof (int));
4098 	return (ret);
4099 
4100 errout:
4101 	e_pm_destroy_components(dip);
4102 	*errp = 1;	/* signal failure */
4103 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4104 	for (i = 0; i < nelems - 1; i++)
4105 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4106 	if (nelems != 0)
4107 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4108 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4109 	for (p = phead; p; ) {
4110 		pm_comp_t *pp;
4111 		int n;
4112 
4113 		ptail = p;
4114 		/*
4115 		 * Free component data structures
4116 		 */
4117 		pp = p->comp;
4118 		n = pp->pmc_numlevels;
4119 		if (pp->pmc_name_sz) {
4120 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4121 		}
4122 		if (pp->pmc_lnames_sz) {
4123 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4124 		}
4125 		if (pp->pmc_lnames) {
4126 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4127 		}
4128 		if (pp->pmc_thresh) {
4129 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4130 		}
4131 		if (pp->pmc_lvals) {
4132 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4133 		}
4134 		p = ptail->next;
4135 		kmem_free(ptail, sizeof (*ptail));
4136 	}
4137 	if (ret != NULL)
4138 		kmem_free(ret, components * sizeof (pm_comp_t));
4139 	ret = NULL;
4140 	goto out;
4141 }
4142 
4143 /*
4144  * Set threshold values for a devices components by dividing the target
4145  * threshold (base) by the number of transitions and assign each transition
4146  * that threshold.  This will get the entire device down in the target time if
4147  * all components are idle and even if there are dependencies among components.
4148  *
4149  * Devices may well get powered all the way down before the target time, but
4150  * at least the EPA will be happy.
4151  */
4152 void
4153 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4154 {
4155 	PMD_FUNC(pmf, "set_device_threshold")
4156 	int target_threshold = (base * 95) / 100;
4157 	int level, comp;		/* loop counters */
4158 	int transitions = 0;
4159 	int ncomp = PM_NUMCMPTS(dip);
4160 	int thresh;
4161 	int remainder;
4162 	pm_comp_t *pmc;
4163 	int i, circ;
4164 
4165 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4166 	PM_LOCK_DIP(dip);
4167 	/*
4168 	 * First we handle the easy one.  If we're setting the default
4169 	 * threshold for a node with children, then we set it to the
4170 	 * default nexus threshold (currently 0) and mark it as default
4171 	 * nexus threshold instead
4172 	 */
4173 	if (PM_IS_NEXUS(dip)) {
4174 		if (flag == PMC_DEF_THRESH) {
4175 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4176 			    PM_DEVICE(dip)))
4177 			thresh = pm_default_nexus_threshold;
4178 			for (comp = 0; comp < ncomp; comp++) {
4179 				pmc = &PM_CP(dip, comp)->pmc_comp;
4180 				for (level = 1; level < pmc->pmc_numlevels;
4181 				    level++) {
4182 					pmc->pmc_thresh[level] = thresh;
4183 				}
4184 			}
4185 			DEVI(dip)->devi_pm_dev_thresh =
4186 			    pm_default_nexus_threshold;
4187 			/*
4188 			 * If the nexus node is being reconfigured back to
4189 			 * the default threshold, adjust the notlowest count.
4190 			 */
4191 			if (DEVI(dip)->devi_pm_flags &
4192 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4193 				PM_LOCK_POWER(dip, &circ);
4194 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4195 					if (PM_CURPOWER(dip, i) == 0)
4196 						continue;
4197 					mutex_enter(&pm_compcnt_lock);
4198 					ASSERT(pm_comps_notlowest);
4199 					pm_comps_notlowest--;
4200 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4201 					    "notlowest to %d\n", pmf,
4202 					    PM_DEVICE(dip), pm_comps_notlowest))
4203 					if (pm_comps_notlowest == 0)
4204 						pm_ppm_notify_all_lowest(dip,
4205 						    PM_ALL_LOWEST);
4206 					mutex_exit(&pm_compcnt_lock);
4207 				}
4208 				PM_UNLOCK_POWER(dip, circ);
4209 			}
4210 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4211 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4212 			PM_UNLOCK_DIP(dip);
4213 			return;
4214 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4215 			/*
4216 			 * If the nexus node is being configured for a
4217 			 * non-default threshold, include that node in
4218 			 * the notlowest accounting.
4219 			 */
4220 			PM_LOCK_POWER(dip, &circ);
4221 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4222 				if (PM_CURPOWER(dip, i) == 0)
4223 					continue;
4224 				mutex_enter(&pm_compcnt_lock);
4225 				if (pm_comps_notlowest == 0)
4226 					pm_ppm_notify_all_lowest(dip,
4227 					    PM_NOT_ALL_LOWEST);
4228 				pm_comps_notlowest++;
4229 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4230 				    "notlowest to %d\n", pmf,
4231 				    PM_DEVICE(dip), pm_comps_notlowest))
4232 				mutex_exit(&pm_compcnt_lock);
4233 			}
4234 			PM_UNLOCK_POWER(dip, circ);
4235 		}
4236 	}
4237 	/*
4238 	 * Compute the total number of transitions for all components
4239 	 * of the device.  Distribute the threshold evenly over them
4240 	 */
4241 	for (comp = 0; comp < ncomp; comp++) {
4242 		pmc = &PM_CP(dip, comp)->pmc_comp;
4243 		ASSERT(pmc->pmc_numlevels > 1);
4244 		transitions += pmc->pmc_numlevels - 1;
4245 	}
4246 	ASSERT(transitions);
4247 	thresh = target_threshold / transitions;
4248 
4249 	for (comp = 0; comp < ncomp; comp++) {
4250 		pmc = &PM_CP(dip, comp)->pmc_comp;
4251 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4252 			pmc->pmc_thresh[level] = thresh;
4253 		}
4254 	}
4255 
4256 #ifdef DEBUG
4257 	for (comp = 0; comp < ncomp; comp++) {
4258 		pmc = &PM_CP(dip, comp)->pmc_comp;
4259 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4260 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4261 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4262 			    comp, level, pmc->pmc_thresh[level]))
4263 		}
4264 	}
4265 #endif
4266 	/*
4267 	 * Distribute any remainder till they are all gone
4268 	 */
4269 	remainder = target_threshold - thresh * transitions;
4270 	level = 1;
4271 #ifdef DEBUG
4272 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4273 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4274 	    transitions))
4275 #endif
4276 	while (remainder > 0) {
4277 		comp = 0;
4278 		while (remainder && (comp < ncomp)) {
4279 			pmc = &PM_CP(dip, comp)->pmc_comp;
4280 			if (level < pmc->pmc_numlevels) {
4281 				pmc->pmc_thresh[level] += 1;
4282 				remainder--;
4283 			}
4284 			comp++;
4285 		}
4286 		level++;
4287 	}
4288 #ifdef DEBUG
4289 	for (comp = 0; comp < ncomp; comp++) {
4290 		pmc = &PM_CP(dip, comp)->pmc_comp;
4291 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4292 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4293 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4294 			    comp, level, pmc->pmc_thresh[level]))
4295 		}
4296 	}
4297 #endif
4298 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4299 	DEVI(dip)->devi_pm_dev_thresh = base;
4300 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4301 	DEVI(dip)->devi_pm_flags |= flag;
4302 	PM_UNLOCK_DIP(dip);
4303 }
4304 
4305 /*
4306  * Called when there is no old-style platform power management driver
4307  */
4308 static int
4309 ddi_no_platform_power(power_req_t *req)
4310 {
4311 	_NOTE(ARGUNUSED(req))
4312 	return (DDI_FAILURE);
4313 }
4314 
4315 /*
4316  * This function calls the entry point supplied by the platform-specific
4317  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4318  * The use of global for getting the  function name from platform-specific
4319  * pm driver is not ideal, but it is simple and efficient.
4320  * The previous property lookup was being done in the idle loop on swift
4321  * systems without pmc chips and hurt deskbench performance as well as
4322  * violating scheduler locking rules
4323  */
4324 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4325 
4326 /*
4327  * Old obsolete interface for a device to request a power change (but only
4328  * an increase in power)
4329  */
4330 int
4331 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4332 {
4333 	return (pm_raise_power(dip, cmpt, level));
4334 }
4335 
4336 /*
4337  * The old obsolete interface to platform power management.  Only used by
4338  * Gypsy platform and APM on X86.
4339  */
4340 int
4341 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4342 {
4343 	power_req_t	request;
4344 
4345 	request.request_type = PMR_SET_POWER;
4346 	request.req.set_power_req.who = dip;
4347 	request.req.set_power_req.cmpt = pm_cmpt;
4348 	request.req.set_power_req.level = pm_level;
4349 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4350 }
4351 
4352 /*
4353  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4354  * passed.  Returns true if subsequent processing could result in power being
4355  * removed from the device.  The arg is not currently used because it is
4356  * implicit in the operation of cpr/DR.
4357  */
4358 int
4359 ddi_removing_power(dev_info_t *dip)
4360 {
4361 	_NOTE(ARGUNUSED(dip))
4362 	return (pm_powering_down);
4363 }
4364 
4365 /*
4366  * Returns true if a device indicates that its parent handles suspend/resume
4367  * processing for it.
4368  */
4369 int
4370 e_ddi_parental_suspend_resume(dev_info_t *dip)
4371 {
4372 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4373 }
4374 
4375 /*
4376  * Called for devices which indicate that their parent does suspend/resume
4377  * handling for them
4378  */
4379 int
4380 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4381 {
4382 	power_req_t	request;
4383 	request.request_type = PMR_SUSPEND;
4384 	request.req.suspend_req.who = dip;
4385 	request.req.suspend_req.cmd = cmd;
4386 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4387 }
4388 
4389 /*
4390  * Called for devices which indicate that their parent does suspend/resume
4391  * handling for them
4392  */
4393 int
4394 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4395 {
4396 	power_req_t	request;
4397 	request.request_type = PMR_RESUME;
4398 	request.req.resume_req.who = dip;
4399 	request.req.resume_req.cmd = cmd;
4400 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4401 }
4402 
4403 /*
4404  * Old obsolete exported interface for drivers to create components.
4405  * This is now handled by exporting the pm-components property.
4406  */
4407 int
4408 pm_create_components(dev_info_t *dip, int num_components)
4409 {
4410 	PMD_FUNC(pmf, "pm_create_components")
4411 
4412 	if (num_components < 1)
4413 		return (DDI_FAILURE);
4414 
4415 	if (!DEVI_IS_ATTACHING(dip)) {
4416 		return (DDI_FAILURE);
4417 	}
4418 
4419 	/* don't need to lock dip because attach is single threaded */
4420 	if (DEVI(dip)->devi_pm_components) {
4421 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4422 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4423 		return (DDI_FAILURE);
4424 	}
4425 	e_pm_create_components(dip, num_components);
4426 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4427 	e_pm_default_components(dip, num_components);
4428 	return (DDI_SUCCESS);
4429 }
4430 
4431 /*
4432  * Obsolete interface previously called by drivers to destroy their components
4433  * at detach time.  This is now done automatically.  However, we need to keep
4434  * this for the old drivers.
4435  */
4436 void
4437 pm_destroy_components(dev_info_t *dip)
4438 {
4439 	PMD_FUNC(pmf, "pm_destroy_components")
4440 	dev_info_t *pdip = ddi_get_parent(dip);
4441 
4442 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4443 	    PM_DEVICE(dip)))
4444 	ASSERT(DEVI_IS_DETACHING(dip));
4445 #ifdef DEBUG
4446 	if (!PM_ISBC(dip))
4447 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4448 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4449 		    PM_ADDR(dip));
4450 #endif
4451 	/*
4452 	 * We ignore this unless this is an old-style driver, except for
4453 	 * printing the message above
4454 	 */
4455 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4456 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4457 		    PM_DEVICE(dip)))
4458 		return;
4459 	}
4460 	ASSERT(PM_GET_PM_INFO(dip));
4461 
4462 	/*
4463 	 * pm_unmanage will clear info pointer later, after dealing with
4464 	 * dependencies
4465 	 */
4466 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4467 	/*
4468 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4469 	 * Parents that get notification are not adjusted because their
4470 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4471 	 */
4472 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4473 		pm_rele_power(pdip);
4474 #ifdef DEBUG
4475 	else {
4476 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4477 		    pmf, PM_DEVICE(dip)))
4478 	}
4479 #endif
4480 	e_pm_destroy_components(dip);
4481 	/*
4482 	 * Forget we ever knew anything about the components of this  device
4483 	 */
4484 	DEVI(dip)->devi_pm_flags &=
4485 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4486 }
4487 
4488 /*
4489  * Exported interface for a driver to set a component busy.
4490  */
4491 int
4492 pm_busy_component(dev_info_t *dip, int cmpt)
4493 {
4494 	struct pm_component *cp;
4495 
4496 	ASSERT(dip != NULL);
4497 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4498 		return (DDI_FAILURE);
4499 	PM_LOCK_BUSY(dip);
4500 	cp->pmc_busycount++;
4501 	cp->pmc_timestamp = 0;
4502 	PM_UNLOCK_BUSY(dip);
4503 	return (DDI_SUCCESS);
4504 }
4505 
4506 /*
4507  * Exported interface for a driver to set a component idle.
4508  */
4509 int
4510 pm_idle_component(dev_info_t *dip, int cmpt)
4511 {
4512 	PMD_FUNC(pmf, "pm_idle_component")
4513 	struct pm_component *cp;
4514 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4515 
4516 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4517 		return (DDI_FAILURE);
4518 
4519 	PM_LOCK_BUSY(dip);
4520 	if (cp->pmc_busycount) {
4521 		if (--(cp->pmc_busycount) == 0)
4522 			cp->pmc_timestamp = gethrestime_sec();
4523 	} else {
4524 		cp->pmc_timestamp = gethrestime_sec();
4525 	}
4526 
4527 	PM_UNLOCK_BUSY(dip);
4528 
4529 	/*
4530 	 * if device becomes idle during idle down period, try scan it down
4531 	 */
4532 	if (scanp && PM_IS_PID(dip)) {
4533 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4534 		    PM_DEVICE(dip)))
4535 		pm_rescan(dip);
4536 		return (DDI_SUCCESS);
4537 	}
4538 
4539 	/*
4540 	 * handle scan not running with nexus threshold == 0
4541 	 */
4542 
4543 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4544 		pm_rescan(dip);
4545 	}
4546 
4547 	return (DDI_SUCCESS);
4548 }
4549 
4550 /*
4551  * This is the old  obsolete interface called by drivers to set their normal
4552  * power.  Thus we can't fix its behavior or return a value.
4553  * This functionality is replaced by the pm-component property.
4554  * We'll only get components destroyed while no power management is
4555  * going on (and the device is detached), so we don't need a mutex here
4556  */
4557 void
4558 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4559 {
4560 	PMD_FUNC(pmf, "set_normal_power")
4561 #ifdef DEBUG
4562 	if (!PM_ISBC(dip))
4563 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4564 		    "(driver exporting pm-components property) ignored",
4565 		    PM_NAME(dip), PM_ADDR(dip));
4566 #endif
4567 	if (PM_ISBC(dip)) {
4568 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4569 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4570 		e_pm_set_max_power(dip, comp, level);
4571 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4572 	}
4573 }
4574 
4575 /*
4576  * Called on a successfully detached driver to free pm resources
4577  */
4578 static void
4579 pm_stop(dev_info_t *dip)
4580 {
4581 	PMD_FUNC(pmf, "stop")
4582 	dev_info_t *pdip = ddi_get_parent(dip);
4583 
4584 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4585 	/* stopping scan, destroy scan data structure */
4586 	if (!PM_ISBC(dip)) {
4587 		pm_scan_stop(dip);
4588 		pm_scan_fini(dip);
4589 	}
4590 
4591 	if (PM_GET_PM_INFO(dip) != NULL) {
4592 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4593 			/*
4594 			 * Old style driver may have called
4595 			 * pm_destroy_components already, but just in case ...
4596 			 */
4597 			e_pm_destroy_components(dip);
4598 		} else {
4599 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4600 			    pmf, PM_DEVICE(dip)))
4601 		}
4602 	} else {
4603 		if (PM_NUMCMPTS(dip))
4604 			e_pm_destroy_components(dip);
4605 		else {
4606 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4607 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4608 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4609 					pm_rele_power(pdip);
4610 				} else if (pdip &&
4611 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4612 					(void) mdi_power(pdip,
4613 					    MDI_PM_RELE_POWER,
4614 					    (void *)dip, NULL, 0);
4615 				}
4616 			}
4617 		}
4618 	}
4619 }
4620 
4621 /*
4622  * The node is the subject of a reparse pm props ioctl. Throw away the old
4623  * info and start over.
4624  */
4625 int
4626 e_new_pm_props(dev_info_t *dip)
4627 {
4628 	if (PM_GET_PM_INFO(dip) != NULL) {
4629 		pm_stop(dip);
4630 
4631 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4632 			return (DDI_FAILURE);
4633 		}
4634 	}
4635 	e_pm_props(dip);
4636 	return (DDI_SUCCESS);
4637 }
4638 
4639 /*
4640  * Device has been attached, so process its pm properties
4641  */
4642 void
4643 e_pm_props(dev_info_t *dip)
4644 {
4645 	char *pp;
4646 	int len;
4647 	int flags = 0;
4648 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4649 
4650 	/*
4651 	 * It doesn't matter if we do this more than once, we should always
4652 	 * get the same answers, and if not, then the last one in is the
4653 	 * best one.
4654 	 */
4655 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4656 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4657 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4658 			flags = PMC_NEEDS_SR;
4659 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4660 			flags = PMC_NO_SR;
4661 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4662 			flags = PMC_PARENTAL_SR;
4663 		} else {
4664 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4665 			    "%s property value '%s'", PM_NAME(dip),
4666 			    PM_ADDR(dip), "pm-hardware-state", pp);
4667 		}
4668 		kmem_free(pp, len);
4669 	}
4670 	/*
4671 	 * This next segment (PMC_WANTS_NOTIFY) is in
4672 	 * support of nexus drivers which will want to be involved in
4673 	 * (or at least notified of) their child node's power level transitions.
4674 	 * "pm-want-child-notification?" is defined by the parent.
4675 	 */
4676 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4677 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4678 		flags |= PMC_WANTS_NOTIFY;
4679 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4680 	    dip, propflag, "pm-want-child-notification?"));
4681 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4682 	    "no-involuntary-power-cycles"))
4683 		flags |= PMC_NO_INVOL;
4684 	/* devfs single threads us */
4685 	DEVI(dip)->devi_pm_flags |= flags;
4686 }
4687 
4688 /*
4689  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4690  * driver which has claimed a node.
4691  * Sets old_power in arg struct.
4692  */
4693 static int
4694 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4695     ddi_ctl_enum_t ctlop, void *arg, void *result)
4696 {
4697 	_NOTE(ARGUNUSED(dip))
4698 	PMD_FUNC(pmf, "ctlops")
4699 	power_req_t *reqp = (power_req_t *)arg;
4700 	int retval;
4701 	dev_info_t *target_dip;
4702 	int new_level, old_level, cmpt;
4703 #ifdef DEBUG
4704 	char *format;
4705 #endif
4706 
4707 	/*
4708 	 * The interface for doing the actual power level changes is now
4709 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4710 	 * different platform-specific power control drivers.
4711 	 *
4712 	 * This driver implements the "default" version of this interface.
4713 	 * If no ppm driver has been installed then this interface is called
4714 	 * instead.
4715 	 */
4716 	ASSERT(dip == NULL);
4717 	switch (ctlop) {
4718 	case DDI_CTLOPS_POWER:
4719 		switch (reqp->request_type) {
4720 		case PMR_PPM_SET_POWER:
4721 		{
4722 			target_dip = reqp->req.ppm_set_power_req.who;
4723 			ASSERT(target_dip == rdip);
4724 			new_level = reqp->req.ppm_set_power_req.new_level;
4725 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4726 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4727 			old_level = PM_CURPOWER(target_dip, cmpt);
4728 			reqp->req.ppm_set_power_req.old_level = old_level;
4729 			retval = pm_power(target_dip, cmpt, new_level);
4730 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4731 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4732 			    old_level, new_level, (retval == DDI_SUCCESS ?
4733 			    "chd" : "no chg")))
4734 			return (retval);
4735 		}
4736 
4737 		case PMR_PPM_PRE_DETACH:
4738 		case PMR_PPM_POST_DETACH:
4739 		case PMR_PPM_PRE_ATTACH:
4740 		case PMR_PPM_POST_ATTACH:
4741 		case PMR_PPM_PRE_PROBE:
4742 		case PMR_PPM_POST_PROBE:
4743 		case PMR_PPM_PRE_RESUME:
4744 		case PMR_PPM_INIT_CHILD:
4745 		case PMR_PPM_UNINIT_CHILD:
4746 #ifdef DEBUG
4747 			switch (reqp->request_type) {
4748 				case PMR_PPM_PRE_DETACH:
4749 					format = "%s: PMR_PPM_PRE_DETACH "
4750 					    "%s@%s(%s#%d)\n";
4751 					break;
4752 				case PMR_PPM_POST_DETACH:
4753 					format = "%s: PMR_PPM_POST_DETACH "
4754 					    "%s@%s(%s#%d) rets %d\n";
4755 					break;
4756 				case PMR_PPM_PRE_ATTACH:
4757 					format = "%s: PMR_PPM_PRE_ATTACH "
4758 					    "%s@%s(%s#%d)\n";
4759 					break;
4760 				case PMR_PPM_POST_ATTACH:
4761 					format = "%s: PMR_PPM_POST_ATTACH "
4762 					    "%s@%s(%s#%d) rets %d\n";
4763 					break;
4764 				case PMR_PPM_PRE_PROBE:
4765 					format = "%s: PMR_PPM_PRE_PROBE "
4766 					    "%s@%s(%s#%d)\n";
4767 					break;
4768 				case PMR_PPM_POST_PROBE:
4769 					format = "%s: PMR_PPM_POST_PROBE "
4770 					    "%s@%s(%s#%d) rets %d\n";
4771 					break;
4772 				case PMR_PPM_PRE_RESUME:
4773 					format = "%s: PMR_PPM_PRE_RESUME "
4774 					    "%s@%s(%s#%d) rets %d\n";
4775 					break;
4776 				case PMR_PPM_INIT_CHILD:
4777 					format = "%s: PMR_PPM_INIT_CHILD "
4778 					    "%s@%s(%s#%d)\n";
4779 					break;
4780 				case PMR_PPM_UNINIT_CHILD:
4781 					format = "%s: PMR_PPM_UNINIT_CHILD "
4782 					    "%s@%s(%s#%d)\n";
4783 					break;
4784 				default:
4785 					break;
4786 			}
4787 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4788 			    reqp->req.ppm_config_req.result))
4789 #endif
4790 			return (DDI_SUCCESS);
4791 
4792 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4793 			/*
4794 			 * Nothing for us to do
4795 			 */
4796 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4797 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4798 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4799 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4800 			    reqp->req.ppm_notify_level_req.cmpt,
4801 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4802 			    reqp->req.ppm_notify_level_req.cmpt),
4803 			    reqp->req.ppm_notify_level_req.new_level))
4804 			return (DDI_SUCCESS);
4805 
4806 		case PMR_PPM_UNMANAGE:
4807 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4808 			    pmf, PM_DEVICE(rdip)))
4809 			return (DDI_SUCCESS);
4810 
4811 		case PMR_PPM_LOCK_POWER:
4812 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4813 			    reqp->req.ppm_lock_power_req.circp);
4814 			return (DDI_SUCCESS);
4815 
4816 		case PMR_PPM_UNLOCK_POWER:
4817 			pm_unlock_power_single(
4818 			    reqp->req.ppm_unlock_power_req.who,
4819 			    reqp->req.ppm_unlock_power_req.circ);
4820 			return (DDI_SUCCESS);
4821 
4822 		case PMR_PPM_TRY_LOCK_POWER:
4823 			*(int *)result = pm_try_locking_power_single(
4824 			    reqp->req.ppm_lock_power_req.who,
4825 			    reqp->req.ppm_lock_power_req.circp);
4826 			return (DDI_SUCCESS);
4827 
4828 		case PMR_PPM_POWER_LOCK_OWNER:
4829 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4830 			ASSERT(target_dip == rdip);
4831 			reqp->req.ppm_power_lock_owner_req.owner =
4832 			    DEVI(rdip)->devi_busy_thread;
4833 			return (DDI_SUCCESS);
4834 		default:
4835 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4836 			return (DDI_FAILURE);
4837 		}
4838 
4839 	default:
4840 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4841 		return (DDI_FAILURE);
4842 	}
4843 }
4844 
4845 /*
4846  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4847  * power_ops struct for this functionality instead?
4848  * However, we only ever do this on a ppm driver.
4849  */
4850 int
4851 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4852 {
4853 	int (*fp)();
4854 
4855 	/* if no ppm handler, call the default routine */
4856 	if (d == NULL) {
4857 		return (pm_default_ctlops(d, r, op, a, v));
4858 	}
4859 	if (!d || !r)
4860 		return (DDI_FAILURE);
4861 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4862 		DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4863 
4864 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4865 	return ((*fp)(d, r, op, a, v));
4866 }
4867 
4868 /*
4869  * Called on a node when attach completes or the driver makes its first pm
4870  * call (whichever comes first).
4871  * In the attach case, device may not be power manageable at all.
4872  * Don't need to lock the dip because we're single threaded by the devfs code
4873  */
4874 static int
4875 pm_start(dev_info_t *dip)
4876 {
4877 	PMD_FUNC(pmf, "start")
4878 	int ret;
4879 	dev_info_t *pdip = ddi_get_parent(dip);
4880 	int e_pm_manage(dev_info_t *, int);
4881 	void pm_noinvol_specd(dev_info_t *dip);
4882 
4883 	e_pm_props(dip);
4884 	pm_noinvol_specd(dip);
4885 	/*
4886 	 * If this dip has already been processed, don't mess with it
4887 	 * (but decrement the speculative count we did above, as whatever
4888 	 * code put it under pm already will have dealt with it)
4889 	 */
4890 	if (PM_GET_PM_INFO(dip)) {
4891 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
4892 		    pmf, PM_DEVICE(dip)))
4893 		return (0);
4894 	}
4895 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
4896 
4897 	if (PM_GET_PM_INFO(dip) == NULL) {
4898 		/*
4899 		 * keep the kidsupcount increment as is
4900 		 */
4901 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
4902 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4903 			pm_hold_power(pdip);
4904 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4905 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
4906 			    (void *)dip, NULL, 0);
4907 		}
4908 
4909 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
4910 		    "left up\n", pmf, PM_DEVICE(dip)))
4911 	}
4912 
4913 	return (ret);
4914 }
4915 
4916 /*
4917  * Keep a list of recorded thresholds.  For now we just keep a list and
4918  * search it linearly.  We don't expect too many entries.  Can always hash it
4919  * later if we need to.
4920  */
4921 void
4922 pm_record_thresh(pm_thresh_rec_t *rp)
4923 {
4924 	pm_thresh_rec_t *pptr, *ptr;
4925 
4926 	ASSERT(*rp->ptr_physpath);
4927 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
4928 	for (pptr = NULL, ptr = pm_thresh_head;
4929 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
4930 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
4931 			/* replace this one */
4932 			rp->ptr_next = ptr->ptr_next;
4933 			if (pptr) {
4934 				pptr->ptr_next = rp;
4935 			} else {
4936 				pm_thresh_head = rp;
4937 			}
4938 			rw_exit(&pm_thresh_rwlock);
4939 			kmem_free(ptr, ptr->ptr_size);
4940 			return;
4941 		}
4942 		continue;
4943 	}
4944 	/*
4945 	 * There was not a match in the list, insert this one in front
4946 	 */
4947 	if (pm_thresh_head) {
4948 		rp->ptr_next = pm_thresh_head;
4949 		pm_thresh_head = rp;
4950 	} else {
4951 		rp->ptr_next = NULL;
4952 		pm_thresh_head = rp;
4953 	}
4954 	rw_exit(&pm_thresh_rwlock);
4955 }
4956 
4957 /*
4958  * Create a new dependency record and hang a new dependency entry off of it
4959  */
4960 pm_pdr_t *
4961 newpdr(char *kept, char *keeps, int isprop)
4962 {
4963 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
4964 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
4965 	p->pdr_size = size;
4966 	p->pdr_isprop = isprop;
4967 	p->pdr_kept_paths = NULL;
4968 	p->pdr_kept_count = 0;
4969 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
4970 	(void) strcpy(p->pdr_kept, kept);
4971 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
4972 	(void) strcpy(p->pdr_keeper, keeps);
4973 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
4974 	    (intptr_t)p + size);
4975 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
4976 	    (intptr_t)p + size);
4977 	return (p);
4978 }
4979 
4980 /*
4981  * Keep a list of recorded dependencies.  We only keep the
4982  * keeper -> kept list for simplification. At this point We do not
4983  * care about whether the devices are attached or not yet,
4984  * this would be done in pm_keeper() and pm_kept().
4985  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
4986  * and it is up to the user to issue the ioctl again if they want it
4987  * (e.g. pmconfig)
4988  * Returns true if dependency already exists in the list.
4989  */
4990 int
4991 pm_record_keeper(char *kept, char *keeper, int isprop)
4992 {
4993 	PMD_FUNC(pmf, "record_keeper")
4994 	pm_pdr_t *npdr, *ppdr, *pdr;
4995 
4996 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
4997 	ASSERT(kept && keeper);
4998 #ifdef DEBUG
4999 	if (pm_debug & PMD_KEEPS)
5000 		prdeps("pm_record_keeper entry");
5001 #endif
5002 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5003 	    ppdr = pdr, pdr = pdr->pdr_next) {
5004 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5005 		    pdr->pdr_keeper))
5006 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5007 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5008 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5009 			return (1);
5010 		}
5011 	}
5012 	/*
5013 	 * We did not find any match, so we have to make an entry
5014 	 */
5015 	npdr = newpdr(kept, keeper, isprop);
5016 	if (ppdr) {
5017 		ASSERT(ppdr->pdr_next == NULL);
5018 		ppdr->pdr_next = npdr;
5019 	} else {
5020 		ASSERT(pm_dep_head == NULL);
5021 		pm_dep_head = npdr;
5022 	}
5023 #ifdef DEBUG
5024 	if (pm_debug & PMD_KEEPS)
5025 		prdeps("pm_record_keeper after new record");
5026 #endif
5027 	if (!isprop)
5028 		pm_unresolved_deps++;
5029 	else
5030 		pm_prop_deps++;
5031 	return (0);
5032 }
5033 
5034 /*
5035  * Look up this device in the set of devices we've seen ioctls for
5036  * to see if we are holding a threshold spec for it.  If so, make it so.
5037  * At ioctl time, we were given the physical path of the device.
5038  */
5039 int
5040 pm_thresh_specd(dev_info_t *dip)
5041 {
5042 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5043 	char *path = 0;
5044 	char pathbuf[MAXNAMELEN];
5045 	pm_thresh_rec_t *rp;
5046 
5047 	path = ddi_pathname(dip, pathbuf);
5048 
5049 	rw_enter(&pm_thresh_rwlock, RW_READER);
5050 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5051 		if (strcmp(rp->ptr_physpath, path) != 0)
5052 			continue;
5053 		pm_apply_recorded_thresh(dip, rp);
5054 		rw_exit(&pm_thresh_rwlock);
5055 		return (1);
5056 	}
5057 	rw_exit(&pm_thresh_rwlock);
5058 	return (0);
5059 }
5060 
5061 static int
5062 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5063 {
5064 	PMD_FUNC(pmf, "set_keeping")
5065 	pm_info_t *kept_info;
5066 	int j, up = 0, circ;
5067 	void prdeps(char *);
5068 
5069 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5070 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5071 #ifdef DEBUG
5072 	if (pm_debug & PMD_KEEPS)
5073 		prdeps("Before PAD\n");
5074 #endif
5075 	ASSERT(keeper != kept);
5076 	if (PM_GET_PM_INFO(keeper) == NULL) {
5077 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5078 		    "%s@%s(%s#%d), but the latter is not power managed",
5079 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5080 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5081 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5082 		return (0);
5083 	}
5084 	kept_info = PM_GET_PM_INFO(kept);
5085 	ASSERT(kept_info);
5086 	PM_LOCK_POWER(keeper, &circ);
5087 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5088 		if (PM_CURPOWER(keeper, j)) {
5089 			up++;
5090 			break;
5091 		}
5092 	}
5093 	if (up) {
5094 		/* Bringup and maintain a hold on the kept */
5095 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5096 		    PM_DEVICE(kept)))
5097 		bring_pmdep_up(kept, 1);
5098 	}
5099 	PM_UNLOCK_POWER(keeper, circ);
5100 #ifdef DEBUG
5101 	if (pm_debug & PMD_KEEPS)
5102 		prdeps("After PAD\n");
5103 #endif
5104 	return (1);
5105 }
5106 
5107 /*
5108  * Should this device keep up another device?
5109  * Look up this device in the set of devices we've seen ioctls for
5110  * to see if we are holding a dependency spec for it.  If so, make it so.
5111  * Because we require the kept device to be attached already in order to
5112  * make the list entry (and hold it), we only need to look for keepers.
5113  * At ioctl time, we were given the physical path of the device.
5114  */
5115 int
5116 pm_keeper(char *keeper)
5117 {
5118 	PMD_FUNC(pmf, "keeper")
5119 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5120 	dev_info_t *dip;
5121 	pm_pdr_t *dp;
5122 	dev_info_t *kept = NULL;
5123 	int ret = 0;
5124 	int i;
5125 
5126 	if (!pm_unresolved_deps && !pm_prop_deps)
5127 		return (0);
5128 	ASSERT(keeper != NULL);
5129 	dip = pm_name_to_dip(keeper, 1);
5130 	if (dip == NULL)
5131 		return (0);
5132 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5133 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5134 		if (!dp->pdr_isprop) {
5135 			if (!pm_unresolved_deps)
5136 				continue;
5137 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5138 			if (dp->pdr_satisfied) {
5139 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5140 				continue;
5141 			}
5142 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5143 				ret += pm_apply_recorded_dep(dip, dp);
5144 			}
5145 		} else {
5146 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5147 				continue;
5148 			for (i = 0; i < dp->pdr_kept_count; i++) {
5149 				if (dp->pdr_kept_paths[i] == NULL)
5150 					continue;
5151 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5152 				if (kept == NULL)
5153 					continue;
5154 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5155 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5156 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5157 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5158 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5159 				    dp->pdr_kept_count))
5160 				if (kept != dip) {
5161 					ret += pm_set_keeping(dip, kept);
5162 				}
5163 				ddi_release_devi(kept);
5164 			}
5165 
5166 		}
5167 	}
5168 	ddi_release_devi(dip);
5169 	return (ret);
5170 }
5171 
5172 /*
5173  * Should this device be kept up by another device?
5174  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5175  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5176  * kept device lists.
5177  */
5178 static int
5179 pm_kept(char *keptp)
5180 {
5181 	PMD_FUNC(pmf, "kept")
5182 	pm_pdr_t *dp;
5183 	int found = 0;
5184 	int ret = 0;
5185 	dev_info_t *keeper;
5186 	dev_info_t *kept;
5187 	size_t length;
5188 	int i;
5189 	char **paths;
5190 	char *path;
5191 
5192 	ASSERT(keptp != NULL);
5193 	kept = pm_name_to_dip(keptp, 1);
5194 	if (kept == NULL)
5195 		return (0);
5196 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5197 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5198 		if (dp->pdr_isprop) {
5199 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5200 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5201 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5202 				/*
5203 				 * Dont allow self dependency.
5204 				 */
5205 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5206 					continue;
5207 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5208 				if (keeper == NULL)
5209 					continue;
5210 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5211 				    "%p\n", pmf, (void *)kept))
5212 #ifdef DEBUG
5213 				if (pm_debug & PMD_DEP)
5214 					prdeps("Before Adding from pm_kept\n");
5215 #endif
5216 				/*
5217 				 * Add ourselves to the dip list.
5218 				 */
5219 				if (dp->pdr_kept_count == 0) {
5220 					length = strlen(keptp) + 1;
5221 					path =
5222 					    kmem_alloc(length, KM_SLEEP);
5223 					paths = kmem_alloc(sizeof (char **),
5224 						    KM_SLEEP);
5225 					(void) strcpy(path, keptp);
5226 					paths[0] = path;
5227 					dp->pdr_kept_paths = paths;
5228 					dp->pdr_kept_count++;
5229 				} else {
5230 					/* Check to see if already on list */
5231 					for (i = 0; i < dp->pdr_kept_count;
5232 					    i++) {
5233 						if (strcmp(keptp,
5234 						    dp->pdr_kept_paths[i])
5235 						    == 0) {
5236 							found++;
5237 							break;
5238 						}
5239 					}
5240 					if (found) {
5241 						ddi_release_devi(keeper);
5242 						continue;
5243 					}
5244 					length = dp->pdr_kept_count *
5245 					    sizeof (char **);
5246 					paths = kmem_alloc(
5247 					    length + sizeof (char **),
5248 					    KM_SLEEP);
5249 					if (dp->pdr_kept_count) {
5250 						bcopy(dp->pdr_kept_paths,
5251 						    paths, length);
5252 						kmem_free(dp->pdr_kept_paths,
5253 							length);
5254 					}
5255 					dp->pdr_kept_paths = paths;
5256 					length = strlen(keptp) + 1;
5257 					path =
5258 					    kmem_alloc(length, KM_SLEEP);
5259 					(void) strcpy(path, keptp);
5260 					dp->pdr_kept_paths[i] = path;
5261 					dp->pdr_kept_count++;
5262 				}
5263 #ifdef DEBUG
5264 				if (pm_debug & PMD_DEP)
5265 					prdeps("After from pm_kept\n");
5266 #endif
5267 				if (keeper) {
5268 					ret += pm_set_keeping(keeper, kept);
5269 					ddi_release_devi(keeper);
5270 				}
5271 			}
5272 		} else {
5273 			/*
5274 			 * pm_keeper would be called later to do
5275 			 * the actual pm_set_keeping.
5276 			 */
5277 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5278 			    pmf, (void *)kept))
5279 #ifdef DEBUG
5280 			if (pm_debug & PMD_DEP)
5281 				prdeps("Before Adding from pm_kept\n");
5282 #endif
5283 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5284 				if (dp->pdr_kept_paths == NULL) {
5285 					length = strlen(keptp) + 1;
5286 					path =
5287 					    kmem_alloc(length, KM_SLEEP);
5288 					paths = kmem_alloc(sizeof (char **),
5289 						KM_SLEEP);
5290 					(void) strcpy(path, keptp);
5291 					paths[0] = path;
5292 					dp->pdr_kept_paths = paths;
5293 					dp->pdr_kept_count++;
5294 				}
5295 			}
5296 #ifdef DEBUG
5297 			if (pm_debug & PMD_DEP)
5298 			    prdeps("After from pm_kept\n");
5299 #endif
5300 		}
5301 	}
5302 	ddi_release_devi(kept);
5303 	return (ret);
5304 }
5305 
5306 /*
5307  * Apply a recorded dependency.  dp specifies the dependency, and
5308  * keeper is already known to be the device that keeps up the other (kept) one.
5309  * We have to the whole tree for the "kept" device, then apply
5310  * the dependency (which may already be applied).
5311  */
5312 int
5313 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5314 {
5315 	PMD_FUNC(pmf, "apply_recorded_dep")
5316 	dev_info_t *kept = NULL;
5317 	int ret = 0;
5318 	char *keptp = NULL;
5319 
5320 	/*
5321 	 * Device to Device dependency can only be 1 to 1.
5322 	 */
5323 	if (dp->pdr_kept_paths == NULL)
5324 		return (0);
5325 	keptp = dp->pdr_kept_paths[0];
5326 	if (keptp == NULL)
5327 		return (0);
5328 	ASSERT(*keptp != '\0');
5329 	kept = pm_name_to_dip(keptp, 1);
5330 	if (kept == NULL)
5331 		return (0);
5332 	if (kept) {
5333 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5334 		    dp->pdr_keeper, keptp))
5335 		if (pm_set_keeping(keeper, kept)) {
5336 			ASSERT(dp->pdr_satisfied == 0);
5337 			dp->pdr_satisfied = 1;
5338 			ASSERT(pm_unresolved_deps);
5339 			pm_unresolved_deps--;
5340 			ret++;
5341 		}
5342 	}
5343 	ddi_release_devi(kept);
5344 
5345 	return (ret);
5346 }
5347 
5348 /*
5349  * Called from common/io/pm.c
5350  */
5351 int
5352 pm_cur_power(pm_component_t *cp)
5353 {
5354 	return (cur_power(cp));
5355 }
5356 
5357 /*
5358  * External interface to sanity-check a power level.
5359  */
5360 int
5361 pm_valid_power(dev_info_t *dip, int comp, int level)
5362 {
5363 	PMD_FUNC(pmf, "valid_power")
5364 
5365 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5366 		return (e_pm_valid_power(dip, comp, level));
5367 	else {
5368 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5369 		    pmf, comp, PM_NUMCMPTS(dip), level))
5370 		return (0);
5371 	}
5372 }
5373 
5374 /*
5375  * Called when a device that is direct power managed needs to change state.
5376  * This routine arranges to block the request until the process managing
5377  * the device makes the change (or some other incompatible change) or
5378  * the process closes /dev/pm.
5379  */
5380 static int
5381 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5382 {
5383 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5384 	int ret = 0;
5385 	void pm_dequeue_blocked(pm_rsvp_t *);
5386 	void pm_enqueue_blocked(pm_rsvp_t *);
5387 
5388 	ASSERT(!pm_processes_stopped);
5389 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5390 	new->pr_dip = dip;
5391 	new->pr_comp = comp;
5392 	new->pr_newlevel = newpower;
5393 	new->pr_oldlevel = oldpower;
5394 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5395 	mutex_enter(&pm_rsvp_lock);
5396 	pm_enqueue_blocked(new);
5397 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5398 	    PM_CANBLOCK_BLOCK);
5399 	PM_UNLOCK_DIP(dip);
5400 	/*
5401 	 * truss may make the cv_wait_sig return prematurely
5402 	 */
5403 	while (ret == 0) {
5404 		/*
5405 		 * Normally there will be no user context involved, but if
5406 		 * there is (e.g. we are here via an ioctl call to a driver)
5407 		 * then we should allow the process to abort the request,
5408 		 * or we get an unkillable process if the same thread does
5409 		 * PM_DIRECT_PM and pm_raise_power
5410 		 */
5411 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5412 			ret = PMP_FAIL;
5413 		} else {
5414 			ret = new->pr_retval;
5415 		}
5416 	}
5417 	pm_dequeue_blocked(new);
5418 	mutex_exit(&pm_rsvp_lock);
5419 	cv_destroy(&new->pr_cv);
5420 	kmem_free(new, sizeof (*new));
5421 	return (ret);
5422 }
5423 
5424 /*
5425  * Returns true if the process is interested in power level changes (has issued
5426  * PM_GET_STATE_CHANGE ioctl).
5427  */
5428 int
5429 pm_interest_registered(int clone)
5430 {
5431 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5432 	return (pm_interest[clone]);
5433 }
5434 
5435 /*
5436  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5437  * watch all state transitions (dip == NULL).  Set up data
5438  * structs to communicate with process about state changes.
5439  */
5440 void
5441 pm_register_watcher(int clone, dev_info_t *dip)
5442 {
5443 	pscc_t	*p;
5444 	psce_t	*psce;
5445 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5446 
5447 	/*
5448 	 * We definitely need a control struct, then we have to search to see
5449 	 * there is already an entries struct (in the dip != NULL case).
5450 	 */
5451 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5452 	pscc->pscc_clone = clone;
5453 	pscc->pscc_dip = dip;
5454 
5455 	if (dip) {
5456 		int found = 0;
5457 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5458 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5459 			/*
5460 			 * Already an entry for this clone, so just use it
5461 			 * for the new one (for the case where a single
5462 			 * process is watching multiple devices)
5463 			 */
5464 			if (p->pscc_clone == clone) {
5465 				ASSERT(p->pscc_dip != dip);
5466 				pscc->pscc_entries = p->pscc_entries;
5467 				pscc->pscc_entries->psce_references++;
5468 				found++;
5469 			}
5470 		}
5471 		if (!found) {		/* create a new one */
5472 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5473 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5474 			psce->psce_first =
5475 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5476 			    KM_SLEEP);
5477 			psce->psce_in = psce->psce_out = psce->psce_first;
5478 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5479 			psce->psce_references = 1;
5480 			pscc->pscc_entries = psce;
5481 		}
5482 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5483 		rw_exit(&pm_pscc_direct_rwlock);
5484 	} else {
5485 		ASSERT(!pm_interest_registered(clone));
5486 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5487 #ifdef DEBUG
5488 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5489 			/*
5490 			 * Should not be an entry for this clone!
5491 			 */
5492 			ASSERT(p->pscc_clone != clone);
5493 		}
5494 #endif
5495 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5496 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5497 		    PSCCOUNT, KM_SLEEP);
5498 		psce->psce_in = psce->psce_out = psce->psce_first;
5499 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5500 		psce->psce_references = 1;
5501 		pscc->pscc_entries = psce;
5502 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5503 		pm_interest[clone] = 1;
5504 		rw_exit(&pm_pscc_interest_rwlock);
5505 	}
5506 }
5507 
5508 /*
5509  * Remove the given entry from the blocked list
5510  */
5511 void
5512 pm_dequeue_blocked(pm_rsvp_t *p)
5513 {
5514 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5515 	if (pm_blocked_list == p) {
5516 		ASSERT(p->pr_prev == NULL);
5517 		if (p->pr_next != NULL)
5518 			p->pr_next->pr_prev = NULL;
5519 		pm_blocked_list = p->pr_next;
5520 	} else {
5521 		ASSERT(p->pr_prev != NULL);
5522 		p->pr_prev->pr_next = p->pr_next;
5523 		if (p->pr_next != NULL)
5524 			p->pr_next->pr_prev = p->pr_prev;
5525 	}
5526 }
5527 
5528 /*
5529  * Remove the given control struct from the given list
5530  */
5531 static void
5532 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5533 {
5534 	if (*list == p) {
5535 		ASSERT(p->pscc_prev == NULL);
5536 		if (p->pscc_next != NULL)
5537 			p->pscc_next->pscc_prev = NULL;
5538 		*list = p->pscc_next;
5539 	} else {
5540 		ASSERT(p->pscc_prev != NULL);
5541 		p->pscc_prev->pscc_next = p->pscc_next;
5542 		if (p->pscc_next != NULL)
5543 			p->pscc_next->pscc_prev = p->pscc_prev;
5544 	}
5545 }
5546 
5547 /*
5548  * Stick the control struct specified on the front of the list
5549  */
5550 static void
5551 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5552 {
5553 	pscc_t *h;	/* entry at head of list */
5554 	if ((h = *list) == NULL) {
5555 		*list = p;
5556 		ASSERT(p->pscc_next == NULL);
5557 		ASSERT(p->pscc_prev == NULL);
5558 	} else {
5559 		p->pscc_next = h;
5560 		ASSERT(h->pscc_prev == NULL);
5561 		h->pscc_prev = p;
5562 		ASSERT(p->pscc_prev == NULL);
5563 		*list = p;
5564 	}
5565 }
5566 
5567 /*
5568  * If dip is NULL, process is closing "clone" clean up all its registrations.
5569  * Otherwise only clean up those for dip because process is just giving up
5570  * control of a direct device.
5571  */
5572 void
5573 pm_deregister_watcher(int clone, dev_info_t *dip)
5574 {
5575 	pscc_t	*p, *pn;
5576 	psce_t	*psce;
5577 	int found = 0;
5578 
5579 	if (dip == NULL) {
5580 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5581 		for (p = pm_pscc_interest; p; p = pn) {
5582 			pn = p->pscc_next;
5583 			if (p->pscc_clone == clone) {
5584 				pm_dequeue_pscc(p, &pm_pscc_interest);
5585 				psce = p->pscc_entries;
5586 				ASSERT(psce->psce_references == 1);
5587 				mutex_destroy(&psce->psce_lock);
5588 				kmem_free(psce->psce_first,
5589 				    sizeof (pm_state_change_t) * PSCCOUNT);
5590 				kmem_free(psce, sizeof (*psce));
5591 				kmem_free(p, sizeof (*p));
5592 			}
5593 		}
5594 		pm_interest[clone] = 0;
5595 		rw_exit(&pm_pscc_interest_rwlock);
5596 	}
5597 	found = 0;
5598 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5599 	for (p = pm_pscc_direct; p; p = pn) {
5600 		pn = p->pscc_next;
5601 		if ((dip && p->pscc_dip == dip) ||
5602 		    (dip == NULL && clone == p->pscc_clone)) {
5603 			ASSERT(clone == p->pscc_clone);
5604 			found++;
5605 			/*
5606 			 * Remove from control list
5607 			 */
5608 			pm_dequeue_pscc(p, &pm_pscc_direct);
5609 			/*
5610 			 * If we're the last reference, free the
5611 			 * entries struct.
5612 			 */
5613 			psce = p->pscc_entries;
5614 			ASSERT(psce);
5615 			if (psce->psce_references == 1) {
5616 				kmem_free(psce->psce_first,
5617 				    PSCCOUNT * sizeof (pm_state_change_t));
5618 				kmem_free(psce, sizeof (*psce));
5619 			} else {
5620 				psce->psce_references--;
5621 			}
5622 			kmem_free(p, sizeof (*p));
5623 		}
5624 	}
5625 	ASSERT(dip == NULL || found);
5626 	rw_exit(&pm_pscc_direct_rwlock);
5627 }
5628 
5629 /*
5630  * Search the indicated list for an entry that matches clone, and return a
5631  * pointer to it.  To be interesting, the entry must have something ready to
5632  * be passed up to the controlling process.
5633  * The returned entry will be locked upon return from this call.
5634  */
5635 static psce_t *
5636 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5637 {
5638 	pscc_t	*p;
5639 	psce_t	*psce;
5640 	rw_enter(lock, RW_READER);
5641 	for (p = *list; p; p = p->pscc_next) {
5642 		if (clone == p->pscc_clone) {
5643 			psce = p->pscc_entries;
5644 			mutex_enter(&psce->psce_lock);
5645 			if (psce->psce_out->size) {
5646 				rw_exit(lock);
5647 				return (psce);
5648 			} else {
5649 				mutex_exit(&psce->psce_lock);
5650 			}
5651 		}
5652 	}
5653 	rw_exit(lock);
5654 	return (NULL);
5655 }
5656 
5657 /*
5658  * Find an entry for a particular clone in the direct list.
5659  */
5660 psce_t *
5661 pm_psc_clone_to_direct(int clone)
5662 {
5663 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5664 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5665 	    &pm_pscc_direct_rwlock));
5666 }
5667 
5668 /*
5669  * Find an entry for a particular clone in the interest list.
5670  */
5671 psce_t *
5672 pm_psc_clone_to_interest(int clone)
5673 {
5674 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5675 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5676 	    &pm_pscc_interest_rwlock));
5677 }
5678 
5679 /*
5680  * Put the given entry at the head of the blocked list
5681  */
5682 void
5683 pm_enqueue_blocked(pm_rsvp_t *p)
5684 {
5685 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5686 	ASSERT(p->pr_next == NULL);
5687 	ASSERT(p->pr_prev == NULL);
5688 	if (pm_blocked_list != NULL) {
5689 		p->pr_next = pm_blocked_list;
5690 		ASSERT(pm_blocked_list->pr_prev == NULL);
5691 		pm_blocked_list->pr_prev = p;
5692 		pm_blocked_list = p;
5693 	} else {
5694 		pm_blocked_list = p;
5695 	}
5696 }
5697 
5698 /*
5699  * Sets every power managed device back to its default threshold
5700  */
5701 void
5702 pm_all_to_default_thresholds(void)
5703 {
5704 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5705 	    (void *) &pm_system_idle_threshold);
5706 }
5707 
5708 static int
5709 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5710 {
5711 	int thr = (int)(*(int *)arg);
5712 
5713 	if (!PM_GET_PM_INFO(dip))
5714 		return (DDI_WALK_CONTINUE);
5715 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5716 	return (DDI_WALK_CONTINUE);
5717 }
5718 
5719 /*
5720  * Returns the current threshold value (in seconds) for the indicated component
5721  */
5722 int
5723 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5724 {
5725 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5726 		return (DDI_FAILURE);
5727 	} else {
5728 		*threshp = cur_threshold(dip, comp);
5729 		return (DDI_SUCCESS);
5730 	}
5731 }
5732 
5733 /*
5734  * To be called when changing the power level of a component of a device.
5735  * On some platforms, changing power on one device may require that power
5736  * be changed on other, related devices in the same transaction.  Thus, we
5737  * always pass this request to the platform power manager so that all the
5738  * affected devices will be locked.
5739  */
5740 void
5741 pm_lock_power(dev_info_t *dip, int *circp)
5742 {
5743 	power_req_t power_req;
5744 	int result;
5745 
5746 	power_req.request_type = PMR_PPM_LOCK_POWER;
5747 	power_req.req.ppm_lock_power_req.who = dip;
5748 	power_req.req.ppm_lock_power_req.circp = circp;
5749 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5750 }
5751 
5752 /*
5753  * Release the lock (or locks) acquired to change the power of a device.
5754  * See comments for pm_lock_power.
5755  */
5756 void
5757 pm_unlock_power(dev_info_t *dip, int circ)
5758 {
5759 	power_req_t power_req;
5760 	int result;
5761 
5762 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5763 	power_req.req.ppm_unlock_power_req.who = dip;
5764 	power_req.req.ppm_unlock_power_req.circ = circ;
5765 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5766 }
5767 
5768 
5769 /*
5770  * Attempt (without blocking) to acquire the lock(s) needed to change the
5771  * power of a component of a device.  See comments for pm_lock_power.
5772  *
5773  * Return: 1 if lock(s) acquired, 0 if not.
5774  */
5775 int
5776 pm_try_locking_power(dev_info_t *dip, int *circp)
5777 {
5778 	power_req_t power_req;
5779 	int result;
5780 
5781 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5782 	power_req.req.ppm_lock_power_req.who = dip;
5783 	power_req.req.ppm_lock_power_req.circp = circp;
5784 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5785 	return (result);
5786 }
5787 
5788 
5789 /*
5790  * Lock power state of a device.
5791  *
5792  * The implementation handles a special case where another thread may have
5793  * acquired the lock and created/launched this thread to do the work.  If
5794  * the lock cannot be acquired immediately, we check to see if this thread
5795  * is registered as a borrower of the lock.  If so, we may proceed without
5796  * the lock.  This assumes that the lending thread blocks on the completion
5797  * of this thread.
5798  *
5799  * Note 1: for use by ppm only.
5800  *
5801  * Note 2: On failing to get the lock immediately, we search lock_loan list
5802  * for curthread (as borrower of the lock).  On a hit, we check that the
5803  * lending thread already owns the lock we want.  It is safe to compare
5804  * devi_busy_thread and thread id of the lender because in the == case (the
5805  * only one we care about) we know that the owner is blocked.  Similarly,
5806  * If we find that curthread isn't registered as a lock borrower, it is safe
5807  * to use the blocking call (ndi_devi_enter) because we know that if we
5808  * weren't already listed as a borrower (upstream on the call stack) we won't
5809  * become one.
5810  */
5811 void
5812 pm_lock_power_single(dev_info_t *dip, int *circp)
5813 {
5814 	lock_loan_t *cur;
5815 
5816 	/* if the lock is available, we are done. */
5817 	if (ndi_devi_tryenter(dip, circp))
5818 		return;
5819 
5820 	mutex_enter(&pm_loan_lock);
5821 	/* see if our thread is registered as a lock borrower. */
5822 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5823 		if (cur->pmlk_borrower == curthread)
5824 			break;
5825 	mutex_exit(&pm_loan_lock);
5826 
5827 	/* if this thread not already registered, it is safe to block */
5828 	if (cur == NULL)
5829 		ndi_devi_enter(dip, circp);
5830 	else {
5831 		/* registered: does lender own the lock we want? */
5832 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5833 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5834 			cur->pmlk_dip = dip;
5835 		} else /* no: just block for it */
5836 			ndi_devi_enter(dip, circp);
5837 
5838 	}
5839 }
5840 
5841 /*
5842  * Drop the lock on the device's power state.  See comment for
5843  * pm_lock_power_single() for special implementation considerations.
5844  *
5845  * Note: for use by ppm only.
5846  */
5847 void
5848 pm_unlock_power_single(dev_info_t *dip, int circ)
5849 {
5850 	lock_loan_t *cur;
5851 
5852 	/* optimization: mutex not needed to check empty list */
5853 	if (lock_loan_head.pmlk_next == NULL) {
5854 		ndi_devi_exit(dip, circ);
5855 		return;
5856 	}
5857 
5858 	mutex_enter(&pm_loan_lock);
5859 	/* see if our thread is registered as a lock borrower. */
5860 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5861 		if (cur->pmlk_borrower == curthread)
5862 			break;
5863 	mutex_exit(&pm_loan_lock);
5864 
5865 	if (cur == NULL || cur->pmlk_dip != dip)
5866 		/* we acquired the lock directly, so return it */
5867 		ndi_devi_exit(dip, circ);
5868 }
5869 
5870 /*
5871  * Try to take the lock for changing the power level of a component.
5872  *
5873  * Note: for use by ppm only.
5874  */
5875 int
5876 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5877 {
5878 	return (ndi_devi_tryenter(dip, circp));
5879 }
5880 
5881 #ifdef	DEBUG
5882 /*
5883  * The following are used only to print out data structures for debugging
5884  */
5885 void
5886 prdeps(char *msg)
5887 {
5888 
5889 	pm_pdr_t *rp;
5890 	int i;
5891 
5892 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
5893 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
5894 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
5895 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
5896 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
5897 		    (void *)rp->pdr_next);
5898 		if (rp->pdr_kept_count != 0) {
5899 			pm_log("kept list = ");
5900 			i = 0;
5901 			while (i < rp->pdr_kept_count) {
5902 				pm_log("%s ", rp->pdr_kept_paths[i]);
5903 				i++;
5904 			}
5905 			pm_log("\n");
5906 		}
5907 	}
5908 }
5909 
5910 void
5911 pr_noinvol(char *hdr)
5912 {
5913 	pm_noinvol_t *ip;
5914 
5915 	pm_log("%s\n", hdr);
5916 	rw_enter(&pm_noinvol_rwlock, RW_READER);
5917 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
5918 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
5919 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
5920 	rw_exit(&pm_noinvol_rwlock);
5921 }
5922 #endif
5923 
5924 /*
5925  * Attempt to apply the thresholds indicated by rp to the node specified by
5926  * dip.
5927  */
5928 void
5929 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5930 {
5931 	PMD_FUNC(pmf, "apply_recorded_thresh")
5932 	int i, j;
5933 	int comps = PM_NUMCMPTS(dip);
5934 	struct pm_component *cp;
5935 	pm_pte_t *ep;
5936 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
5937 
5938 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
5939 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
5940 	PM_LOCK_DIP(dip);
5941 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
5942 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
5943 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
5944 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
5945 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
5946 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
5947 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
5948 		PM_UNLOCK_DIP(dip);
5949 		return;
5950 	}
5951 
5952 	ep = rp->ptr_entries;
5953 	/*
5954 	 * Here we do the special case of a device threshold
5955 	 */
5956 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
5957 		ASSERT(ep && ep->pte_numthresh == 1);
5958 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
5959 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
5960 		PM_UNLOCK_DIP(dip);
5961 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
5962 		if (autopm_enabled)
5963 			pm_rescan(dip);
5964 		return;
5965 	}
5966 	for (i = 0; i < comps; i++) {
5967 		cp = PM_CP(dip, i);
5968 		for (j = 0; j < ep->pte_numthresh; j++) {
5969 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
5970 			    "to %x\n", pmf, j, PM_DEVICE(dip),
5971 			    i, ep->pte_thresh[j]))
5972 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
5973 		}
5974 		ep++;
5975 	}
5976 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
5977 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
5978 	PM_UNLOCK_DIP(dip);
5979 
5980 	if (autopm_enabled)
5981 		pm_rescan(dip);
5982 }
5983 
5984 /*
5985  * Returns true if the threshold specified by rp could be applied to dip
5986  * (that is, the number of components and transitions are the same)
5987  */
5988 int
5989 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5990 {
5991 	PMD_FUNC(pmf, "valid_thresh")
5992 	int comps, i;
5993 	pm_component_t *cp;
5994 	pm_pte_t *ep;
5995 
5996 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
5997 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
5998 		    rp->ptr_physpath))
5999 		return (0);
6000 	}
6001 	/*
6002 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6003 	 * an entry with numcomps == 0, (since we don't know how many
6004 	 * components there are in advance).  This is always a valid
6005 	 * spec.
6006 	 */
6007 	if (rp->ptr_numcomps == 0) {
6008 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6009 		return (1);
6010 	}
6011 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6012 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6013 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6014 		return (0);
6015 	}
6016 	ep = rp->ptr_entries;
6017 	for (i = 0; i < comps; i++) {
6018 		cp = PM_CP(dip, i);
6019 		if ((ep + i)->pte_numthresh !=
6020 		    cp->pmc_comp.pmc_numlevels - 1) {
6021 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6022 			    pmf, rp->ptr_physpath, i,
6023 			    cp->pmc_comp.pmc_numlevels - 1,
6024 			    (ep + i)->pte_numthresh))
6025 			return (0);
6026 		}
6027 	}
6028 	return (1);
6029 }
6030 
6031 /*
6032  * Remove any recorded threshold for device physpath
6033  * We know there will be at most one.
6034  */
6035 void
6036 pm_unrecord_threshold(char *physpath)
6037 {
6038 	pm_thresh_rec_t *pptr, *ptr;
6039 
6040 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6041 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6042 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6043 			if (pptr) {
6044 				pptr->ptr_next = ptr->ptr_next;
6045 			} else {
6046 				ASSERT(pm_thresh_head == ptr);
6047 				pm_thresh_head = ptr->ptr_next;
6048 			}
6049 			kmem_free(ptr, ptr->ptr_size);
6050 			break;
6051 		}
6052 		pptr = ptr;
6053 	}
6054 	rw_exit(&pm_thresh_rwlock);
6055 }
6056 
6057 /*
6058  * Discard all recorded thresholds.  We are returning to the default pm state.
6059  */
6060 void
6061 pm_discard_thresholds(void)
6062 {
6063 	pm_thresh_rec_t *rp;
6064 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6065 	while (pm_thresh_head) {
6066 		rp = pm_thresh_head;
6067 		pm_thresh_head = rp->ptr_next;
6068 		kmem_free(rp, rp->ptr_size);
6069 	}
6070 	rw_exit(&pm_thresh_rwlock);
6071 }
6072 
6073 /*
6074  * Discard all recorded dependencies.  We are returning to the default pm state.
6075  */
6076 void
6077 pm_discard_dependencies(void)
6078 {
6079 	pm_pdr_t *rp;
6080 	int i;
6081 	size_t length;
6082 
6083 #ifdef DEBUG
6084 	if (pm_debug & PMD_DEP)
6085 		prdeps("Before discard\n");
6086 #endif
6087 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6088 
6089 #ifdef DEBUG
6090 	if (pm_debug & PMD_DEP)
6091 		prdeps("After discard\n");
6092 #endif
6093 	while (pm_dep_head) {
6094 		rp = pm_dep_head;
6095 		if (!rp->pdr_isprop) {
6096 			ASSERT(rp->pdr_satisfied == 0);
6097 			ASSERT(pm_unresolved_deps);
6098 			pm_unresolved_deps--;
6099 		} else {
6100 			ASSERT(pm_prop_deps);
6101 			pm_prop_deps--;
6102 		}
6103 		pm_dep_head = rp->pdr_next;
6104 		if (rp->pdr_kept_count)  {
6105 			for (i = 0; i < rp->pdr_kept_count; i++) {
6106 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6107 				kmem_free(rp->pdr_kept_paths[i], length);
6108 			}
6109 			kmem_free(rp->pdr_kept_paths,
6110 				rp->pdr_kept_count * sizeof (char **));
6111 		}
6112 		kmem_free(rp, rp->pdr_size);
6113 	}
6114 }
6115 
6116 
6117 static int
6118 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6119 {
6120 	_NOTE(ARGUNUSED(arg))
6121 	char *pathbuf;
6122 
6123 	if (PM_GET_PM_INFO(dip) == NULL)
6124 		return (DDI_WALK_CONTINUE);
6125 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6126 	(void) ddi_pathname(dip, pathbuf);
6127 	pm_free_keeper(pathbuf, 0);
6128 	kmem_free(pathbuf, MAXPATHLEN);
6129 	return (DDI_WALK_CONTINUE);
6130 }
6131 
6132 static int
6133 pm_kept_walk(dev_info_t *dip, void *arg)
6134 {
6135 	_NOTE(ARGUNUSED(arg))
6136 	char *pathbuf;
6137 
6138 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6139 	(void) ddi_pathname(dip, pathbuf);
6140 	(void) pm_kept(pathbuf);
6141 	kmem_free(pathbuf, MAXPATHLEN);
6142 
6143 	return (DDI_WALK_CONTINUE);
6144 }
6145 
6146 static int
6147 pm_keeper_walk(dev_info_t *dip, void *arg)
6148 {
6149 	_NOTE(ARGUNUSED(arg))
6150 	char *pathbuf;
6151 
6152 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6153 	(void) ddi_pathname(dip, pathbuf);
6154 	(void) pm_keeper(pathbuf);
6155 	kmem_free(pathbuf, MAXPATHLEN);
6156 
6157 	return (DDI_WALK_CONTINUE);
6158 }
6159 
6160 static char *
6161 pdw_type_decode(int type)
6162 {
6163 	switch (type) {
6164 	case PM_DEP_WK_POWER_ON:
6165 		return ("power on");
6166 	case PM_DEP_WK_POWER_OFF:
6167 		return ("power off");
6168 	case PM_DEP_WK_DETACH:
6169 		return ("detach");
6170 	case PM_DEP_WK_REMOVE_DEP:
6171 		return ("remove dep");
6172 	case PM_DEP_WK_BRINGUP_SELF:
6173 		return ("bringup self");
6174 	case PM_DEP_WK_RECORD_KEEPER:
6175 		return ("add dependent");
6176 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6177 		return ("add dependent property");
6178 	case PM_DEP_WK_KEPT:
6179 		return ("kept");
6180 	case PM_DEP_WK_KEEPER:
6181 		return ("keeper");
6182 	case PM_DEP_WK_ATTACH:
6183 		return ("attach");
6184 	case PM_DEP_WK_CHECK_KEPT:
6185 		return ("check kept");
6186 	case PM_DEP_WK_CPR_SUSPEND:
6187 		return ("suspend");
6188 	case PM_DEP_WK_CPR_RESUME:
6189 		return ("resume");
6190 	default:
6191 		return ("unknown");
6192 	}
6193 
6194 }
6195 
6196 static void
6197 pm_rele_dep(char *keeper)
6198 {
6199 	PMD_FUNC(pmf, "rele_dep")
6200 	pm_pdr_t *dp;
6201 	char *kept_path = NULL;
6202 	dev_info_t *kept = NULL;
6203 	int count = 0;
6204 
6205 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6206 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6207 			continue;
6208 		for (count = 0; count < dp->pdr_kept_count; count++) {
6209 			kept_path = dp->pdr_kept_paths[count];
6210 			if (kept_path == NULL)
6211 				continue;
6212 			kept = pm_name_to_dip(kept_path, 1);
6213 			if (kept) {
6214 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6215 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6216 				    keeper))
6217 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6218 				pm_rele_power(kept);
6219 				ddi_release_devi(kept);
6220 			}
6221 		}
6222 	}
6223 }
6224 
6225 /*
6226  * Called when we are just released from direct PM.  Bring ourself up
6227  * if our keeper is up since dependency is not honored while a kept
6228  * device is under direct PM.
6229  */
6230 static void
6231 pm_bring_self_up(char *keptpath)
6232 {
6233 	PMD_FUNC(pmf, "bring_self_up")
6234 	dev_info_t *kept;
6235 	dev_info_t *keeper;
6236 	pm_pdr_t *dp;
6237 	int i, j;
6238 	int up = 0, circ;
6239 
6240 	kept = pm_name_to_dip(keptpath, 1);
6241 	if (kept == NULL)
6242 		return;
6243 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6244 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6245 		if (dp->pdr_kept_count == 0)
6246 			continue;
6247 		for (i = 0; i < dp->pdr_kept_count; i++) {
6248 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6249 				continue;
6250 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6251 			if (keeper) {
6252 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6253 				    pmf, PM_DEVICE(keeper)))
6254 				PM_LOCK_POWER(keeper, &circ);
6255 				for (j = 0; j < PM_NUMCMPTS(keeper);
6256 				    j++) {
6257 					if (PM_CURPOWER(keeper, j)) {
6258 						PMD(PMD_KEEPS, ("%s: comp="
6259 						    "%d is up\n", pmf, j))
6260 						up++;
6261 					}
6262 				}
6263 				if (up) {
6264 					if (PM_SKBU(kept))
6265 						DEVI(kept)->devi_pm_flags &=
6266 						    ~PMC_SKIP_BRINGUP;
6267 					bring_pmdep_up(kept, 1);
6268 				}
6269 				PM_UNLOCK_POWER(keeper, circ);
6270 				ddi_release_devi(keeper);
6271 			}
6272 		}
6273 	}
6274 	ddi_release_devi(kept);
6275 }
6276 
6277 static void
6278 pm_process_dep_request(pm_dep_wk_t *work)
6279 {
6280 	PMD_FUNC(pmf, "dep_req")
6281 	int ret;
6282 
6283 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6284 	    pdw_type_decode(work->pdw_type)))
6285 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6286 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6287 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6288 
6289 	switch (work->pdw_type) {
6290 	case PM_DEP_WK_POWER_ON:
6291 		/* Bring up the kept devices and put a hold on them */
6292 		bring_wekeeps_up(work->pdw_keeper);
6293 		break;
6294 	case PM_DEP_WK_POWER_OFF:
6295 		/* Release the kept devices */
6296 		pm_rele_dep(work->pdw_keeper);
6297 		break;
6298 	case PM_DEP_WK_DETACH:
6299 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6300 		break;
6301 	case PM_DEP_WK_REMOVE_DEP:
6302 		pm_discard_dependencies();
6303 		break;
6304 	case PM_DEP_WK_BRINGUP_SELF:
6305 		/*
6306 		 * We deferred satisfying our dependency till now, so satisfy
6307 		 * it again and bring ourselves up.
6308 		 */
6309 		pm_bring_self_up(work->pdw_kept);
6310 		break;
6311 	case PM_DEP_WK_RECORD_KEEPER:
6312 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6313 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6314 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6315 		break;
6316 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6317 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6318 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6319 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6320 		break;
6321 	case PM_DEP_WK_KEPT:
6322 		ret = pm_kept(work->pdw_kept);
6323 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6324 		    ret))
6325 		break;
6326 	case PM_DEP_WK_KEEPER:
6327 		ret = pm_keeper(work->pdw_keeper);
6328 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6329 		    pmf, ret))
6330 		break;
6331 	case PM_DEP_WK_ATTACH:
6332 		ret = pm_keeper(work->pdw_keeper);
6333 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6334 		    pmf, ret))
6335 		ret = pm_kept(work->pdw_kept);
6336 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6337 		    pmf, ret))
6338 		break;
6339 	case PM_DEP_WK_CHECK_KEPT:
6340 		ret = pm_is_kept(work->pdw_kept);
6341 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6342 		    pmf, work->pdw_kept, ret))
6343 		break;
6344 	case PM_DEP_WK_CPR_SUSPEND:
6345 		pm_discard_dependencies();
6346 		break;
6347 	case PM_DEP_WK_CPR_RESUME:
6348 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6349 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6350 		break;
6351 	default:
6352 		ASSERT(0);
6353 		break;
6354 	}
6355 	/*
6356 	 * Free the work structure if the requester is not waiting
6357 	 * Otherwise it is the requester's responsiblity to free it.
6358 	 */
6359 	if (!work->pdw_wait) {
6360 		if (work->pdw_keeper)
6361 			kmem_free(work->pdw_keeper,
6362 			    strlen(work->pdw_keeper) + 1);
6363 		if (work->pdw_kept)
6364 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6365 		kmem_free(work, sizeof (pm_dep_wk_t));
6366 	} else {
6367 		/*
6368 		 * Notify requester if it is waiting for it.
6369 		 */
6370 		work->pdw_ret = ret;
6371 		work->pdw_done = 1;
6372 		cv_signal(&work->pdw_cv);
6373 	}
6374 }
6375 
6376 /*
6377  * Process PM dependency requests.
6378  */
6379 static void
6380 pm_dep_thread(void)
6381 {
6382 	pm_dep_wk_t *work;
6383 	callb_cpr_t cprinfo;
6384 
6385 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6386 	    "pm_dep_thread");
6387 	for (;;) {
6388 		mutex_enter(&pm_dep_thread_lock);
6389 		if (pm_dep_thread_workq == NULL) {
6390 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6391 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6392 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6393 		}
6394 		work = pm_dep_thread_workq;
6395 		pm_dep_thread_workq = work->pdw_next;
6396 		if (pm_dep_thread_tail == work)
6397 			pm_dep_thread_tail = work->pdw_next;
6398 		mutex_exit(&pm_dep_thread_lock);
6399 		pm_process_dep_request(work);
6400 
6401 	}
6402 	/*NOTREACHED*/
6403 }
6404 
6405 /*
6406  * Set the power level of the indicated device to unknown (if it is not a
6407  * backwards compatible device), as it has just been resumed, and it won't
6408  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6409  */
6410 void
6411 pm_forget_power_level(dev_info_t *dip)
6412 {
6413 	dev_info_t *pdip = ddi_get_parent(dip);
6414 	int i, count = 0;
6415 
6416 	if (!PM_ISBC(dip)) {
6417 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6418 			count += (PM_CURPOWER(dip, i) == 0);
6419 
6420 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6421 			e_pm_hold_rele_power(pdip, count);
6422 
6423 		/*
6424 		 * Count this as a power cycle if we care
6425 		 */
6426 		if (DEVI(dip)->devi_pm_volpmd &&
6427 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6428 			DEVI(dip)->devi_pm_volpmd = 0;
6429 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6430 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6431 	}
6432 }
6433 
6434 /*
6435  * This function advises the caller whether it should make a power-off
6436  * transition at this time or not.  If the transition is not advised
6437  * at this time, the time that the next power-off transition can
6438  * be made from now is returned through "intervalp" pointer.
6439  * This function returns:
6440  *
6441  *  1  power-off advised
6442  *  0  power-off not advised, intervalp will point to seconds from
6443  *	  now that a power-off is advised.  If it is passed the number
6444  *	  of years that policy specifies the device should last,
6445  *	  a large number is returned as the time interval.
6446  *  -1  error
6447  */
6448 int
6449 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6450 {
6451 	PMD_FUNC(pmf, "pm_trans_check")
6452 	char dbuf[DC_SCSI_MFR_LEN];
6453 	struct pm_scsi_cycles *scp;
6454 	int service_years, service_weeks, full_years;
6455 	time_t now, service_seconds, tdiff;
6456 	time_t within_year, when_allowed;
6457 	char *ptr;
6458 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6459 	int cycles_diff, cycles_over;
6460 
6461 	if (datap == NULL) {
6462 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6463 		return (-1);
6464 	}
6465 
6466 	if (datap->format == DC_SCSI_FORMAT) {
6467 		/*
6468 		 * Power cycles of the scsi drives are distributed
6469 		 * over 5 years with the following percentage ratio:
6470 		 *
6471 		 *	30%, 25%, 20%, 15%, and 10%
6472 		 *
6473 		 * The power cycle quota for each year is distributed
6474 		 * linearly through out the year.  The equation for
6475 		 * determining the expected cycles is:
6476 		 *
6477 		 *	e = a * (n / y)
6478 		 *
6479 		 * e = expected cycles
6480 		 * a = allocated cycles for this year
6481 		 * n = number of seconds since beginning of this year
6482 		 * y = number of seconds in a year
6483 		 *
6484 		 * Note that beginning of the year starts the day that
6485 		 * the drive has been put on service.
6486 		 *
6487 		 * If the drive has passed its expected cycles, we
6488 		 * can determine when it can start to power cycle
6489 		 * again to keep it on track to meet the 5-year
6490 		 * life expectancy.  The equation for determining
6491 		 * when to power cycle is:
6492 		 *
6493 		 *	w = y * (c / a)
6494 		 *
6495 		 * w = when it can power cycle again
6496 		 * y = number of seconds in a year
6497 		 * c = current number of cycles
6498 		 * a = allocated cycles for the year
6499 		 *
6500 		 */
6501 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6502 
6503 		scp = &datap->un.scsi_cycles;
6504 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6505 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6506 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6507 		if (scp->ncycles < 0 || scp->flag != 0) {
6508 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6509 			return (-1);
6510 		}
6511 
6512 		if (scp->ncycles > scp->lifemax) {
6513 			*intervalp = (LONG_MAX / hz);
6514 			return (0);
6515 		}
6516 
6517 		/*
6518 		 * convert service date to time_t
6519 		 */
6520 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6521 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6522 		ptr = dbuf;
6523 		service_years = stoi(&ptr) - EPOCH_YEAR;
6524 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6525 		    DC_SCSI_WEEK_LEN);
6526 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6527 
6528 		/*
6529 		 * scsi standard does not specify WW data,
6530 		 * could be (00-51) or (01-52)
6531 		 */
6532 		ptr = dbuf;
6533 		service_weeks = stoi(&ptr);
6534 		if (service_years < 0 ||
6535 		    service_weeks < 0 || service_weeks > 52) {
6536 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6537 			    pmf, service_years, service_weeks))
6538 			return (-1);
6539 		}
6540 
6541 		/*
6542 		 * calculate service date in seconds-since-epoch,
6543 		 * adding one day for each leap-year.
6544 		 *
6545 		 * (years-since-epoch + 2) fixes integer truncation,
6546 		 * example: (8) leap-years during [1972, 2000]
6547 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6548 		 */
6549 		service_seconds = (service_years * DC_SPY) +
6550 		    (service_weeks * DC_SPW) +
6551 		    (((service_years + 2) / 4) * DC_SPD);
6552 
6553 		now = gethrestime_sec();
6554 		/*
6555 		 * since the granularity of 'svc_date' is day not second,
6556 		 * 'now' should be rounded up to full day.
6557 		 */
6558 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6559 		if (service_seconds > now) {
6560 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6561 			    "than now (%ld)!\n", pmf, service_seconds, now))
6562 			return (-1);
6563 		}
6564 
6565 		tdiff = now - service_seconds;
6566 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6567 
6568 		/*
6569 		 * NOTE - Leap years are not considered in the calculations
6570 		 * below.
6571 		 */
6572 		full_years = (tdiff / DC_SPY);
6573 		if ((full_years >= DC_SCSI_NPY) &&
6574 		    (scp->ncycles <= scp->lifemax))
6575 			return (1);
6576 
6577 		/*
6578 		 * Determine what is the normal cycle usage for the
6579 		 * device at the beginning and the end of this year.
6580 		 */
6581 		lower_bound_cycles = (!full_years) ? 0 :
6582 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6583 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6584 
6585 		if (scp->ncycles <= lower_bound_cycles)
6586 			return (1);
6587 
6588 		/*
6589 		 * The linear slope that determines how many cycles
6590 		 * are allowed this year is number of seconds
6591 		 * passed this year over total number of seconds in a year.
6592 		 */
6593 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6594 		within_year = (tdiff % DC_SPY);
6595 		cycles_allowed = lower_bound_cycles +
6596 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6597 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6598 		    full_years, within_year))
6599 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6600 		    cycles_allowed))
6601 
6602 		if (scp->ncycles <= cycles_allowed)
6603 			return (1);
6604 
6605 		/*
6606 		 * The transition is not advised now but we can
6607 		 * determine when the next transition can be made.
6608 		 *
6609 		 * Depending on how many cycles the device has been
6610 		 * over-used, we may need to skip years with
6611 		 * different percentage quota in order to determine
6612 		 * when the next transition can be made.
6613 		 */
6614 		cycles_over = (scp->ncycles - lower_bound_cycles);
6615 		while (cycles_over > cycles_diff) {
6616 			full_years++;
6617 			if (full_years >= DC_SCSI_NPY) {
6618 				*intervalp = (LONG_MAX / hz);
6619 				return (0);
6620 			}
6621 			cycles_over -= cycles_diff;
6622 			lower_bound_cycles = upper_bound_cycles;
6623 			upper_bound_cycles =
6624 			    (scp->lifemax * pcnt[full_years]) / 100;
6625 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6626 		}
6627 
6628 		/*
6629 		 * The linear slope that determines when the next transition
6630 		 * can be made is the relative position of used cycles within a
6631 		 * year over total number of cycles within that year.
6632 		 */
6633 		when_allowed = service_seconds + (full_years * DC_SPY) +
6634 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6635 		*intervalp = (when_allowed - now);
6636 		if (*intervalp > (LONG_MAX / hz))
6637 			*intervalp = (LONG_MAX / hz);
6638 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6639 		    *intervalp))
6640 		return (0);
6641 	}
6642 
6643 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6644 	return (-1);
6645 }
6646 
6647 /*
6648  * Nexus drivers call into pm framework to indicate which child driver is about
6649  * to be installed.  In some platforms, ppm may need to configure the hardware
6650  * for successful installation of a driver.
6651  */
6652 int
6653 pm_init_child(dev_info_t *dip)
6654 {
6655 	power_req_t power_req;
6656 
6657 	ASSERT(ddi_binding_name(dip));
6658 	ASSERT(ddi_get_name_addr(dip));
6659 	pm_ppm_claim(dip);
6660 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6661 		power_req.request_type = PMR_PPM_INIT_CHILD;
6662 		power_req.req.ppm_config_req.who = dip;
6663 		ASSERT(PPM(dip) != NULL);
6664 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6665 		    NULL));
6666 	} else {
6667 #ifdef DEBUG
6668 		/* pass it to the default handler so we can debug things */
6669 		power_req.request_type = PMR_PPM_INIT_CHILD;
6670 		power_req.req.ppm_config_req.who = dip;
6671 		(void) pm_ctlops(NULL, dip,
6672 		    DDI_CTLOPS_POWER, &power_req, NULL);
6673 #endif
6674 	}
6675 	return (DDI_SUCCESS);
6676 }
6677 
6678 /*
6679  * Bring parent of a node that is about to be probed up to full power, and
6680  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6681  * it is time to let it go down again
6682  */
6683 void
6684 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6685 {
6686 	int result;
6687 	power_req_t power_req;
6688 
6689 	bzero(cp, sizeof (*cp));
6690 	cp->ppc_dip = dip;
6691 
6692 	pm_ppm_claim(dip);
6693 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6694 		power_req.request_type = PMR_PPM_PRE_PROBE;
6695 		power_req.req.ppm_config_req.who = dip;
6696 		ASSERT(PPM(dip) != NULL);
6697 		(void) pm_ctlops(PPM(dip), dip,
6698 		    DDI_CTLOPS_POWER, &power_req, &result);
6699 		cp->ppc_ppm = PPM(dip);
6700 	} else {
6701 #ifdef DEBUG
6702 		/* pass it to the default handler so we can debug things */
6703 		power_req.request_type = PMR_PPM_PRE_PROBE;
6704 		power_req.req.ppm_config_req.who = dip;
6705 		(void) pm_ctlops(NULL, dip,
6706 		    DDI_CTLOPS_POWER, &power_req, &result);
6707 #endif
6708 		cp->ppc_ppm = NULL;
6709 	}
6710 }
6711 
6712 int
6713 pm_pre_config(dev_info_t *dip, char *devnm)
6714 {
6715 	PMD_FUNC(pmf, "pre_config")
6716 	int ret;
6717 
6718 	if (MDI_VHCI(dip)) {
6719 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6720 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6721 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6722 	} else if (!PM_GET_PM_INFO(dip))
6723 		return (DDI_SUCCESS);
6724 
6725 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6726 	pm_hold_power(dip);
6727 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6728 	if (ret != DDI_SUCCESS)
6729 		pm_rele_power(dip);
6730 	return (ret);
6731 }
6732 
6733 /*
6734  * This routine is called by devfs during its walk to unconfigue a node.
6735  * If the call is due to auto mod_unloads and the dip is not at its
6736  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6737  * return DDI_SUCCESS.
6738  */
6739 int
6740 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6741 {
6742 	PMD_FUNC(pmf, "pre_unconfig")
6743 	int ret;
6744 
6745 	if (MDI_VHCI(dip)) {
6746 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6747 		    PM_DEVICE(dip), flags))
6748 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6749 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6750 	} else if (!PM_GET_PM_INFO(dip))
6751 		return (DDI_SUCCESS);
6752 
6753 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6754 	    flags))
6755 	*held = 0;
6756 
6757 	/*
6758 	 * If the dip is a leaf node, don't power it up.
6759 	 */
6760 	if (!ddi_get_child(dip))
6761 		return (DDI_SUCCESS);
6762 
6763 	/*
6764 	 * Do not power up the node if it is called due to auto-modunload.
6765 	 */
6766 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6767 		return (DDI_FAILURE);
6768 
6769 	pm_hold_power(dip);
6770 	*held = 1;
6771 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6772 	if (ret != DDI_SUCCESS) {
6773 		pm_rele_power(dip);
6774 		*held = 0;
6775 	}
6776 	return (ret);
6777 }
6778 
6779 /*
6780  * Notify ppm of attach action.  Parent is already held at full power by
6781  * probe action.
6782  */
6783 void
6784 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6785 {
6786 	static char *me = "pm_pre_attach";
6787 	power_req_t power_req;
6788 	int result;
6789 
6790 	/*
6791 	 * Initialize and fill in the PPM cookie
6792 	 */
6793 	bzero(cp, sizeof (*cp));
6794 	cp->ppc_cmd = (int)cmd;
6795 	cp->ppc_ppm = PPM(dip);
6796 	cp->ppc_dip = dip;
6797 
6798 	/*
6799 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6800 	 * Power Management stuff. DDI_RESUME also has to purge it's
6801 	 * powerlevel information.
6802 	 */
6803 	switch (cmd) {
6804 	case DDI_ATTACH:
6805 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6806 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6807 			power_req.req.ppm_config_req.who = dip;
6808 			ASSERT(PPM(dip));
6809 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6810 			    &power_req, &result);
6811 		}
6812 #ifdef DEBUG
6813 		else {
6814 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6815 			power_req.req.ppm_config_req.who = dip;
6816 			(void) pm_ctlops(NULL, dip,
6817 			    DDI_CTLOPS_POWER, &power_req, &result);
6818 		}
6819 #endif
6820 		break;
6821 	case DDI_RESUME:
6822 		pm_forget_power_level(dip);
6823 
6824 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6825 			power_req.request_type = PMR_PPM_PRE_RESUME;
6826 			power_req.req.resume_req.who = cp->ppc_dip;
6827 			power_req.req.resume_req.cmd =
6828 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6829 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6830 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6831 			    DDI_CTLOPS_POWER, &power_req, &result);
6832 		}
6833 #ifdef DEBUG
6834 		else {
6835 			power_req.request_type = PMR_PPM_PRE_RESUME;
6836 			power_req.req.resume_req.who = cp->ppc_dip;
6837 			power_req.req.resume_req.cmd =
6838 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6839 			(void) pm_ctlops(NULL, cp->ppc_dip,
6840 			    DDI_CTLOPS_POWER, &power_req, &result);
6841 		}
6842 #endif
6843 		break;
6844 
6845 	case DDI_PM_RESUME:
6846 		break;
6847 
6848 	default:
6849 		panic(me);
6850 	}
6851 }
6852 
6853 /*
6854  * Nexus drivers call into pm framework to indicate which child driver is
6855  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6856  * hardware since the device driver is no longer installed.
6857  */
6858 int
6859 pm_uninit_child(dev_info_t *dip)
6860 {
6861 	power_req_t power_req;
6862 
6863 	ASSERT(ddi_binding_name(dip));
6864 	ASSERT(ddi_get_name_addr(dip));
6865 	pm_ppm_claim(dip);
6866 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6867 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6868 		power_req.req.ppm_config_req.who = dip;
6869 		ASSERT(PPM(dip));
6870 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6871 		    NULL));
6872 	} else {
6873 #ifdef DEBUG
6874 		/* pass it to the default handler so we can debug things */
6875 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6876 		power_req.req.ppm_config_req.who = dip;
6877 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6878 #endif
6879 	}
6880 	return (DDI_SUCCESS);
6881 }
6882 /*
6883  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
6884  * Also notify ppm of result of probe if there is a ppm that cares
6885  */
6886 void
6887 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
6888 {
6889 	_NOTE(ARGUNUSED(probe_failed))
6890 	int result;
6891 	power_req_t power_req;
6892 
6893 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6894 		power_req.request_type = PMR_PPM_POST_PROBE;
6895 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6896 		power_req.req.ppm_config_req.result = ret;
6897 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6898 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
6899 		    &power_req, &result);
6900 	}
6901 #ifdef DEBUG
6902 	else {
6903 		power_req.request_type = PMR_PPM_POST_PROBE;
6904 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6905 		power_req.req.ppm_config_req.result = ret;
6906 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
6907 		    &power_req, &result);
6908 	}
6909 #endif
6910 }
6911 
6912 void
6913 pm_post_config(dev_info_t *dip, char *devnm)
6914 {
6915 	PMD_FUNC(pmf, "post_config")
6916 
6917 	if (MDI_VHCI(dip)) {
6918 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6919 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
6920 		return;
6921 	} else if (!PM_GET_PM_INFO(dip))
6922 		return;
6923 
6924 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6925 	pm_rele_power(dip);
6926 }
6927 
6928 void
6929 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
6930 {
6931 	PMD_FUNC(pmf, "post_unconfig")
6932 
6933 	if (MDI_VHCI(dip)) {
6934 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
6935 		    PM_DEVICE(dip), held))
6936 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
6937 		return;
6938 	} else if (!PM_GET_PM_INFO(dip))
6939 		return;
6940 
6941 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
6942 	    held))
6943 	if (!held)
6944 		return;
6945 	/*
6946 	 * We have held power in pre_unconfig, release it here.
6947 	 */
6948 	pm_rele_power(dip);
6949 }
6950 
6951 /*
6952  * Notify ppm of result of attach if there is a ppm that cares
6953  */
6954 void
6955 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
6956 {
6957 	int result;
6958 	power_req_t power_req;
6959 	dev_info_t	*dip;
6960 
6961 	if (cp->ppc_cmd != DDI_ATTACH)
6962 		return;
6963 
6964 	dip = cp->ppc_dip;
6965 
6966 	if (ret == DDI_SUCCESS) {
6967 		/*
6968 		 * Attach succeeded, so proceed to doing post-attach pm tasks
6969 		 */
6970 		if (PM_GET_PM_INFO(dip) == NULL)
6971 			(void) pm_start(dip);
6972 	} else {
6973 		/*
6974 		 * Attach may have got pm started before failing
6975 		 */
6976 		pm_stop(dip);
6977 	}
6978 
6979 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6980 		power_req.request_type = PMR_PPM_POST_ATTACH;
6981 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6982 		power_req.req.ppm_config_req.result = ret;
6983 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6984 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6985 		    DDI_CTLOPS_POWER, &power_req, &result);
6986 	}
6987 #ifdef DEBUG
6988 	else {
6989 		power_req.request_type = PMR_PPM_POST_ATTACH;
6990 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6991 		power_req.req.ppm_config_req.result = ret;
6992 		(void) pm_ctlops(NULL, cp->ppc_dip,
6993 		    DDI_CTLOPS_POWER, &power_req, &result);
6994 	}
6995 #endif
6996 }
6997 
6998 /*
6999  * Notify ppm of attach action.  Parent is already held at full power by
7000  * probe action.
7001  */
7002 void
7003 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7004 {
7005 	int result;
7006 	power_req_t power_req;
7007 
7008 	bzero(cp, sizeof (*cp));
7009 	cp->ppc_dip = dip;
7010 	cp->ppc_cmd = (int)cmd;
7011 
7012 	switch (cmd) {
7013 	case DDI_DETACH:
7014 		pm_detaching(dip);		/* suspend pm while detaching */
7015 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7016 			power_req.request_type = PMR_PPM_PRE_DETACH;
7017 			power_req.req.ppm_config_req.who = dip;
7018 			ASSERT(PPM(dip));
7019 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7020 			    &power_req, &result);
7021 			cp->ppc_ppm = PPM(dip);
7022 		} else {
7023 #ifdef DEBUG
7024 			/* pass to the default handler so we can debug things */
7025 			power_req.request_type = PMR_PPM_PRE_DETACH;
7026 			power_req.req.ppm_config_req.who = dip;
7027 			(void) pm_ctlops(NULL, dip,
7028 			    DDI_CTLOPS_POWER, &power_req, &result);
7029 #endif
7030 			cp->ppc_ppm = NULL;
7031 		}
7032 		break;
7033 
7034 	default:
7035 		break;
7036 	}
7037 }
7038 
7039 /*
7040  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7041  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7042  * make an entry to record the details, which includes certain flag settings.
7043  */
7044 static void
7045 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7046     int wasvolpmd, major_t major)
7047 {
7048 	PMD_FUNC(pmf, "record_invol_path")
7049 	major_t pm_path_to_major(char *);
7050 	size_t plen;
7051 	pm_noinvol_t *ip, *np, *pp;
7052 	pp = NULL;
7053 
7054 	plen = strlen(path) + 1;
7055 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7056 	np->ni_size = plen;
7057 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7058 	np->ni_noinvolpm = noinvolpm;
7059 	np->ni_volpmd = volpmd;
7060 	np->ni_wasvolpmd = wasvolpmd;
7061 	np->ni_flags = flags;
7062 	(void) strcpy(np->ni_path, path);
7063 	/*
7064 	 * If we haven't actually seen the node attached, it is hard to figure
7065 	 * out its major.  If we could hold the node by path, we would be much
7066 	 * happier here.
7067 	 */
7068 	if (major == (major_t)-1) {
7069 		np->ni_major = pm_path_to_major(path);
7070 	} else {
7071 		np->ni_major = major;
7072 	}
7073 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7074 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7075 		int comp = strcmp(path, ip->ni_path);
7076 		if (comp < 0) {
7077 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7078 			    pmf, path, ip->ni_path))
7079 			/* insert before current entry */
7080 			np->ni_next = ip;
7081 			if (pp) {
7082 				pp->ni_next = np;
7083 			} else {
7084 				pm_noinvol_head = np;
7085 			}
7086 			rw_exit(&pm_noinvol_rwlock);
7087 #ifdef DEBUG
7088 			if (pm_debug & PMD_NOINVOL)
7089 				pr_noinvol("record_invol_path exit0");
7090 #endif
7091 			return;
7092 		} else if (comp == 0) {
7093 			panic("%s already in pm_noinvol list", path);
7094 		}
7095 	}
7096 	/*
7097 	 * If we did not find an entry in the list that this should go before,
7098 	 * then it must go at the end
7099 	 */
7100 	if (pp) {
7101 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7102 		    pp->ni_path))
7103 		ASSERT(pp->ni_next == 0);
7104 		pp->ni_next = np;
7105 	} else {
7106 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7107 		ASSERT(!pm_noinvol_head);
7108 		pm_noinvol_head = np;
7109 	}
7110 	rw_exit(&pm_noinvol_rwlock);
7111 #ifdef DEBUG
7112 	if (pm_debug & PMD_NOINVOL)
7113 		pr_noinvol("record_invol_path exit");
7114 #endif
7115 }
7116 
7117 void
7118 pm_record_invol(dev_info_t *dip)
7119 {
7120 	char *pathbuf;
7121 	int pm_all_components_off(dev_info_t *);
7122 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7123 
7124 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7125 	(void) ddi_pathname(dip, pathbuf);
7126 
7127 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7128 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7129 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7130 
7131 	/*
7132 	 * If this child's detach will be holding up its ancestors, then we
7133 	 * allow for an exception to that if all children of this type have
7134 	 * gone down voluntarily.
7135 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7136 	 */
7137 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7138 	    dip);
7139 	kmem_free(pathbuf, MAXPATHLEN);
7140 }
7141 
7142 void
7143 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7144 {
7145 	dev_info_t *dip = cp->ppc_dip;
7146 	int result;
7147 	power_req_t power_req;
7148 
7149 	switch (cp->ppc_cmd) {
7150 	case DDI_DETACH:
7151 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7152 			power_req.request_type = PMR_PPM_POST_DETACH;
7153 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7154 			power_req.req.ppm_config_req.result = ret;
7155 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7156 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7157 			    DDI_CTLOPS_POWER, &power_req, &result);
7158 		}
7159 #ifdef DEBUG
7160 		else {
7161 			power_req.request_type = PMR_PPM_POST_DETACH;
7162 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7163 			power_req.req.ppm_config_req.result = ret;
7164 			(void) pm_ctlops(NULL, cp->ppc_dip,
7165 			    DDI_CTLOPS_POWER, &power_req, &result);
7166 		}
7167 #endif
7168 		if (ret == DDI_SUCCESS) {
7169 			/*
7170 			 * For hotplug detach we assume it is *really* gone
7171 			 */
7172 			if (cp->ppc_cmd == DDI_DETACH &&
7173 			    ((DEVI(dip)->devi_pm_flags &
7174 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7175 			    DEVI(dip)->devi_pm_noinvolpm))
7176 				pm_record_invol(dip);
7177 			DEVI(dip)->devi_pm_flags &=
7178 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7179 
7180 			/*
7181 			 * If console fb is detaching, then we don't need to
7182 			 * worry any more about it going off (pm_detaching has
7183 			 * brought up all components)
7184 			 */
7185 			if (PM_IS_CFB(dip)) {
7186 				mutex_enter(&pm_cfb_lock);
7187 				ASSERT(cfb_dip_detaching);
7188 				ASSERT(cfb_dip == NULL);
7189 				ASSERT(pm_cfb_comps_off == 0);
7190 				cfb_dip_detaching = NULL;
7191 				mutex_exit(&pm_cfb_lock);
7192 			}
7193 			pm_stop(dip);	/* make it permanent */
7194 		} else {
7195 			if (PM_IS_CFB(dip)) {
7196 				mutex_enter(&pm_cfb_lock);
7197 				ASSERT(cfb_dip_detaching);
7198 				ASSERT(cfb_dip == NULL);
7199 				ASSERT(pm_cfb_comps_off == 0);
7200 				cfb_dip = cfb_dip_detaching;
7201 				cfb_dip_detaching = NULL;
7202 				mutex_exit(&pm_cfb_lock);
7203 			}
7204 			pm_detach_failed(dip);	/* resume power management */
7205 		}
7206 		break;
7207 	case DDI_PM_SUSPEND:
7208 		break;
7209 	case DDI_SUSPEND:
7210 		break;				/* legal, but nothing to do */
7211 	default:
7212 #ifdef DEBUG
7213 		panic("pm_post_detach: unrecognized cmd %d for detach",
7214 		    cp->ppc_cmd);
7215 		/*NOTREACHED*/
7216 #else
7217 		break;
7218 #endif
7219 	}
7220 }
7221 
7222 /*
7223  * Called after vfs_mountroot has got the clock started to fix up timestamps
7224  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7225  * devices look busy but have a 0 busycnt
7226  */
7227 int
7228 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7229 {
7230 	_NOTE(ARGUNUSED(arg))
7231 
7232 	pm_info_t *info = PM_GET_PM_INFO(dip);
7233 	struct pm_component *cp;
7234 	int i;
7235 
7236 	if (!info)
7237 		return (DDI_WALK_CONTINUE);
7238 	PM_LOCK_BUSY(dip);
7239 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7240 		cp = PM_CP(dip, i);
7241 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7242 			cp->pmc_timestamp = gethrestime_sec();
7243 	}
7244 	PM_UNLOCK_BUSY(dip);
7245 	return (DDI_WALK_CONTINUE);
7246 }
7247 
7248 /*
7249  * Called at attach time to see if the device being attached has a record in
7250  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7251  * parents and set a flag in the dip
7252  */
7253 void
7254 pm_noinvol_specd(dev_info_t *dip)
7255 {
7256 	PMD_FUNC(pmf, "noinvol_specd")
7257 	char *pathbuf;
7258 	pm_noinvol_t *ip, *pp = NULL;
7259 	int wasvolpmd;
7260 	int found = 0;
7261 
7262 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7263 		return;
7264 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7265 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7266 	(void) ddi_pathname(dip, pathbuf);
7267 
7268 	PM_LOCK_DIP(dip);
7269 	DEVI(dip)->devi_pm_volpmd = 0;
7270 	DEVI(dip)->devi_pm_noinvolpm = 0;
7271 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7272 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7273 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7274 		    pmf, pathbuf, ip->ni_path))
7275 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7276 			found++;
7277 			break;
7278 		}
7279 	}
7280 	rw_exit(&pm_noinvol_rwlock);
7281 	if (!found) {
7282 		PM_UNLOCK_DIP(dip);
7283 		kmem_free(pathbuf, MAXPATHLEN);
7284 		return;
7285 	}
7286 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7287 	pp = NULL;
7288 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7289 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7290 		    pmf, pathbuf, ip->ni_path))
7291 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7292 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7293 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7294 			/*
7295 			 * Handle special case of console fb
7296 			 */
7297 			if (PM_IS_CFB(dip)) {
7298 				mutex_enter(&pm_cfb_lock);
7299 				cfb_dip = dip;
7300 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7301 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7302 				mutex_exit(&pm_cfb_lock);
7303 			}
7304 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7305 			ASSERT((DEVI(dip)->devi_pm_flags &
7306 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7307 			    DEVI(dip)->devi_pm_noinvolpm);
7308 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7309 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7310 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7311 			    ip->ni_noinvolpm, ip->ni_volpmd,
7312 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7313 			/*
7314 			 * free the entry in hopes the list will now be empty
7315 			 * and we won't have to search it any more until the
7316 			 * device detaches
7317 			 */
7318 			if (pp) {
7319 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7320 				    pmf, ip->ni_path, pp->ni_path))
7321 				pp->ni_next = ip->ni_next;
7322 			} else {
7323 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7324 				    pmf, ip->ni_path))
7325 				ASSERT(pm_noinvol_head == ip);
7326 				pm_noinvol_head = ip->ni_next;
7327 			}
7328 			PM_UNLOCK_DIP(dip);
7329 			wasvolpmd = ip->ni_wasvolpmd;
7330 			rw_exit(&pm_noinvol_rwlock);
7331 			kmem_free(ip->ni_path, ip->ni_size);
7332 			kmem_free(ip, sizeof (*ip));
7333 			/*
7334 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7335 			 * (and volpmd if appropriate)
7336 			 */
7337 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7338 			    wasvolpmd, pathbuf, dip);
7339 #ifdef DEBUG
7340 			if (pm_debug & PMD_NOINVOL)
7341 				pr_noinvol("noinvol_specd exit");
7342 #endif
7343 			kmem_free(pathbuf, MAXPATHLEN);
7344 			return;
7345 		}
7346 	}
7347 	kmem_free(pathbuf, MAXPATHLEN);
7348 	rw_exit(&pm_noinvol_rwlock);
7349 	PM_UNLOCK_DIP(dip);
7350 }
7351 
7352 int
7353 pm_all_components_off(dev_info_t *dip)
7354 {
7355 	int i;
7356 	pm_component_t *cp;
7357 
7358 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7359 		cp = PM_CP(dip, i);
7360 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7361 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7362 			return (0);
7363 	}
7364 	return (1);	/* all off */
7365 }
7366 
7367 /*
7368  * Make sure that all "no involuntary power cycles" devices are attached.
7369  * Called before doing a cpr suspend to make sure the driver has a say about
7370  * the power cycle
7371  */
7372 int
7373 pm_reattach_noinvol(void)
7374 {
7375 	PMD_FUNC(pmf, "reattach_noinvol")
7376 	pm_noinvol_t *ip;
7377 	char *path;
7378 	dev_info_t *dip;
7379 
7380 	/*
7381 	 * Prevent the modunload thread from unloading any modules until we
7382 	 * have completely stopped all kernel threads.
7383 	 */
7384 	modunload_disable();
7385 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7386 		/*
7387 		 * Forget we'v ever seen any entry
7388 		 */
7389 		ip->ni_persistent = 0;
7390 	}
7391 restart:
7392 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7393 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7394 		major_t maj;
7395 		maj = ip->ni_major;
7396 		path = ip->ni_path;
7397 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7398 			if (ip->ni_persistent) {
7399 				/*
7400 				 * If we weren't able to make this entry
7401 				 * go away, then we give up, as
7402 				 * holding/attaching the driver ought to have
7403 				 * resulted in this entry being deleted
7404 				 */
7405 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7406 				    "(%s|%d)\n", pmf, ip->ni_path,
7407 				    ddi_major_to_name(maj), (int)maj))
7408 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7409 				    ip->ni_path);
7410 				modunload_enable();
7411 				rw_exit(&pm_noinvol_rwlock);
7412 				return (0);
7413 			}
7414 			ip->ni_persistent++;
7415 			rw_exit(&pm_noinvol_rwlock);
7416 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7417 			dip = e_ddi_hold_devi_by_path(path, 0);
7418 			if (dip == NULL) {
7419 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7420 				    pmf, path, (int)maj))
7421 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7422 				    "driver", path);
7423 				modunload_enable();
7424 				return (0);
7425 			} else {
7426 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7427 				/*
7428 				 * Since the modunload thread is stopped, we
7429 				 * don't have to keep the driver held, which
7430 				 * saves a ton of bookkeeping
7431 				 */
7432 				ddi_release_devi(dip);
7433 				goto restart;
7434 			}
7435 		} else {
7436 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7437 			    pmf, ip->ni_path))
7438 			continue;
7439 		}
7440 	}
7441 	rw_exit(&pm_noinvol_rwlock);
7442 	return (1);
7443 }
7444 
/*
 * Re-enable module unloading, which pm_reattach_noinvol() leaves disabled
 * when it returns success.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7450 
7451 /*
7452  * Display pm support code
7453  */
7454 
7455 
7456 /*
7457  * console frame-buffer power-mgmt gets enabled when debugging
7458  * services are not present or console fbpm override is set
7459  */
7460 void
7461 pm_cfb_setup(const char *stdout_path)
7462 {
7463 	PMD_FUNC(pmf, "cfb_setup")
7464 	extern int obpdebug;
7465 	char *devname;
7466 	dev_info_t *dip;
7467 	int devname_len;
7468 	extern dev_info_t *fbdip;
7469 
7470 	/*
7471 	 * By virtue of this function being called (from consconfig),
7472 	 * we know stdout is a framebuffer.
7473 	 */
7474 	stdout_is_framebuffer = 1;
7475 
7476 	if (obpdebug || (boothowto & RB_DEBUG)) {
7477 		if (pm_cfb_override == 0) {
7478 			/*
7479 			 * Console is frame buffer, but we want to suppress
7480 			 * pm on it because of debugging setup
7481 			 */
7482 			pm_cfb_enabled = 0;
7483 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7484 			    "console power management.");
7485 			/*
7486 			 * however, we still need to know which is the console
7487 			 * fb in order to suppress pm on it
7488 			 */
7489 		} else {
7490 			cmn_err(CE_WARN, "Kernel debugger present: see "
7491 			    "kmdb(1M) for interaction with power management.");
7492 		}
7493 	}
7494 #ifdef DEBUG
7495 	/*
7496 	 * IF console is fb and is power managed, don't do prom_printfs from
7497 	 * pm debug macro
7498 	 */
7499 	if (pm_cfb_enabled) {
7500 		if (pm_debug)
7501 			prom_printf("pm debug output will be to log only\n");
7502 		pm_divertdebug++;
7503 	}
7504 #endif
7505 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7506 	devname_len = strlen(devname) + 1;
7507 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7508 	/* if the driver is attached */
7509 	if ((dip = fbdip) != NULL) {
7510 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7511 		    PM_DEVICE(dip)))
7512 		/*
7513 		 * We set up here as if the driver were power manageable in case
7514 		 * we get a later attach of a pm'able driver (which would result
7515 		 * in a panic later)
7516 		 */
7517 		cfb_dip = dip;
7518 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7519 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7520 		    PM_DEVICE(dip)))
7521 #ifdef DEBUG
7522 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7523 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7524 			    pmf, PM_DEVICE(dip)))
7525 		}
7526 #endif
7527 	} else {
7528 		char *ep;
7529 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7530 		pm_record_invol_path(devname,
7531 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7532 		    (major_t)-1);
7533 		for (ep = strrchr(devname, '/'); ep != devname;
7534 		    ep = strrchr(devname, '/')) {
7535 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7536 			*ep = '\0';
7537 			dip = pm_name_to_dip(devname, 0);
7538 			if (dip != NULL) {
7539 				/*
7540 				 * Walk up the tree incrementing
7541 				 * devi_pm_noinvolpm
7542 				 */
7543 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7544 				    0, 0, devname, dip);
7545 				break;
7546 			} else {
7547 				pm_record_invol_path(devname,
7548 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7549 			}
7550 		}
7551 	}
7552 	kmem_free(devname, devname_len);
7553 }
7554 
7555 void
7556 pm_cfb_rele(void)
7557 {
7558 	mutex_enter(&pm_cfb_lock);
7559 	/*
7560 	 * this call isn't using the console any  more, it is ok to take it
7561 	 * down if the count goes to 0
7562 	 */
7563 	cfb_inuse--;
7564 	mutex_exit(&pm_cfb_lock);
7565 }
7566 
7567 /*
7568  * software interrupt handler for fbpm; this function exists because we can't
7569  * bring up the frame buffer power from above lock level.  So if we need to,
7570  * we instead schedule a softint that runs this routine and takes us into
7571  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7572  */
7573 static uint_t
7574 pm_cfb_softint(caddr_t int_handler_arg)
7575 {
7576 	_NOTE(ARGUNUSED(int_handler_arg))
7577 	int rval = DDI_INTR_UNCLAIMED;
7578 
7579 	mutex_enter(&pm_cfb_lock);
7580 	if (pm_soft_pending) {
7581 		mutex_exit(&pm_cfb_lock);
7582 		debug_enter((char *)NULL);
7583 		/* acquired in debug_enter before calling pm_cfb_trigger */
7584 		pm_cfb_rele();
7585 		mutex_enter(&pm_cfb_lock);
7586 		pm_soft_pending = 0;
7587 		mutex_exit(&pm_cfb_lock);
7588 		rval = DDI_INTR_CLAIMED;
7589 	} else
7590 		mutex_exit(&pm_cfb_lock);
7591 
7592 	return (rval);
7593 }
7594 
7595 void
7596 pm_cfb_setup_intr(void)
7597 {
7598 	PMD_FUNC(pmf, "cfb_setup_intr")
7599 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7600 	void pm_cfb_check_and_powerup(void);
7601 
7602 	if (!stdout_is_framebuffer) {
7603 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7604 		return;
7605 	}
7606 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7607 #ifdef DEBUG
7608 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7609 #endif
7610 	/*
7611 	 * setup software interrupt handler
7612 	 */
7613 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7614 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7615 		panic("pm: unable to register soft intr.");
7616 
7617 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7618 }
7619 
7620 /*
7621  * Checks to see if it is safe to write to the console wrt power management
7622  * (i.e. if the console is a framebuffer, then it must be at full power)
7623  * returns 1 when power is off (power-up is needed)
7624  * returns 0 when power is on (power-up not needed)
7625  */
7626 int
7627 pm_cfb_check_and_hold(void)
7628 {
7629 	/*
7630 	 * cfb_dip is set iff console is a power manageable frame buffer
7631 	 * device
7632 	 */
7633 	extern int modrootloaded;
7634 
7635 	mutex_enter(&pm_cfb_lock);
7636 	cfb_inuse++;
7637 	ASSERT(cfb_inuse);	/* wrap? */
7638 	if (modrootloaded && cfb_dip) {
7639 		/*
7640 		 * don't power down the frame buffer, the prom is using it
7641 		 */
7642 		if (pm_cfb_comps_off) {
7643 			mutex_exit(&pm_cfb_lock);
7644 			return (1);
7645 		}
7646 	}
7647 	mutex_exit(&pm_cfb_lock);
7648 	return (0);
7649 }
7650 
7651 /*
7652  * turn on cfb power (which is known to be off).
7653  * Must be called below lock level!
7654  */
7655 void
7656 pm_cfb_powerup(void)
7657 {
7658 	pm_info_t *info;
7659 	int norm;
7660 	int ccount, ci;
7661 	int unused;
7662 #ifdef DEBUG
7663 	/*
7664 	 * Can't reenter prom_prekern, so suppress pm debug messages
7665 	 * (still go to circular buffer).
7666 	 */
7667 	mutex_enter(&pm_debug_lock);
7668 	pm_divertdebug++;
7669 	mutex_exit(&pm_debug_lock);
7670 #endif
7671 	info = PM_GET_PM_INFO(cfb_dip);
7672 	ASSERT(info);
7673 
7674 	ccount = PM_NUMCMPTS(cfb_dip);
7675 	for (ci = 0; ci < ccount; ci++) {
7676 		norm = pm_get_normal_power(cfb_dip, ci);
7677 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7678 		    PM_CANBLOCK_BYPASS, 0, &unused);
7679 	}
7680 #ifdef DEBUG
7681 	mutex_enter(&pm_debug_lock);
7682 	pm_divertdebug--;
7683 	mutex_exit(&pm_debug_lock);
7684 #endif
7685 }
7686 
/*
 * Take a hold on the console frame buffer and power it up if it turns out
 * to be powered down.
 * Note: the hold taken by pm_cfb_check_and_hold() must be released by
 * calling pm_cfb_rele() when the console fb operation is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	int powered_off = pm_cfb_check_and_hold();

	if (powered_off)
		pm_cfb_powerup();
}
7699 
7700 /*
7701  * Trigger a low level interrupt to power up console frame buffer.
7702  */
7703 void
7704 pm_cfb_trigger(void)
7705 {
7706 	if (cfb_dip == NULL)
7707 		return;
7708 
7709 	mutex_enter(&pm_cfb_lock);
7710 	/*
7711 	 * If machine appears to be hung, pulling the keyboard connector of
7712 	 * the console will cause a high level interrupt and go to debug_enter.
7713 	 * But, if the fb is powered down, this routine will be called to bring
7714 	 * it up (by generating a softint to do the work).  If soft interrupts
7715 	 * are not running, and the keyboard connector is pulled again, the
7716 	 * following code detects this condition and calls panic which allows
7717 	 * the fb to be brought up from high level.
7718 	 *
7719 	 * If two nearly simultaneous calls to debug_enter occur (both from
7720 	 * high level) the code described above will cause a panic.
7721 	 */
7722 	if (lbolt <= pm_soft_pending) {
7723 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7724 		panic(panicstr);	/* does a power up at any intr level */
7725 		/* NOTREACHED */
7726 	}
7727 	pm_soft_pending = lbolt;
7728 	mutex_exit(&pm_cfb_lock);
7729 	ddi_trigger_softintr(pm_soft_id);
7730 }
7731 
7732 major_t
7733 pm_path_to_major(char *path)
7734 {
7735 	PMD_FUNC(pmf, "path_to_major")
7736 	char *np, *ap, *bp;
7737 	major_t ret;
7738 	size_t len;
7739 	static major_t i_path_to_major(char *, char *);
7740 
7741 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7742 
7743 	np = strrchr(path, '/');
7744 	if (np != NULL)
7745 		np++;
7746 	else
7747 		np = path;
7748 	len = strlen(np) + 1;
7749 	bp = kmem_alloc(len, KM_SLEEP);
7750 	(void) strcpy(bp, np);
7751 	if ((ap = strchr(bp, '@')) != NULL) {
7752 		*ap = '\0';
7753 	}
7754 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7755 	ret = i_path_to_major(path, np);
7756 	kmem_free(bp, len);
7757 	return (ret);
7758 }
7759 
#ifdef DEBUG

/*
 * Circular pm debug message buffer:
 *	pm_msgbuf	start of the buffer (allocated on first use)
 *	pm_bufend	last byte of the buffer
 *	pm_msgp		where the next message will be written
 *	pm_logpages	size of the buffer, in pages
 */
char *pm_msgp;
char *pm_bufend;
char *pm_msgbuf = NULL;
int   pm_logpages = 2;

#define	PMLOGPGS	pm_logpages

/*
 * Format a debug message into the circular buffer and, unless debug output
 * is being diverted (pm_divertdebug), also print it with prom_printf().
 * Each message is stored with its terminating NUL.  A message that does not
 * fit in the space left before pm_bufend is written at the start of the
 * buffer instead, after the unused tail has been zeroed.
 *
 * NOTE(review): pm_cfb_setup_intr() initializes pm_debug_lock as a spin
 * mutex, yet the first call here does a KM_SLEEP allocation while holding
 * it -- worth confirming that this is safe.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;
	size_t capacity;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}

	/* first pass only measures the message (plus its NUL) */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	/*
	 * A message longer than the whole buffer is truncated rather than
	 * being allowed to run off the end of it.
	 */
	capacity = (size_t)(pm_bufend - pm_msgbuf);
	if (size > capacity)
		size = capacity;

	va_start(adx, fmt);
	if (size > (size_t)(pm_bufend - pm_msgp)) {		/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		(void) vsnprintf(pm_msgbuf, size, fmt, adx);
		/*
		 * The message now lives at the start of the buffer, so that
		 * is what must be printed; pm_msgp still points at the tail
		 * that was just zeroed.
		 */
		if (!pm_divertdebug)
			prom_printf("%s", pm_msgbuf);
		pm_msgp = pm_msgbuf + size;
	} else {
		(void) vsnprintf(pm_msgp, size, fmt, adx);
		if (!pm_divertdebug)
			prom_printf("%s", pm_msgp);
		pm_msgp += size;
	}
	va_end(adx);
	mutex_exit(&pm_debug_lock);
}
#endif	/* DEBUG */
7802 
7803 /*
7804  * We want to save the state of any directly pm'd devices over the suspend/
7805  * resume process so that we can put them back the way the controlling
7806  * process left them.
7807  */
7808 void
7809 pm_save_direct_levels(void)
7810 {
7811 	pm_processes_stopped = 1;
7812 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7813 }
7814 
7815 static int
7816 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7817 {
7818 	_NOTE(ARGUNUSED(arg))
7819 	int i;
7820 	int *ip;
7821 	pm_info_t *info = PM_GET_PM_INFO(dip);
7822 
7823 	if (!info)
7824 		return (DDI_WALK_CONTINUE);
7825 
7826 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7827 		if (PM_NUMCMPTS(dip) > 2) {
7828 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7829 			    sizeof (int), KM_SLEEP);
7830 			ip = info->pmi_lp;
7831 		} else {
7832 			ip = info->pmi_levels;
7833 		}
7834 		/* autopm and processes are stopped, ok not to lock power */
7835 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7836 			*ip++ = PM_CURPOWER(dip, i);
7837 		/*
7838 		 * There is a small window between stopping the
7839 		 * processes and setting pm_processes_stopped where
7840 		 * a driver could get hung up in a pm_raise_power()
7841 		 * call.  Free any such driver now.
7842 		 */
7843 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7844 	}
7845 
7846 	return (DDI_WALK_CONTINUE);
7847 }
7848 
7849 void
7850 pm_restore_direct_levels(void)
7851 {
7852 	/*
7853 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
7854 	 * threads failed) then we don't want to try to restore them
7855 	 */
7856 	if (!pm_processes_stopped)
7857 		return;
7858 
7859 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
7860 	pm_processes_stopped = 0;
7861 }
7862 
7863 static int
7864 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
7865 {
7866 	_NOTE(ARGUNUSED(arg))
7867 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
7868 	int i, nc, result;
7869 	int *ip;
7870 
7871 	pm_info_t *info = PM_GET_PM_INFO(dip);
7872 	if (!info)
7873 		return (DDI_WALK_CONTINUE);
7874 
7875 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7876 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
7877 			ip = &info->pmi_lp[nc - 1];
7878 		} else {
7879 			ip = &info->pmi_levels[nc - 1];
7880 		}
7881 		/*
7882 		 * Because fb drivers fail attempts to turn off the
7883 		 * fb when the monitor is on, but treat a request to
7884 		 * turn on the monitor as a request to turn on the
7885 		 * fb too, we process components in descending order
7886 		 * Because autopm is disabled and processes aren't
7887 		 * running, it is ok to examine current power outside
7888 		 * of the power lock
7889 		 */
7890 		for (i = nc - 1; i >= 0; i--, ip--) {
7891 			if (PM_CURPOWER(dip, i) == *ip)
7892 				continue;
7893 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
7894 			    PM_CANBLOCK_BYPASS, 0, &result) !=
7895 				DDI_SUCCESS) {
7896 				cmn_err(CE_WARN, "cpr: unable "
7897 				    "to restore power level of "
7898 				    "component %d of directly "
7899 				    "power manged device %s@%s"
7900 				    " to %d",
7901 				    i, PM_NAME(dip),
7902 				    PM_ADDR(dip), *ip);
7903 				PMD(PMD_FAIL, ("%s: failed to restore "
7904 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
7905 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
7906 				    PM_CURPOWER(dip, i), *ip, result))
7907 			}
7908 		}
7909 		if (nc > 2) {
7910 			kmem_free(info->pmi_lp, nc * sizeof (int));
7911 			info->pmi_lp = NULL;
7912 		}
7913 	}
7914 	return (DDI_WALK_CONTINUE);
7915 }
7916 
7917 /*
7918  * Stolen from the bootdev module
7919  * attempt to convert a path to a major number
7920  */
7921 static major_t
7922 i_path_to_major(char *path, char *leaf_name)
7923 {
7924 	extern major_t path_to_major(char *pathname);
7925 	major_t maj;
7926 
7927 	if ((maj = path_to_major(path)) == (major_t)-1) {
7928 		maj = ddi_name_to_major(leaf_name);
7929 	}
7930 
7931 	return (maj);
7932 }
7933 
7934 /*
7935  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
7936  * An entry in the list means that the device is detached, so we need to
7937  * adjust its ancestors as if they had just seen this attach, and any detached
7938  * ancestors need to have their list entries adjusted.
7939  */
7940 void
7941 pm_driver_removed(major_t major)
7942 {
7943 	static void i_pm_driver_removed(major_t major);
7944 
7945 	/*
7946 	 * Serialize removal of drivers. This is to keep ancestors of
7947 	 * a node that is being deleted from getting deleted and added back
7948 	 * with different counters.
7949 	 */
7950 	mutex_enter(&pm_remdrv_lock);
7951 	i_pm_driver_removed(major);
7952 	mutex_exit(&pm_remdrv_lock);
7953 }
7954 
7955 /*
7956  * This routine is called recursively by pm_noinvol_process_ancestors()
7957  */
7958 static void
7959 i_pm_driver_removed(major_t major)
7960 {
7961 	PMD_FUNC(pmf, "driver_removed")
7962 	static void adjust_ancestors(char *, int);
7963 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
7964 	static void pm_noinvol_process_ancestors(char *);
7965 	pm_noinvol_t *ip, *pp = NULL;
7966 	int wasvolpmd;
7967 	ASSERT(major != (major_t)-1);
7968 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
7969 again:
7970 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7971 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7972 		if (major != ip->ni_major)
7973 			continue;
7974 		/*
7975 		 * If it is an ancestor of no-invol node, which is
7976 		 * not removed, skip it. This is to cover the case of
7977 		 * ancestor removed without removing its descendants.
7978 		 */
7979 		if (pm_is_noinvol_ancestor(ip)) {
7980 			ip->ni_flags |= PMC_DRIVER_REMOVED;
7981 			continue;
7982 		}
7983 		wasvolpmd = ip->ni_wasvolpmd;
7984 		/*
7985 		 * remove the entry from the list
7986 		 */
7987 		if (pp) {
7988 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
7989 			    pmf, ip->ni_path, pp->ni_path))
7990 			pp->ni_next = ip->ni_next;
7991 		} else {
7992 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
7993 			    ip->ni_path))
7994 			ASSERT(pm_noinvol_head == ip);
7995 			pm_noinvol_head = ip->ni_next;
7996 		}
7997 		rw_exit(&pm_noinvol_rwlock);
7998 		adjust_ancestors(ip->ni_path, wasvolpmd);
7999 		/*
8000 		 * Had an ancestor been removed before this node, it would have
8001 		 * been skipped. Adjust the no-invol counters for such skipped
8002 		 * ancestors.
8003 		 */
8004 		pm_noinvol_process_ancestors(ip->ni_path);
8005 		kmem_free(ip->ni_path, ip->ni_size);
8006 		kmem_free(ip, sizeof (*ip));
8007 		goto again;
8008 	}
8009 	rw_exit(&pm_noinvol_rwlock);
8010 }
8011 
8012 /*
8013  * returns 1, if *aip is a ancestor of a no-invol node
8014  *	   0, otherwise
8015  */
8016 static int
8017 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8018 {
8019 	pm_noinvol_t *ip;
8020 
8021 	ASSERT(strlen(aip->ni_path) != 0);
8022 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8023 		if (ip == aip)
8024 			continue;
8025 		/*
8026 		 * To be an ancestor, the path must be an initial substring of
8027 		 * the descendent, and end just before a '/' in the
8028 		 * descendent's path.
8029 		 */
8030 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8031 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8032 			return (1);
8033 	}
8034 	return (0);
8035 }
8036 
8037 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
8038 /*
8039  * scan through the pm_noinvolpm list adjusting ancestors of the current
8040  * node;  Modifies string *path.
8041  */
8042 static void
8043 adjust_ancestors(char *path, int wasvolpmd)
8044 {
8045 	PMD_FUNC(pmf, "adjust_ancestors")
8046 	char *cp;
8047 	pm_noinvol_t *lp;
8048 	pm_noinvol_t *pp = NULL;
8049 	major_t locked = (major_t)UINT_MAX;
8050 	dev_info_t *dip;
8051 	char	*pathbuf;
8052 	size_t pathbuflen = strlen(path) + 1;
8053 
8054 	/*
8055 	 * First we look up the ancestor's dip.  If we find it, then we
8056 	 * adjust counts up the tree
8057 	 */
8058 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8059 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8060 	(void) strcpy(pathbuf, path);
8061 	cp = strrchr(pathbuf, '/');
8062 	if (cp == NULL)	{
8063 		/* if no ancestors, then nothing to do */
8064 		kmem_free(pathbuf, pathbuflen);
8065 		return;
8066 	}
8067 	*cp = '\0';
8068 	dip = pm_name_to_dip(pathbuf, 1);
8069 	if (dip != NULL) {
8070 		locked = PM_MAJOR(dip);
8071 
8072 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8073 		    path, dip);
8074 
8075 		if (locked != (major_t)UINT_MAX)
8076 			ddi_release_devi(dip);
8077 	} else {
8078 		char *apath;
8079 		size_t len = strlen(pathbuf) + 1;
8080 		int  lock_held = 1;
8081 
8082 		/*
8083 		 * Now check for ancestors that exist only in the list
8084 		 */
8085 		apath = kmem_alloc(len, KM_SLEEP);
8086 		(void) strcpy(apath, pathbuf);
8087 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8088 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8089 			/*
8090 			 * This can only happen once.  Since we have to drop
8091 			 * the lock, we need to extract the relevant info.
8092 			 */
8093 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8094 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8095 				    lp->ni_path, lp->ni_noinvolpm,
8096 				    lp->ni_noinvolpm - 1))
8097 				lp->ni_noinvolpm--;
8098 				if (wasvolpmd && lp->ni_volpmd) {
8099 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8100 					    "%d\n", pmf, lp->ni_path,
8101 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8102 					lp->ni_volpmd--;
8103 				}
8104 				/*
8105 				 * remove the entry from the list, if there
8106 				 * are no more no-invol descendants and node
8107 				 * itself is not a no-invol node.
8108 				 */
8109 				if (!(lp->ni_noinvolpm ||
8110 				    (lp->ni_flags & PMC_NO_INVOL))) {
8111 					ASSERT(lp->ni_volpmd == 0);
8112 					if (pp) {
8113 						PMD(PMD_NOINVOL, ("%s: freeing "
8114 						    "%s, prev is %s\n", pmf,
8115 						    lp->ni_path, pp->ni_path))
8116 						pp->ni_next = lp->ni_next;
8117 					} else {
8118 						PMD(PMD_NOINVOL, ("%s: free %s "
8119 						    "head\n", pmf, lp->ni_path))
8120 						ASSERT(pm_noinvol_head == lp);
8121 						pm_noinvol_head = lp->ni_next;
8122 					}
8123 					lock_held = 0;
8124 					rw_exit(&pm_noinvol_rwlock);
8125 					adjust_ancestors(apath, wasvolpmd);
8126 					/* restore apath */
8127 					(void) strcpy(apath, pathbuf);
8128 					kmem_free(lp->ni_path, lp->ni_size);
8129 					kmem_free(lp, sizeof (*lp));
8130 				}
8131 				break;
8132 			}
8133 		}
8134 		if (lock_held)
8135 			rw_exit(&pm_noinvol_rwlock);
8136 		adjust_ancestors(apath, wasvolpmd);
8137 		kmem_free(apath, len);
8138 	}
8139 	kmem_free(pathbuf, pathbuflen);
8140 }
8141 
8142 /*
8143  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8144  * which were skipped even though their drivers were removed.
8145  */
8146 static void
8147 pm_noinvol_process_ancestors(char *path)
8148 {
8149 	pm_noinvol_t *lp;
8150 
8151 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8152 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8153 		if (strstr(path, lp->ni_path) &&
8154 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8155 			rw_exit(&pm_noinvol_rwlock);
8156 			i_pm_driver_removed(lp->ni_major);
8157 			return;
8158 		}
8159 	}
8160 	rw_exit(&pm_noinvol_rwlock);
8161 }
8162 
8163 /*
8164  * Returns true if (detached) device needs to be kept up because it exported the
8165  * "no-involuntary-power-cycles" property or we're pretending it did (console
8166  * fb case) or it is an ancestor of such a device and has used up the "one
8167  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8168  * upon detach.  In any event, we need an exact hit on the path or we return
8169  * false.
8170  */
8171 int
8172 pm_noinvol_detached(char *path)
8173 {
8174 	PMD_FUNC(pmf, "noinvol_detached")
8175 	pm_noinvol_t *ip;
8176 	int ret = 0;
8177 
8178 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8179 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8180 		if (strcmp(path, ip->ni_path) == 0) {
8181 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8182 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8183 				    "%s\n", pmf, path))
8184 				ret = 1;
8185 				break;
8186 			}
8187 #ifdef	DEBUG
8188 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8189 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8190 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8191 				    path))
8192 #endif
8193 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8194 			break;
8195 		}
8196 	}
8197 	rw_exit(&pm_noinvol_rwlock);
8198 	return (ret);
8199 }
8200 
8201 int
8202 pm_is_cfb(dev_info_t *dip)
8203 {
8204 	return (dip == cfb_dip);
8205 }
8206 
#ifdef	DEBUG
/*
 * DEBUG-only check that the console frame buffer is fully powered: true
 * when every one of its components is at "normal" power.  Also true when
 * the console is not a framebuffer.  Implemented as a test that
 * pm_cfb_comps_off is zero.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8219 
8220 /*
8221  * Preventing scan from powering down the node by incrementing the
8222  * kidsupcnt.
8223  */
8224 void
8225 pm_hold_power(dev_info_t *dip)
8226 {
8227 	e_pm_hold_rele_power(dip, 1);
8228 }
8229 
8230 /*
8231  * Releasing the hold by decrementing the kidsupcnt allowing scan
8232  * to power down the node if all conditions are met.
8233  */
8234 void
8235 pm_rele_power(dev_info_t *dip)
8236 {
8237 	e_pm_hold_rele_power(dip, -1);
8238 }
8239 
8240 /*
8241  * A wrapper of pm_all_to_normal() to power up a dip
8242  * to its normal level
8243  */
8244 int
8245 pm_powerup(dev_info_t *dip)
8246 {
8247 	PMD_FUNC(pmf, "pm_powerup")
8248 
8249 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8250 	ASSERT(!(servicing_interrupt()));
8251 
8252 	/*
8253 	 * in case this node is not already participating pm
8254 	 */
8255 	if (!PM_GET_PM_INFO(dip)) {
8256 		if (!DEVI_IS_ATTACHING(dip))
8257 			return (DDI_SUCCESS);
8258 		if (pm_start(dip) != DDI_SUCCESS)
8259 			return (DDI_FAILURE);
8260 		if (!PM_GET_PM_INFO(dip))
8261 			return (DDI_SUCCESS);
8262 	}
8263 
8264 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8265 }
8266 
8267 int
8268 pm_rescan_walk(dev_info_t *dip, void *arg)
8269 {
8270 	_NOTE(ARGUNUSED(arg))
8271 
8272 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8273 		return (DDI_WALK_CONTINUE);
8274 
8275 	/*
8276 	 * Currently pm_cpr_callb/resume code is the only caller
8277 	 * and it needs to make sure that stopped scan get
8278 	 * reactivated. Otherwise, rescan walk needn't reactive
8279 	 * stopped scan.
8280 	 */
8281 	pm_scan_init(dip);
8282 
8283 	(void) pm_rescan(dip);
8284 	return (DDI_WALK_CONTINUE);
8285 }
8286 
8287 static dev_info_t *
8288 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8289 {
8290 	dev_info_t *wdip, *pdip;
8291 
8292 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8293 		pdip = ddi_get_parent(wdip);
8294 		if (pdip == dip)
8295 			return (wdip);
8296 	}
8297 	return (NULL);
8298 }
8299 
8300 int
8301 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8302     void *arg, void *result)
8303 {
8304 	PMD_FUNC(pmf, "bp_bus_power")
8305 	dev_info_t	*cdip;
8306 	pm_info_t	*cinfo;
8307 	pm_bp_child_pwrchg_t	*bpc;
8308 	pm_sp_misc_t		*pspm;
8309 	pm_bp_nexus_pwrup_t *bpn;
8310 	pm_bp_child_pwrchg_t new_bpc;
8311 	pm_bp_noinvol_t *bpi;
8312 	dev_info_t *tdip;
8313 	char *pathbuf;
8314 	int		ret = DDI_SUCCESS;
8315 	int		errno = 0;
8316 	pm_component_t *cp;
8317 
8318 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8319 	    pm_decode_op(op)))
8320 	switch (op) {
8321 	case BUS_POWER_CHILD_PWRCHG:
8322 		bpc = (pm_bp_child_pwrchg_t *)arg;
8323 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8324 		tdip = bpc->bpc_dip;
8325 		cdip = pm_get_next_descendent(dip, tdip);
8326 		cinfo = PM_GET_PM_INFO(cdip);
8327 		if (cdip != tdip) {
8328 			/*
8329 			 * If the node is an involved parent, it needs to
8330 			 * power up the node as it is needed.  There is nothing
8331 			 * else the framework can do here.
8332 			 */
8333 			if (PM_WANTS_NOTIFICATION(cdip)) {
8334 				PMD(PMD_SET, ("%s: call bus_power for "
8335 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8336 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8337 				    impl_arg, op, arg, result));
8338 			}
8339 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8340 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8341 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8342 			/*
8343 			 * we presume that the parent needs to be up in
8344 			 * order for the child to change state (either
8345 			 * because it must already be on if the child is on
8346 			 * (and the pm_all_to_normal_nexus() will be a nop)
8347 			 * or because it will need to be on for the child
8348 			 * to come on; so we make the call regardless
8349 			 */
8350 			pm_hold_power(cdip);
8351 			if (cinfo) {
8352 				pm_canblock_t canblock = pspm->pspm_canblock;
8353 				ret = pm_all_to_normal_nexus(cdip, canblock);
8354 				if (ret != DDI_SUCCESS) {
8355 					pm_rele_power(cdip);
8356 					return (ret);
8357 				}
8358 			}
8359 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8360 			    PM_DEVICE(cdip)))
8361 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8362 			    result);
8363 			pm_rele_power(cdip);
8364 		} else {
8365 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8366 			    result);
8367 		}
8368 		return (ret);
8369 
8370 	case BUS_POWER_NEXUS_PWRUP:
8371 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8372 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8373 
8374 		if (!e_pm_valid_info(dip, NULL) ||
8375 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8376 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8377 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8378 			    pmf, PM_DEVICE(dip)))
8379 			*pspm->pspm_errnop = EIO;
8380 			*(int *)result = DDI_FAILURE;
8381 			return (DDI_FAILURE);
8382 		}
8383 
8384 		ASSERT(bpn->bpn_dip == dip);
8385 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8386 		    PM_DEVICE(dip)))
8387 		new_bpc.bpc_dip = dip;
8388 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8389 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8390 		new_bpc.bpc_comp = bpn->bpn_comp;
8391 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8392 		new_bpc.bpc_nlevel = bpn->bpn_level;
8393 		new_bpc.bpc_private = bpn->bpn_private;
8394 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8395 		    PM_LEVEL_UPONLY;
8396 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8397 		    &errno;
8398 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8399 		    (void *)&new_bpc, result);
8400 		kmem_free(pathbuf, MAXPATHLEN);
8401 		return (ret);
8402 
8403 	case BUS_POWER_NOINVOL:
8404 		bpi = (pm_bp_noinvol_t *)arg;
8405 		tdip = bpi->bpni_dip;
8406 		cdip = pm_get_next_descendent(dip, tdip);
8407 
8408 		/* In case of rem_drv, the leaf node has been removed */
8409 		if (cdip == NULL)
8410 			return (DDI_SUCCESS);
8411 
8412 		cinfo = PM_GET_PM_INFO(cdip);
8413 		if (cdip != tdip) {
8414 			if (PM_WANTS_NOTIFICATION(cdip)) {
8415 				PMD(PMD_NOINVOL,
8416 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8417 				    pmf, PM_DEVICE(cdip)))
8418 				ret = (*PM_BUS_POWER_FUNC(cdip))
8419 				    (cdip, NULL, op, arg, result);
8420 				if ((cinfo) && (ret == DDI_SUCCESS))
8421 					(void) pm_noinvol_update_node(cdip,
8422 					    bpi);
8423 				return (ret);
8424 			} else {
8425 				PMD(PMD_NOINVOL,
8426 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8427 				    PM_DEVICE(cdip)))
8428 				ret = pm_busop_bus_power(cdip, NULL, op,
8429 				    arg, result);
8430 				/*
8431 				 * Update the current node.
8432 				 */
8433 				if ((cinfo) && (ret == DDI_SUCCESS))
8434 					(void) pm_noinvol_update_node(cdip,
8435 					    bpi);
8436 				return (ret);
8437 			}
8438 		} else {
8439 			/*
8440 			 * For attach, detach, power up:
8441 			 * Do nothing for leaf node since its
8442 			 * counts are already updated.
8443 			 * For CFB and driver removal, since the
8444 			 * path and the target dip passed in is up to and incl.
8445 			 * the immediate ancestor, need to do the update.
8446 			 */
8447 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8448 			    "reached\n", pmf, PM_DEVICE(cdip)))
8449 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8450 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8451 				(void) pm_noinvol_update_node(cdip, bpi);
8452 			return (DDI_SUCCESS);
8453 		}
8454 
8455 	default:
8456 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8457 		return (DDI_FAILURE);
8458 	}
8459 }
8460 
8461 static int
8462 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8463     void *arg, void *resultp)
8464 {
8465 	_NOTE(ARGUNUSED(impl_arg))
8466 	PMD_FUNC(pmf, "bp_set_power")
8467 	pm_ppm_devlist_t *devl;
8468 	int clevel, circ;
8469 #ifdef	DEBUG
8470 	int circ_db, ccirc_db;
8471 #endif
8472 	int ret = DDI_SUCCESS;
8473 	dev_info_t *cdip;
8474 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8475 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8476 	pm_canblock_t canblock = pspm->pspm_canblock;
8477 	int scan = pspm->pspm_scan;
8478 	int comp = bpc->bpc_comp;
8479 	int olevel = bpc->bpc_olevel;
8480 	int nlevel = bpc->bpc_nlevel;
8481 	int comps_off_incr = 0;
8482 	dev_info_t *pdip = ddi_get_parent(dip);
8483 	int dodeps;
8484 	int direction = pspm->pspm_direction;
8485 	int *errnop = pspm->pspm_errnop;
8486 	char *dir = pm_decode_direction(direction);
8487 	int *iresp = (int *)resultp;
8488 	time_t	idletime, thresh;
8489 	pm_component_t *cp = PM_CP(dip, comp);
8490 	int work_type;
8491 
8492 	*iresp = DDI_SUCCESS;
8493 	*errnop = 0;
8494 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8495 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8496 	    pm_decode_op(op)))
8497 
8498 	/*
8499 	 * The following set of conditions indicate we are here to handle a
8500 	 * driver's pm_[raise|lower]_power request, but the device is being
8501 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8502 	 * we want to pm_block and pass a status back to the caller based
8503 	 * on whether the controlling process's next activity on the device
8504 	 * matches the current request or not.  This distinction tells
8505 	 * downstream functions to avoid calling into a driver or changing
8506 	 * the framework's power state.  To actually block, we need:
8507 	 *
8508 	 * PM_ISDIRECT(dip)
8509 	 *	no reason to block unless a process is directly controlling dev
8510 	 * direction != PM_LEVEL_EXACT
8511 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8512 	 * !pm_processes_stopped
8513 	 *	don't block if controlling proc already be stopped for cpr
8514 	 * canblock != PM_CANBLOCK_BYPASS
8515 	 *	our caller must not have explicitly prevented blocking
8516 	 */
8517 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8518 		PM_LOCK_DIP(dip);
8519 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8520 			/* releases dip lock */
8521 			ret = pm_busop_match_request(dip, bpc);
8522 			if (ret == EAGAIN) {
8523 				PM_LOCK_DIP(dip);
8524 				continue;
8525 			}
8526 			return (*iresp = ret);
8527 		}
8528 		PM_UNLOCK_DIP(dip);
8529 	}
8530 	/* BC device is never scanned, so power will stick until we are done */
8531 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8532 	    direction != PM_LEVEL_DOWNONLY) {
8533 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8534 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8535 		    canblock, 0, resultp) != DDI_SUCCESS) {
8536 			/* *resultp set by pm_set_power */
8537 			return (DDI_FAILURE);
8538 		}
8539 	}
8540 	if (PM_WANTS_NOTIFICATION(pdip)) {
8541 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8542 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8543 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8544 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8545 		if (ret != DDI_SUCCESS) {
8546 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8547 			    pmf, PM_DEVICE(pdip)))
8548 			return (DDI_FAILURE);
8549 		}
8550 	} else {
8551 		/*
8552 		 * Since we don't know what the actual power level is,
8553 		 * we place a power hold on the parent no matter what
8554 		 * component and level is changing.
8555 		 */
8556 		pm_hold_power(pdip);
8557 	}
8558 	PM_LOCK_POWER(dip, &circ);
8559 	clevel = PM_CURPOWER(dip, comp);
8560 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8561 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8562 	    clevel, dir))
8563 	switch (direction) {
8564 	case PM_LEVEL_UPONLY:
8565 		/* Powering up */
8566 		if (clevel >= nlevel) {
8567 			PMD(PMD_SET, ("%s: current level is already "
8568 			    "at or above the requested level.\n", pmf))
8569 			*iresp = DDI_SUCCESS;
8570 			ret = DDI_SUCCESS;
8571 			goto post_notify;
8572 		}
8573 		break;
8574 	case PM_LEVEL_EXACT:
8575 		/* specific level request */
8576 		if (clevel == nlevel && !PM_ISBC(dip)) {
8577 			PMD(PMD_SET, ("%s: current level is already "
8578 			    "at the requested level.\n", pmf))
8579 			*iresp = DDI_SUCCESS;
8580 			ret = DDI_SUCCESS;
8581 			goto post_notify;
8582 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8583 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8584 			if (!pm_cfb_enabled) {
8585 				PMD(PMD_ERROR | PMD_CFB,
8586 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8587 				*errnop = EINVAL;
8588 				*iresp = DDI_FAILURE;
8589 				ret = DDI_FAILURE;
8590 				goto post_notify;
8591 			}
8592 			mutex_enter(&pm_cfb_lock);
8593 			while (cfb_inuse) {
8594 				mutex_exit(&pm_cfb_lock);
8595 				if (delay_sig(1) == EINTR) {
8596 					ret = DDI_FAILURE;
8597 					*iresp = DDI_FAILURE;
8598 					*errnop = EINTR;
8599 					goto post_notify;
8600 				}
8601 				mutex_enter(&pm_cfb_lock);
8602 			}
8603 			mutex_exit(&pm_cfb_lock);
8604 		}
8605 		break;
8606 	case PM_LEVEL_DOWNONLY:
8607 		/* Powering down */
8608 		thresh = cur_threshold(dip, comp);
8609 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8610 		if (scan && ((PM_KUC(dip) != 0) ||
8611 		    (cp->pmc_busycount > 0) ||
8612 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8613 #ifdef	DEBUG
8614 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8615 				PMD(PMD_SET, ("%s: scan failed: "
8616 				    "kidsupcnt != 0\n", pmf))
8617 			if (cp->pmc_busycount > 0)
8618 				PMD(PMD_SET, ("%s: scan failed: "
8619 				    "device become busy\n", pmf))
8620 			if (idletime < thresh)
8621 				PMD(PMD_SET, ("%s: scan failed: device "
8622 				    "hasn't been idle long enough\n", pmf))
8623 #endif
8624 			*iresp = DDI_FAILURE;
8625 			*errnop = EBUSY;
8626 			ret = DDI_FAILURE;
8627 			goto post_notify;
8628 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8629 			PMD(PMD_SET, ("%s: current level is already at "
8630 			    "or below the requested level.\n", pmf))
8631 			*iresp = DDI_SUCCESS;
8632 			ret = DDI_SUCCESS;
8633 			goto post_notify;
8634 		}
8635 		break;
8636 	}
8637 
8638 	if (PM_IS_CFB(dip) && (comps_off_incr =
8639 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8640 		/*
8641 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8642 		 * component from full power.  Remember that we tried to
8643 		 * lower power in case it fails and we need to back out
8644 		 * the adjustment.
8645 		 */
8646 		update_comps_off(comps_off_incr, dip);
8647 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8648 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8649 		    pm_cfb_comps_off))
8650 	}
8651 
8652 	if ((*iresp = power_dev(dip,
8653 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8654 #ifdef DEBUG
8655 		/*
8656 		 * All descendents of this node should already be powered off.
8657 		 */
8658 		if (PM_CURPOWER(dip, comp) == 0) {
8659 			pm_desc_pwrchk_t pdpchk;
8660 			pdpchk.pdpc_dip = dip;
8661 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8662 			ndi_devi_enter(dip, &circ_db);
8663 			for (cdip = ddi_get_child(dip); cdip != NULL;
8664 			    cdip = ddi_get_next_sibling(cdip)) {
8665 				ndi_devi_enter(cdip, &ccirc_db);
8666 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8667 				    (void *)&pdpchk);
8668 				ndi_devi_exit(cdip, ccirc_db);
8669 			}
8670 			ndi_devi_exit(dip, circ_db);
8671 		}
8672 #endif
8673 		/*
8674 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8675 		 * back up to full power.
8676 		 */
8677 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8678 			update_comps_off(comps_off_incr, dip);
8679 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8680 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8681 			    comp, clevel, nlevel, pm_cfb_comps_off))
8682 		}
8683 		dodeps = 0;
8684 		if (POWERING_OFF(clevel, nlevel)) {
8685 			if (PM_ISBC(dip)) {
8686 				dodeps = (comp == 0);
8687 			} else {
8688 				int i;
8689 				dodeps = 1;
8690 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8691 					/* if some component still on */
8692 					if (PM_CURPOWER(dip, i)) {
8693 						dodeps = 0;
8694 						break;
8695 					}
8696 				}
8697 			}
8698 			if (dodeps)
8699 				work_type = PM_DEP_WK_POWER_OFF;
8700 		} else if (POWERING_ON(clevel, nlevel)) {
8701 			if (PM_ISBC(dip)) {
8702 				dodeps = (comp == 0);
8703 			} else {
8704 				int i;
8705 				dodeps = 1;
8706 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8707 					if (i == comp)
8708 						continue;
8709 					if (PM_CURPOWER(dip, i) > 0) {
8710 						dodeps = 0;
8711 						break;
8712 					}
8713 				}
8714 			}
8715 			if (dodeps)
8716 				work_type = PM_DEP_WK_POWER_ON;
8717 		}
8718 
8719 		if (dodeps) {
8720 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8721 
8722 			(void) ddi_pathname(dip, pathbuf);
8723 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8724 			    PM_DEP_NOWAIT, NULL, 0);
8725 			kmem_free(pathbuf, MAXPATHLEN);
8726 		}
8727 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8728 			int old;
8729 
8730 			/* If old power cached during deadlock, use it. */
8731 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8732 			    cp->pmc_phc_pwr : olevel);
8733 			mutex_enter(&pm_rsvp_lock);
8734 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8735 			    old, canblock);
8736 			pm_enqueue_notify_others(&devl, canblock);
8737 			mutex_exit(&pm_rsvp_lock);
8738 		}
8739 
8740 		/*
8741 		 * If we are coming from a scan, don't do it again,
8742 		 * else we can have infinite loops.
8743 		 */
8744 		if (!scan)
8745 			pm_rescan(dip);
8746 	} else {
8747 		/* if we incremented pm_comps_off_count, but failed */
8748 		if (comps_off_incr > 0) {
8749 			update_comps_off(-comps_off_incr, dip);
8750 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8751 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8752 			    comp, clevel, nlevel, pm_cfb_comps_off))
8753 		}
8754 		*errnop = EIO;
8755 	}
8756 
8757 post_notify:
8758 	/*
8759 	 * This thread may have been in deadlock with pm_power_has_changed.
8760 	 * Before releasing power lock, clear the flag which marks this
8761 	 * condition.
8762 	 */
8763 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8764 
8765 	/*
8766 	 * Update the old power level in the bus power structure with the
8767 	 * actual power level before the transition was made to the new level.
8768 	 * Some involved parents depend on this information to keep track of
8769 	 * their children's power transition.
8770 	 */
8771 	if (*iresp != DDI_FAILURE)
8772 		bpc->bpc_olevel = clevel;
8773 
8774 	if (PM_WANTS_NOTIFICATION(pdip)) {
8775 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8776 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8777 		PM_UNLOCK_POWER(dip, circ);
8778 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8779 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8780 		    PM_DEVICE(dip), ret))
8781 	} else {
8782 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8783 		PM_UNLOCK_POWER(dip, circ);
8784 		/*
8785 		 * Now that we know what power transition has occurred
8786 		 * (if any), release the power hold.  Leave the hold
8787 		 * in effect in the case of OFF->ON transition.
8788 		 */
8789 		if (!(clevel == 0 && nlevel > 0 &&
8790 		    (!PM_ISBC(dip) || comp == 0)))
8791 			pm_rele_power(pdip);
8792 		/*
8793 		 * If the power transition was an ON->OFF transition,
8794 		 * remove the power hold from the parent.
8795 		 */
8796 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8797 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8798 			pm_rele_power(pdip);
8799 	}
8800 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8801 		return (DDI_FAILURE);
8802 	else
8803 		return (DDI_SUCCESS);
8804 }
8805 
8806 /*
8807  * If an app (SunVTS or Xsun) has taken control, then block until it
8808  * gives it up or makes the requested power level change, unless
8809  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8810  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8811  */
8812 static int
8813 pm_busop_match_request(dev_info_t *dip, void *arg)
8814 {
8815 	PMD_FUNC(pmf, "bp_match_request")
8816 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8817 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8818 	int comp = bpc->bpc_comp;
8819 	int nlevel = bpc->bpc_nlevel;
8820 	pm_canblock_t canblock = pspm->pspm_canblock;
8821 	int direction = pspm->pspm_direction;
8822 	int clevel, circ;
8823 
8824 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8825 	PM_LOCK_POWER(dip, &circ);
8826 	clevel = PM_CURPOWER(dip, comp);
8827 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8828 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8829 	if (direction == PM_LEVEL_UPONLY) {
8830 		if (clevel >= nlevel) {
8831 			PM_UNLOCK_POWER(dip, circ);
8832 			PM_UNLOCK_DIP(dip);
8833 			return (DDI_SUCCESS);
8834 		}
8835 	} else if (clevel == nlevel) {
8836 		PM_UNLOCK_POWER(dip, circ);
8837 		PM_UNLOCK_DIP(dip);
8838 		return (DDI_SUCCESS);
8839 	}
8840 	if (canblock == PM_CANBLOCK_FAIL) {
8841 		PM_UNLOCK_POWER(dip, circ);
8842 		PM_UNLOCK_DIP(dip);
8843 		return (DDI_FAILURE);
8844 	}
8845 	if (canblock == PM_CANBLOCK_BLOCK) {
8846 		/*
8847 		 * To avoid a deadlock, we must not hold the
8848 		 * power lock when we pm_block.
8849 		 */
8850 		PM_UNLOCK_POWER(dip, circ);
8851 		PMD(PMD_SET, ("%s: blocking\n", pmf))
8852 		    /* pm_block releases dip lock */
8853 		    switch (pm_block(dip, comp, nlevel, clevel)) {
8854 		    case PMP_RELEASE:
8855 				return (EAGAIN);
8856 		    case PMP_SUCCEED:
8857 				return (DDI_SUCCESS);
8858 		    case PMP_FAIL:
8859 				return (DDI_FAILURE);
8860 		    }
8861 	} else {
8862 		ASSERT(0);
8863 	}
8864 	_NOTE(NOTREACHED);
8865 	return (DDI_FAILURE);	/* keep gcc happy */
8866 }
8867 
8868 static int
8869 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
8870 {
8871 	PMD_FUNC(pmf, "all_to_normal_nexus")
8872 	int		*normal;
8873 	int		i, ncomps;
8874 	size_t		size;
8875 	int		changefailed = 0;
8876 	int		ret, result = DDI_SUCCESS;
8877 	pm_bp_nexus_pwrup_t	bpn;
8878 	pm_sp_misc_t	pspm;
8879 
8880 	ASSERT(PM_GET_PM_INFO(dip));
8881 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8882 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
8883 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
8884 		return (DDI_FAILURE);
8885 	}
8886 	ncomps = PM_NUMCMPTS(dip);
8887 	for (i = 0; i < ncomps; i++) {
8888 		bpn.bpn_dip = dip;
8889 		bpn.bpn_comp = i;
8890 		bpn.bpn_level = normal[i];
8891 		pspm.pspm_canblock = canblock;
8892 		pspm.pspm_scan = 0;
8893 		bpn.bpn_private = &pspm;
8894 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
8895 		    (void *)&bpn, (void *)&result);
8896 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
8897 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
8898 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
8899 			    i, normal[i], result))
8900 			changefailed++;
8901 		}
8902 	}
8903 	kmem_free(normal, size);
8904 	if (changefailed) {
8905 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
8906 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
8907 		return (DDI_FAILURE);
8908 	}
8909 	return (DDI_SUCCESS);
8910 }
8911 
8912 int
8913 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
8914     dev_info_t *tdip)
8915 {
8916 	PMD_FUNC(pmf, "noinvol_update")
8917 	pm_bp_noinvol_t args;
8918 	int ret;
8919 	int result = DDI_SUCCESS;
8920 
8921 	args.bpni_path = path;
8922 	args.bpni_dip = tdip;
8923 	args.bpni_cmd = subcmd;
8924 	args.bpni_wasvolpmd = wasvolpmd;
8925 	args.bpni_volpmd = volpmd;
8926 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
8927 	    "volpmd %d wasvolpmd %d\n", pmf,
8928 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
8929 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
8930 	    &args, &result);
8931 	return (ret);
8932 }
8933 
8934 void
8935 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
8936 {
8937 	PMD_FUNC(pmf, "noinvol_update_node")
8938 
8939 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8940 	switch (req->bpni_cmd) {
8941 	case PM_BP_NOINVOL_ATTACH:
8942 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
8943 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8944 		    DEVI(dip)->devi_pm_noinvolpm,
8945 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8946 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8947 		PM_LOCK_DIP(dip);
8948 		DEVI(dip)->devi_pm_noinvolpm--;
8949 		if (req->bpni_wasvolpmd) {
8950 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
8951 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8952 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8953 			    DEVI(dip)->devi_pm_volpmd - 1))
8954 			if (DEVI(dip)->devi_pm_volpmd)
8955 				DEVI(dip)->devi_pm_volpmd--;
8956 		}
8957 		PM_UNLOCK_DIP(dip);
8958 		break;
8959 
8960 	case PM_BP_NOINVOL_DETACH:
8961 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
8962 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
8963 		    DEVI(dip)->devi_pm_noinvolpm,
8964 		    DEVI(dip)->devi_pm_noinvolpm + 1))
8965 		PM_LOCK_DIP(dip);
8966 		DEVI(dip)->devi_pm_noinvolpm++;
8967 		if (req->bpni_wasvolpmd) {
8968 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
8969 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8970 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8971 			    DEVI(dip)->devi_pm_volpmd + 1))
8972 			DEVI(dip)->devi_pm_volpmd++;
8973 		}
8974 		PM_UNLOCK_DIP(dip);
8975 		break;
8976 
8977 	case PM_BP_NOINVOL_REMDRV:
8978 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8979 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8980 		    DEVI(dip)->devi_pm_noinvolpm,
8981 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8982 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8983 		PM_LOCK_DIP(dip);
8984 		DEVI(dip)->devi_pm_noinvolpm--;
8985 		if (req->bpni_wasvolpmd) {
8986 			PMD(PMD_NOINVOL,
8987 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8988 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
8989 			    DEVI(dip)->devi_pm_volpmd,
8990 			    DEVI(dip)->devi_pm_volpmd - 1))
8991 			/*
8992 			 * A power up could come in between and
8993 			 * clear the volpmd, if that's the case,
8994 			 * volpmd would be clear.
8995 			 */
8996 			if (DEVI(dip)->devi_pm_volpmd)
8997 				DEVI(dip)->devi_pm_volpmd--;
8998 		}
8999 		PM_UNLOCK_DIP(dip);
9000 		break;
9001 
9002 	case PM_BP_NOINVOL_CFB:
9003 		PMD(PMD_NOINVOL,
9004 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9005 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9006 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9007 		PM_LOCK_DIP(dip);
9008 		DEVI(dip)->devi_pm_noinvolpm++;
9009 		PM_UNLOCK_DIP(dip);
9010 		break;
9011 
9012 	case PM_BP_NOINVOL_POWER:
9013 		PMD(PMD_NOINVOL,
9014 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9015 		    pmf, PM_DEVICE(dip),
9016 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9017 		    req->bpni_volpmd))
9018 		PM_LOCK_DIP(dip);
9019 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9020 		PM_UNLOCK_DIP(dip);
9021 		break;
9022 
9023 	default:
9024 		break;
9025 	}
9026 
9027 }
9028 
#ifdef DEBUG
/*
 * Walk callback, compiled only into DEBUG kernels.  arg points to a
 * pm_desc_pwrchk_t naming the ancestor (pdpc_dip) that is powering off.
 *
 * Nodes without pm info are skipped.  For every component of dip whose
 * current power level is non-zero, a message is issued through cmn_err():
 * at CE_PANIC severity when pdpc_par_involved is zero, at CE_WARN severity
 * otherwise.
 *
 * Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = arg;
	int comp, level, severity;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			/* Still powered: pick how loudly to complain. */
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off "
			    "while desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip),
			    comp, level))
			cmn_err(severity, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9057 
9058 /*
9059  * Record the fact that one thread is borrowing the lock on a device node.
9060  * Use is restricted to the case where the lending thread will block until
9061  * the borrowing thread (always curthread) completes.
9062  */
9063 void
9064 pm_borrow_lock(kthread_t *lender)
9065 {
9066 	lock_loan_t *prev = &lock_loan_head;
9067 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9068 
9069 	cur->pmlk_borrower = curthread;
9070 	cur->pmlk_lender = lender;
9071 	mutex_enter(&pm_loan_lock);
9072 	cur->pmlk_next = prev->pmlk_next;
9073 	prev->pmlk_next = cur;
9074 	mutex_exit(&pm_loan_lock);
9075 }
9076 
9077 /*
9078  * Return the borrowed lock.  A thread can borrow only one.
9079  */
9080 void
9081 pm_return_lock(void)
9082 {
9083 	lock_loan_t *cur;
9084 	lock_loan_t *prev = &lock_loan_head;
9085 
9086 	mutex_enter(&pm_loan_lock);
9087 	ASSERT(prev->pmlk_next != NULL);
9088 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9089 		if (cur->pmlk_borrower == curthread)
9090 			break;
9091 
9092 	ASSERT(cur != NULL);
9093 	prev->pmlk_next = cur->pmlk_next;
9094 	mutex_exit(&pm_loan_lock);
9095 	kmem_free(cur, sizeof (*cur));
9096 }
9097