xref: /titanic_51/usr/src/uts/common/os/sunpm.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sunpm.c builds sunpm.o	"power management framework"
31  *	kernel-resident power management code.  Implements power management
32  *	policy
33  *	Assumes: all backwards compat. device components wake up on &
34  *		 the pm_info pointer in dev_info is initially NULL
35  *
36  * PM - (device) Power Management
37  *
38  * Each device may have 0 or more components.  If a device has no components,
39  * then it can't be power managed.  Each component has 2 or more
40  * power states.
41  *
42  * "Backwards Compatible" (bc) devices:
43  * There are two different types of devices from the point of view of this
44  * code.  The original type, left over from the original PM implementation on
45  * the voyager platform are known in this code as "backwards compatible"
46  * devices (PM_ISBC(dip) returns true).
47  * They are recognized by the pm code by the lack of a pm-components property
48  * and a call made by the driver to pm_create_components(9F).
49  * For these devices, component 0 is special, and represents the power state
50  * of the device.  If component 0 is to be set to power level 0 (off), then
51  * the framework must first call into the driver's detach(9E) routine with
52  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
53  * After setting component 0 from 0 to a non-zero power level, a call must be
54  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
55  *
56  * Currently, the only way to get a bc device power managed is via a set of
57  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
58  *
59  * For non-bc devices, the driver describes the components by exporting a
60  * pm-components(9P) property that tells how many components there are,
61  * tells what each component's power state values are, and provides human
62  * readable strings (currently unused) for each component name and power state.
63  * Devices which export pm-components(9P) are automatically power managed
64  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
65  * after parsing power.conf(4)).
66  * For these devices, all components are considered independent of each other,
67  * and it is up to the driver to decide when a transition requires saving or
68  * restoring hardware state.
69  *
70  * Each device component also has a threshold time associated with each power
71  * transition (see power.conf(4)), and a busy/idle state maintained by the
72  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
73  * Components are created idle.
74  *
75  * The PM framework provides several functions:
76  * -implement PM policy as described in power.conf(4)
77  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
78  *  Policies consist of:
79  *    -set threshold values (defaults if none provided by pmconfig)
80  *    -set dependencies among devices
81  *    -enable/disable autopm
82  *    -turn down idle components based on thresholds (if autopm is enabled)
83  *     (aka scanning)
84  *    -maintain power states based on dependencies among devices
85  *    -upon request, or when the frame buffer powers off, attempt to turn off
86  *     all components that are idle or become idle over the next (10 sec)
87  *     period in an attempt to get down to an EnergyStar compliant state
88  *    -prevent powering off of a device which exported the
89  *     pm-no-involuntary-power-cycles property without active involvement of
90  *     the device's driver (so no removing power when the device driver is
91  *     not attached)
92  * -provide a mechanism for a device driver to request that a device's component
93  *  be brought back to the power level necessary for the use of the device
94  * -allow a process to directly control the power levels of device components
95  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
96  * -ensure that the console frame buffer is powered up before being referenced
97  *  via prom_printf() or other prom calls that might generate console output
98  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
99  * -provide "backwards compatible" behavior for devices without pm-components
100  *  property
101  *
102  * Scanning:
103  * Whenever autopm is enabled, the framework attempts to bring each component
104  * of each device to its lowest power based on the threshold of idleness
105  * associated with each transition and the busy/idle state of the component.
106  *
107  * The actual work of this is done by pm_scan_dev(), which cycles through each
108  * component of a device, checking its idleness against its current threshold,
109  * and calling pm_set_power() as appropriate to change the power level.
110  * This function also indicates when it would next be profitable to scan the
111  * device again, and a new scan is scheduled after that time.
112  *
113  * Dependencies:
114  * It is possible to establish a dependency between the power states of two
115  * otherwise unrelated devices.  This is currently done to ensure that the
116  * cdrom is always up whenever the console framebuffer is up, so that the user
117  * can insert a cdrom and see a popup as a result.
118  *
119  * The dependency terminology used in power.conf(4) is not easy to understand,
120  * so we've adopted a different terminology in the implementation.  We write
121  * of a "keeps up" and a "kept up" device.  A relationship can be established
122  * where one device keeps up another.  That means that if the keepsup device
123  * has any component that is at a non-zero power level, all components of the
124  * "kept up" device must be brought to full power.  This relationship is
125  * asynchronous.  When the keeping device is powered up, a request is queued
126  * to a worker thread to bring up the kept device.  The caller does not wait.
127  * Scan will not turn down a kept up device.
128  *
129  * Direct PM:
130  * A device may be directly power managed by a process.  If a device is
131  * directly pm'd, then it will not be scanned, and dependencies will not be
 * enforced.  If a directly pm'd device's driver requests a power change (via
133  * pm_raise_power(9F)), then the request is blocked and notification is sent
134  * to the controlling process, which must issue the requested power change for
135  * the driver to proceed.
136  *
137  */
138 
139 #include <sys/types.h>
140 #include <sys/errno.h>
141 #include <sys/callb.h>		/* callback registration during CPR */
142 #include <sys/conf.h>		/* driver flags and functions */
143 #include <sys/open.h>		/* OTYP_CHR definition */
144 #include <sys/stat.h>		/* S_IFCHR definition */
145 #include <sys/pathname.h>	/* name -> dev_info xlation */
146 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
147 #include <sys/kmem.h>		/* memory alloc stuff */
148 #include <sys/debug.h>
149 #include <sys/archsystm.h>
150 #include <sys/pm.h>
151 #include <sys/ddi.h>
152 #include <sys/sunddi.h>
153 #include <sys/sunndi.h>
154 #include <sys/sunpm.h>
155 #include <sys/epm.h>
156 #include <sys/vfs.h>
157 #include <sys/mode.h>
158 #include <sys/mkdev.h>
159 #include <sys/promif.h>
160 #include <sys/consdev.h>
161 #include <sys/esunddi.h>
162 #include <sys/modctl.h>
163 #include <sys/fs/ufs_fs.h>
164 #include <sys/note.h>
165 #include <sys/taskq.h>
166 #include <sys/bootconf.h>
167 #include <sys/reboot.h>
168 #include <sys/spl.h>
169 #include <sys/disp.h>
170 #include <sys/sobject.h>
171 #include <sys/sunmdi.h>
172 
173 
174 /*
175  * PM LOCKING
176  *	The list of locks:
177  * Global pm mutex locks.
178  *
179  * pm_scan_lock:
180  *		It protects the timeout id of the scan thread, and the value
181  *		of autopm_enabled.  This lock is not held concurrently with
182  *		any other PM locks.
183  *
184  * pm_clone_lock:	Protects the clone list and count of poll events
185  *		pending for the pm driver.
186  *		Lock ordering:
187  *			pm_clone_lock -> pm_pscc_interest_rwlock,
188  *			pm_clone_lock -> pm_pscc_direct_rwlock.
189  *
190  * pm_rsvp_lock:
191  *		Used to synchronize the data structures used for processes
192  *		to rendezvous with state change information when doing
193  *		direct PM.
194  *		Lock ordering:
195  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
196  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
197  *			pm_rsvp_lock -> pm_clone_lock.
198  *
199  * ppm_lock:	protects the list of registered ppm drivers
200  *		Lock ordering:
201  *			ppm_lock -> ppm driver unit_lock
202  *
203  * pm_compcnt_lock:
204  *		Protects count of components that are not at their lowest
205  *		power level.
206  *		Lock ordering:
207  *			pm_compcnt_lock -> ppm_lock.
208  *
209  * pm_dep_thread_lock:
210  *		Protects work list for pm_dep_thread.  Not taken concurrently
211  *		with any other pm lock.
212  *
213  * pm_remdrv_lock:
214  *		Serializes the operation of removing noinvol data structure
215  *		entries for a branch of the tree when a driver has been
216  *		removed from the system (modctl_rem_major).
217  *		Lock ordering:
218  *			pm_remdrv_lock -> pm_noinvol_rwlock.
219  *
220  * pm_cfb_lock: (High level spin lock)
221  *		Protects the count of how many components of the console
222  *		frame buffer are off (so we know if we have to bring up the
 *		console as a result of a prom_printf, etc.).
224  *		No other locks are taken while holding this lock.
225  *
226  * pm_loan_lock:
227  *		Protects the lock_loan list.  List is used to record that one
228  *		thread has acquired a power lock but has launched another thread
229  *		to complete its processing.  An entry in the list indicates that
230  *		the worker thread can borrow the lock held by the other thread,
231  *		which must block on the completion of the worker.  Use is
232  *		specific to module loading.
233  *		No other locks are taken while holding this lock.
234  *
235  * Global PM rwlocks
236  *
237  * pm_thresh_rwlock:
238  *		Protects the list of thresholds recorded for future use (when
239  *		devices attach).
240  *		Lock ordering:
241  *			pm_thresh_rwlock -> devi_pm_lock
242  *
243  * pm_noinvol_rwlock:
244  *		Protects list of detached nodes that had noinvol registered.
245  *		No other PM locks are taken while holding pm_noinvol_rwlock.
246  *
247  * pm_pscc_direct_rwlock:
248  *		Protects the list that maps devices being directly power
249  *		managed to the processes that manage them.
250  *		Lock ordering:
251  *			pm_pscc_direct_rwlock -> psce_lock
252  *
 * pm_pscc_interest_rwlock:
254  *		Protects the list that maps state change events to processes
255  *		that want to know about them.
256  *		Lock ordering:
257  *			pm_pscc_interest_rwlock -> psce_lock
258  *
259  * per-dip locks:
260  *
261  * Each node has these per-dip locks, which are only used if the device is
262  * a candidate for power management (e.g. has pm components)
263  *
264  * devi_pm_lock:
265  *		Protects all power management state of the node except for
266  *		power level, which is protected by ndi_devi_enter().
267  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
268  *		Lock ordering:
269  *			devi_pm_lock -> pm_rsvp_lock,
270  *			devi_pm_lock -> pm_dep_thread_lock,
271  *			devi_pm_lock -> pm_noinvol_rwlock,
272  *			devi_pm_lock -> power lock
273  *
274  * power lock (ndi_devi_enter()):
275  *		Since changing power level is possibly a slow operation (30
276  *		seconds to spin up a disk drive), this is locked separately.
277  *		Since a call into the driver to change the power level of one
278  *		component may result in a call back into the framework to change
279  *		the power level of another, this lock allows re-entrancy by
280  *		the same thread (ndi_devi_enter is used for this because
281  *		the USB framework uses ndi_devi_enter in its power entry point,
 *		and use of any other lock would produce a deadlock).
283  *
284  * devi_pm_busy_lock:
285  *		This lock protects the integrity of the busy count.  It is
286  *		only taken by pm_busy_component() and pm_idle_component and
287  *		some code that adjust the busy time after the timer gets set
288  *		up or after a CPR operation.  It is per-dip to keep from
289  *		single-threading all the disk drivers on a system.
290  *		It could be per component instead, but most devices have
291  *		only one component.
292  *		No other PM locks are taken while holding this lock.
293  *
294  */
295 
296 static int stdout_is_framebuffer;
297 static kmutex_t	e_pm_power_lock;
298 static kmutex_t pm_loan_lock;
299 kmutex_t	pm_scan_lock;
300 callb_id_t	pm_cpr_cb_id;
301 callb_id_t	pm_panic_cb_id;
302 callb_id_t	pm_halt_cb_id;
303 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
304 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
305 
306 clock_t pm_min_scan = PM_MIN_SCAN;
307 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
308 
309 static int pm_busop_set_power(dev_info_t *,
310     void *, pm_bus_power_op_t, void *, void *);
311 static int pm_busop_match_request(dev_info_t *, void *);
312 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
313 
314 /*
 * Dependency Processing is done thru a separate thread.
316  */
317 kmutex_t	pm_dep_thread_lock;
318 kcondvar_t	pm_dep_thread_cv;
319 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
320 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
321 
322 /*
323  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
324  * power managing things in single user mode that have been suppressed via
325  * power.conf entries.  Protected by pm_scan_lock.
326  */
327 int		autopm_enabled;
328 
329 /*
330  * This flag is true while processes are stopped for a checkpoint/resume.
331  * Controlling processes of direct pm'd devices are not available to
332  * participate in power level changes, so we bypass them when this is set.
333  */
334 static int	pm_processes_stopped;
335 
336 #ifdef	DEBUG
337 
338 /*
339  * see common/sys/epm.h for PMD_* values
340  */
341 uint_t		pm_debug = 0;
342 
343 /*
344  * If pm_divertdebug is set, then no prom_printf calls will be made by
345  * PMD(), which will prevent debug output from bringing up the console
346  * frame buffer.  Clearing this variable before setting pm_debug will result
347  * in PMD output going to the console.
348  *
349  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
350  * deadlocks and decremented at the end of pm_set_power()
351  */
352 uint_t		pm_divertdebug = 1;
353 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
354 
355 void prdeps(char *);
356 #endif
357 
358 /* Globals */
359 
360 /*
361  * List of recorded thresholds and dependencies
362  */
363 pm_thresh_rec_t *pm_thresh_head;
364 krwlock_t pm_thresh_rwlock;
365 
366 pm_pdr_t *pm_dep_head;
367 static int pm_unresolved_deps = 0;
368 static int pm_prop_deps = 0;
369 
370 /*
371  * List of devices that exported no-involuntary-power-cycles property
372  */
373 pm_noinvol_t *pm_noinvol_head;
374 
375 /*
376  * Locks used in noinvol processing
377  */
378 krwlock_t pm_noinvol_rwlock;
379 kmutex_t pm_remdrv_lock;
380 
381 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
382 int pm_system_idle_threshold;
383 /*
384  * By default nexus has 0 threshold, and depends on its children to keep it up
385  */
386 int pm_default_nexus_threshold = 0;
387 
388 /*
389  * Data structures shared with common/io/pm.c
390  */
391 kmutex_t	pm_clone_lock;
392 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
393 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
394 unsigned char	pm_interest[PM_MAX_CLONE];
395 struct pollhead	pm_pollhead;
396 
397 extern int	hz;
398 extern char	*platform_module_list[];
399 
400 /*
401  * Wrappers for use in ddi_walk_devs
402  */
403 
404 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
405 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
406 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
407 static int		pm_discard_dep_walk(dev_info_t *, void *);
408 #ifdef DEBUG
409 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
410 #endif
411 
412 /*
413  * Routines for managing noinvol devices
414  */
415 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
416 void			pm_noinvol_update_node(dev_info_t *,
417 			    pm_bp_noinvol_t *req);
418 
419 kmutex_t pm_rsvp_lock;
420 kmutex_t pm_compcnt_lock;
421 krwlock_t pm_pscc_direct_rwlock;
422 krwlock_t pm_pscc_interest_rwlock;
423 
424 #define	PSC_INTEREST	0	/* belongs to interest psc list */
425 #define	PSC_DIRECT	1	/* belongs to direct psc list */
426 
427 pscc_t *pm_pscc_interest;
428 pscc_t *pm_pscc_direct;
429 
430 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
431 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
432 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
433 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
434 #define	PPM(dip) ((dev_info_t *)DEVI(dip)->devi_pm_ppm)
435 
436 #define	PM_INCR_NOTLOWEST(dip) {					\
437 	mutex_enter(&pm_compcnt_lock);					\
438 	if (!PM_IS_NEXUS(dip) ||					\
439 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
440 		if (pm_comps_notlowest == 0)				\
441 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
442 		pm_comps_notlowest++;					\
443 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
444 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
445 	}								\
446 	mutex_exit(&pm_compcnt_lock);					\
447 }
448 #define	PM_DECR_NOTLOWEST(dip) {					\
449 	mutex_enter(&pm_compcnt_lock);					\
450 	if (!PM_IS_NEXUS(dip) ||					\
451 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
452 		ASSERT(pm_comps_notlowest);				\
453 		pm_comps_notlowest--;					\
454 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
455 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
456 		if (pm_comps_notlowest == 0)				\
457 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
458 	}								\
459 	mutex_exit(&pm_compcnt_lock);					\
460 }
461 
462 /*
463  * console frame-buffer power-management is not enabled when
464  * debugging services are present.  to override, set pm_cfb_override
465  * to non-zero.
466  */
467 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
468 kmutex_t pm_cfb_lock;
469 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
470 #ifdef DEBUG
471 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
472 #else
473 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
474 #endif
475 
476 static dev_info_t *cfb_dip = 0;
477 static dev_info_t *cfb_dip_detaching = 0;
478 uint_t cfb_inuse = 0;
479 static ddi_softintr_t pm_soft_id;
480 static clock_t pm_soft_pending;
481 int	pm_scans_disabled = 0;
482 
483 /*
484  * A structure to record the fact that one thread has borrowed a lock held
485  * by another thread.  The context requires that the lender block on the
486  * completion of the borrower.
487  */
488 typedef struct lock_loan {
489 	struct lock_loan	*pmlk_next;
490 	kthread_t		*pmlk_borrower;
491 	kthread_t		*pmlk_lender;
492 	dev_info_t		*pmlk_dip;
493 } lock_loan_t;
494 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
495 
496 #ifdef	DEBUG
497 #define	PMD_FUNC(func, name)	char *(func) = (name);
498 #else
499 #define	PMD_FUNC(func, name)
500 #endif
501 
502 
503 /*
504  * Must be called before first device (including pseudo) attach
505  */
506 void
507 pm_init_locks(void)
508 {
509 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
510 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
511 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
512 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
513 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
514 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
515 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
516 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
517 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
518 }
519 
520 static boolean_t
521 pm_cpr_callb(void *arg, int code)
522 {
523 	_NOTE(ARGUNUSED(arg))
524 	static int auto_save;
525 	static int pm_reset_timestamps(dev_info_t *, void *);
526 
527 	switch (code) {
528 	case CB_CODE_CPR_CHKPT:
529 		/*
530 		 * Cancel scan or wait for scan in progress to finish
531 		 * Other threads may be trying to restart the scan, so we
532 		 * have to keep at it unil it sticks
533 		 */
534 		mutex_enter(&pm_scan_lock);
535 		ASSERT(!pm_scans_disabled);
536 		pm_scans_disabled = 1;
537 		auto_save = autopm_enabled;
538 		autopm_enabled = 0;
539 		mutex_exit(&pm_scan_lock);
540 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
541 		break;
542 
543 	case CB_CODE_CPR_RESUME:
544 		ASSERT(!autopm_enabled);
545 		ASSERT(pm_scans_disabled);
546 		pm_scans_disabled = 0;
547 		/*
548 		 * Call pm_reset_timestamps to reset timestamps of each
549 		 * device to the time when the system is resumed so that their
550 		 * idleness can be re-calculated. That's to avoid devices from
551 		 * being powered down right after resume if the system was in
552 		 * suspended mode long enough.
553 		 */
554 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
555 
556 		autopm_enabled = auto_save;
557 		/*
558 		 * If there is any auto-pm device, get the scanning
559 		 * going. Otherwise don't bother.
560 		 */
561 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
562 		break;
563 	}
564 	return (B_TRUE);
565 }
566 
567 /*
568  * This callback routine is called when there is a system panic.  This function
569  * exists for prototype matching.
570  */
571 static boolean_t
572 pm_panic_callb(void *arg, int code)
573 {
574 	_NOTE(ARGUNUSED(arg, code))
575 	void pm_cfb_check_and_powerup(void);
576 	PMD(PMD_CFB, ("pm_panic_callb\n"))
577 	pm_cfb_check_and_powerup();
578 	return (B_TRUE);
579 }
580 
581 static boolean_t
582 pm_halt_callb(void *arg, int code)
583 {
584 	_NOTE(ARGUNUSED(arg, code))
585 	return (B_TRUE);	/* XXX for now */
586 }
587 
588 /*
589  * This needs to be called after the root and platform drivers are loaded
590  * and be single-threaded with respect to driver attach/detach
591  */
592 void
593 pm_init(void)
594 {
595 	PMD_FUNC(pmf, "pm_init")
596 	char **mod;
597 	extern pri_t minclsyspri;
598 	static void pm_dep_thread(void);
599 
600 	pm_comps_notlowest = 0;
601 	pm_system_idle_threshold = pm_default_idle_threshold;
602 
603 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
604 	    CB_CL_CPR_PM, "pm_cpr");
605 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
606 		    CB_CL_PANIC, "pm_panic");
607 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
608 		    CB_CL_HALT, "pm_halt");
609 
610 	/*
611 	 * Create a thread to do dependency processing.
612 	 */
613 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
614 	    TS_RUN, minclsyspri);
615 
616 	/*
617 	 * loadrootmodules already loaded these ppm drivers, now get them
618 	 * attached so they can claim the root drivers as they attach
619 	 */
620 	for (mod = platform_module_list; *mod; mod++) {
621 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
622 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
623 			    *mod);
624 		} else {
625 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
626 			    ddi_major_to_name(ddi_name_to_major(*mod))))
627 		}
628 	}
629 }
630 
631 /*
632  * pm_scan_init - create pm scan data structure.  Called (if autopm enabled)
633  * when device becomes power managed or after a failed detach and when autopm
634  * is started via PM_START_PM ioctl, and after a CPR resume to get all the
635  * devices scanning again.
636  */
637 void
638 pm_scan_init(dev_info_t *dip)
639 {
640 	PMD_FUNC(pmf, "scan_init")
641 	pm_scan_t	*scanp;
642 
643 	ASSERT(!PM_ISBC(dip));
644 
645 	PM_LOCK_DIP(dip);
646 	scanp = PM_GET_PM_SCAN(dip);
647 	if (!scanp) {
648 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
649 		    pmf, PM_DEVICE(dip)))
650 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
651 		DEVI(dip)->devi_pm_scan = scanp;
652 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
653 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
654 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
655 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
656 	}
657 	PM_UNLOCK_DIP(dip);
658 }
659 
660 /*
661  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
662  */
663 void
664 pm_scan_fini(dev_info_t *dip)
665 {
666 	PMD_FUNC(pmf, "scan_fini")
667 	pm_scan_t	*scanp;
668 
669 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
670 	ASSERT(!PM_ISBC(dip));
671 	PM_LOCK_DIP(dip);
672 	scanp = PM_GET_PM_SCAN(dip);
673 	if (!scanp) {
674 		PM_UNLOCK_DIP(dip);
675 		return;
676 	}
677 
678 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
679 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
680 
681 	kmem_free(scanp, sizeof (pm_scan_t));
682 	DEVI(dip)->devi_pm_scan = NULL;
683 	PM_UNLOCK_DIP(dip);
684 }
685 
686 /*
687  * Given a pointer to a component struct, return the current power level
688  * (struct contains index unless it is a continuous level).
689  * Located here in hopes of getting both this and dev_is_needed into the
690  * cache together
691  */
692 static int
693 cur_power(pm_component_t *cp)
694 {
695 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
696 		return (cp->pmc_cur_pwr);
697 
698 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
699 }
700 
701 static char *
702 pm_decode_direction(int direction)
703 {
704 	switch (direction) {
705 	case PM_LEVEL_UPONLY:
706 		return ("up");
707 
708 	case PM_LEVEL_EXACT:
709 		return ("exact");
710 
711 	case PM_LEVEL_DOWNONLY:
712 		return ("down");
713 
714 	default:
715 		return ("INVALID DIRECTION");
716 	}
717 	_NOTE(NOTREACHED);
718 	ASSERT(0);
719 }
720 
721 char *
722 pm_decode_op(pm_bus_power_op_t op)
723 {
724 	switch (op) {
725 	case BUS_POWER_CHILD_PWRCHG:
726 		return ("CHILD_PWRCHG");
727 	case BUS_POWER_NEXUS_PWRUP:
728 		return ("NEXUS_PWRUP");
729 	case BUS_POWER_PRE_NOTIFICATION:
730 		return ("PRE_NOTIFICATION");
731 	case BUS_POWER_POST_NOTIFICATION:
732 		return ("POST_NOTIFICATION");
733 	case BUS_POWER_HAS_CHANGED:
734 		return ("HAS_CHANGED");
735 	case BUS_POWER_NOINVOL:
736 		return ("NOINVOL");
737 	default:
738 		return ("UNKNOWN OP");
739 	}
740 	_NOTE(NOTREACHED);
741 	ASSERT(0);
742 }
743 
744 /*
745  * Returns true if level is a possible (valid) power level for component
746  */
747 int
748 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
749 {
750 	PMD_FUNC(pmf, "e_pm_valid_power")
751 	pm_component_t *cp = PM_CP(dip, cmpt);
752 	int i;
753 	int *ip = cp->pmc_comp.pmc_lvals;
754 	int limit = cp->pmc_comp.pmc_numlevels;
755 
756 	if (level < 0)
757 		return (0);
758 	for (i = 0; i < limit; i++) {
759 		if (level == *ip++)
760 			return (1);
761 	}
762 #ifdef DEBUG
763 	if (pm_debug & PMD_FAIL) {
764 		ip = cp->pmc_comp.pmc_lvals;
765 
766 		for (i = 0; i < limit; i++)
767 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
768 			    pmf, i, *ip++))
769 	}
770 #endif
771 	return (0);
772 }
773 
774 /*
775  * Returns true if device is pm'd (after calling pm_start if need be)
776  */
777 int
778 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
779 {
780 	pm_info_t *info;
781 	static int pm_start(dev_info_t *dip);
782 
783 	/*
784 	 * Check if the device is power managed if not.
785 	 * To make the common case (device is power managed already)
786 	 * fast, we check without the lock.  If device is not already
787 	 * power managed, then we take the lock and the long route through
788 	 * go get it managed.  Devices never go unmanaged until they
789 	 * detach.
790 	 */
791 	info = PM_GET_PM_INFO(dip);
792 	if (!info) {
793 		if (!DEVI_IS_ATTACHING(dip)) {
794 			return (0);
795 		}
796 		if (pm_start(dip) != DDI_SUCCESS) {
797 			return (0);
798 		}
799 		info = PM_GET_PM_INFO(dip);
800 	}
801 	ASSERT(info);
802 	if (infop != NULL)
803 		*infop = info;
804 	return (1);
805 }
806 
807 int
808 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
809 {
810 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
811 		if (cpp != NULL)
812 			*cpp = PM_CP(dip, cmpt);
813 		return (1);
814 	} else {
815 		return (0);
816 	}
817 }
818 
819 /*
820  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
821  */
822 static int
823 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
824 {
825 	PMD_FUNC(pmf, "din")
826 	pm_component_t *cp;
827 	char *pathbuf;
828 	int result;
829 
830 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
831 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
832 	    !e_pm_valid_power(dip, cmpt, level))
833 		return (DDI_FAILURE);
834 
835 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
836 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
837 	    level, cur_power(cp)))
838 
839 	if (pm_set_power(dip, cmpt, level,  direction,
840 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
841 		if (direction == PM_LEVEL_UPONLY) {
842 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
843 			(void) ddi_pathname(dip, pathbuf);
844 			cmn_err(CE_WARN, "Device %s failed to power up.",
845 			    pathbuf);
846 			kmem_free(pathbuf, MAXPATHLEN);
847 		}
848 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
849 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
850 		    pm_decode_direction(direction), level, result))
851 		return (DDI_FAILURE);
852 	}
853 
854 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
855 	    PM_DEVICE(dip)))
856 	pm_rescan(dip);
857 	return (DDI_SUCCESS);
858 }
859 
860 /*
861  * We can get multiple pm_rescan() threads, if one of them discovers
862  * that no scan is running at the moment, it kicks it into action.
863  * Otherwise, it tells the current scanning thread to scan again when
864  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
865  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
866  * thread at a time runs the pm_scan_dev() code.
867  */
868 void
869 pm_rescan(void *arg)
870 {
871 	PMD_FUNC(pmf, "rescan")
872 	dev_info_t	*dip = (dev_info_t *)arg;
873 	pm_info_t	*info;
874 	pm_scan_t	*scanp;
875 	timeout_id_t	scanid;
876 
877 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
878 	PM_LOCK_DIP(dip);
879 	info = PM_GET_PM_INFO(dip);
880 	scanp = PM_GET_PM_SCAN(dip);
881 	if (pm_scans_disabled || !autopm_enabled || !info || !scanp ||
882 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
883 		PM_UNLOCK_DIP(dip);
884 		return;
885 	}
886 	if (scanp->ps_scan_flags & PM_SCANNING) {
887 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
888 		PM_UNLOCK_DIP(dip);
889 		return;
890 	} else if (scanp->ps_scan_id) {
891 		scanid = scanp->ps_scan_id;
892 		scanp->ps_scan_id = 0;
893 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
894 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
895 		PM_UNLOCK_DIP(dip);
896 		(void) untimeout(scanid);
897 		PM_LOCK_DIP(dip);
898 	}
899 
900 	/*
901 	 * Dispatching pm_scan during attach time is risky due to the fact that
902 	 * attach might soon fail and dip dissolved, and panic may happen while
903 	 * attempting to stop scan. So schedule a pm_rescan instead.
904 	 * (Note that if either of the first two terms are true, taskq_dispatch
905 	 * will not be invoked).
906 	 *
907 	 * Multiple pm_scan dispatching is unecessary and costly to keep track
908 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
909 	 * to regulate the dispatching.
910 	 *
911 	 * Scan is stopped before the device is detached (in pm_detaching())
912 	 * but it may get re-started during the post_detach processing if the
913 	 * driver fails to detach.
914 	 */
915 	if (DEVI_IS_ATTACHING(dip) ||
916 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
917 	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
918 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
919 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
920 		if (scanp->ps_scan_id) {
921 			scanid = scanp->ps_scan_id;
922 			scanp->ps_scan_id = 0;
923 			PM_UNLOCK_DIP(dip);
924 			(void) untimeout(scanid);
925 			PM_LOCK_DIP(dip);
926 			if (scanp->ps_scan_id) {
927 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
928 				    "thread scheduled pm_rescan, scanid %lx\n",
929 				    pmf, PM_DEVICE(dip),
930 				    (ulong_t)scanp->ps_scan_id))
931 				PM_UNLOCK_DIP(dip);
932 				return;
933 			}
934 		}
935 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
936 		    (scanp->ps_idle_down ? pm_id_ticks :
937 		    (pm_min_scan * hz)));
938 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
939 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
940 		    (ulong_t)scanp->ps_scan_id))
941 	} else {
942 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
943 		    pmf, PM_DEVICE(dip)))
944 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
945 	}
946 	PM_UNLOCK_DIP(dip);
947 }
948 
949 void
950 pm_scan(void *arg)
951 {
952 	PMD_FUNC(pmf, "scan")
953 	dev_info_t	*dip = (dev_info_t *)arg;
954 	pm_scan_t	*scanp;
955 	time_t		nextscan;
956 
957 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
958 
959 	PM_LOCK_DIP(dip);
960 	scanp = PM_GET_PM_SCAN(dip);
961 	ASSERT(scanp && PM_GET_PM_INFO(dip));
962 
963 	if (pm_scans_disabled || !autopm_enabled ||
964 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
965 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
966 		PM_UNLOCK_DIP(dip);
967 		return;
968 	}
969 
970 	if (scanp->ps_idle_down) {
971 		/*
972 		 * make sure we remember idledown was in affect until
973 		 * we've completed the scan
974 		 */
975 		PMID_SET_SCANS(scanp->ps_idle_down)
976 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
977 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
978 	}
979 
980 	/* possible having two threads running pm_scan() */
981 	if (scanp->ps_scan_flags & PM_SCANNING) {
982 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
983 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
984 		    pmf, PM_DEVICE(dip)))
985 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
986 		PM_UNLOCK_DIP(dip);
987 		return;
988 	}
989 
990 	scanp->ps_scan_flags |= PM_SCANNING;
991 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
992 	do {
993 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
994 		PM_UNLOCK_DIP(dip);
995 		nextscan = pm_scan_dev(dip);
996 		PM_LOCK_DIP(dip);
997 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
998 
999 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
1000 	scanp->ps_scan_flags &= ~PM_SCANNING;
1001 
1002 	if (scanp->ps_idle_down) {
1003 		scanp->ps_idle_down &= ~PMID_SCANS;
1004 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1005 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1006 	}
1007 
1008 	/* schedule for next idle check */
1009 	if (nextscan != LONG_MAX) {
1010 		if (nextscan > (LONG_MAX / hz))
1011 			nextscan = (LONG_MAX - 1) / hz;
1012 		if (scanp->ps_scan_id) {
1013 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1014 			    "another rescan scheduled scanid(%lx)\n", pmf,
1015 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1016 			PM_UNLOCK_DIP(dip);
1017 			return;
1018 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
1019 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1020 			    (clock_t)(nextscan * hz));
1021 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1022 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1023 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1024 		}
1025 	}
1026 	PM_UNLOCK_DIP(dip);
1027 }
1028 
1029 void
1030 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1031 {
1032 	int components = PM_NUMCMPTS(dip);
1033 	int i;
1034 
1035 	ASSERT(components > 0);
1036 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1037 	for (i = 0; i < components; i++) {
1038 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1039 	}
1040 	PM_UNLOCK_BUSY(dip);
1041 }
1042 
1043 /*
1044  * Returns true if device needs to be kept up because it exported the
1045  * "no-involuntary-power-cycles" property or we're pretending it did (console
1046  * fb case) or it is an ancestor of such a device and has used up the "one
1047  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1048  * upon detach
1049  */
1050 int
1051 pm_noinvol(dev_info_t *dip)
1052 {
1053 	PMD_FUNC(pmf, "noinvol")
1054 
1055 	/*
1056 	 * This doesn't change over the life of a driver, so no locking needed
1057 	 */
1058 	if (PM_IS_CFB(dip)) {
1059 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1060 		    pmf, PM_DEVICE(dip)))
1061 		return (1);
1062 	}
1063 	/*
1064 	 * Not an issue if no such kids
1065 	 */
1066 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1067 #ifdef DEBUG
1068 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1069 			dev_info_t *pdip = dip;
1070 			do {
1071 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1072 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1073 				    DEVI(pdip)->devi_pm_noinvolpm,
1074 				    DEVI(pdip)->devi_pm_volpmd))
1075 				pdip = ddi_get_parent(pdip);
1076 			} while (pdip);
1077 		}
1078 #endif
1079 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1080 		return (0);
1081 	}
1082 
1083 	/*
1084 	 * Since we now maintain the counts correct at every node, we no longer
1085 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1086 	 * without the children getting their counts adjusted.
1087 	 */
1088 
1089 #ifdef	DEBUG
1090 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1091 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1092 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1093 		    PM_DEVICE(dip)))
1094 #endif
1095 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1096 }
1097 
1098 /*
1099  * This function performs the actual scanning of the device.
1100  * It attempts to power off the indicated device's components if they have
1101  * been idle and other restrictions are met.
1102  * pm_scan_dev calculates and returns when the next scan should happen for
1103  * this device.
1104  */
1105 time_t
1106 pm_scan_dev(dev_info_t *dip)
1107 {
1108 	PMD_FUNC(pmf, "scan_dev")
1109 	pm_scan_t	*scanp;
1110 	time_t		*timestamp, idletime, now, thresh;
1111 	time_t		timeleft = 0;
1112 	int		i, nxtpwr, curpwr, pwrndx, unused;
1113 	size_t		size;
1114 	pm_component_t	 *cp;
1115 	dev_info_t	*pdip = ddi_get_parent(dip);
1116 	int		circ;
1117 	static int	cur_threshold(dev_info_t *, int);
1118 	static int	pm_next_lower_power(pm_component_t *, int);
1119 
1120 	/*
1121 	 * skip attaching device
1122 	 */
1123 	if (DEVI_IS_ATTACHING(dip)) {
1124 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1125 		    pmf, PM_DEVICE(dip), pm_min_scan))
1126 		return (pm_min_scan);
1127 	}
1128 
1129 	PM_LOCK_DIP(dip);
1130 	scanp = PM_GET_PM_SCAN(dip);
1131 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1132 
1133 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1134 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1135 	    PM_KUC(dip)))
1136 
1137 	/* no scan under the following conditions */
1138 	if (pm_scans_disabled || !autopm_enabled ||
1139 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1140 	    (PM_KUC(dip) != 0) ||
1141 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1142 		PM_UNLOCK_DIP(dip);
1143 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1144 		    "scan_disabled(%d), apm_enabled(%d), kuc(%d), "
1145 		    "%s directpm, %s pm_noinvol\n", pmf, PM_DEVICE(dip),
1146 		    pm_scans_disabled, autopm_enabled, PM_KUC(dip),
1147 		    PM_ISDIRECT(dip) ? "is" : "is not",
1148 		    pm_noinvol(dip) ? "is" : "is not"))
1149 		return (LONG_MAX);
1150 	}
1151 	PM_UNLOCK_DIP(dip);
1152 
1153 	if (!ndi_devi_tryenter(pdip, &circ)) {
1154 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1155 		    pmf, PM_DEVICE(pdip)))
1156 		return ((time_t)1);
1157 	}
1158 	now = gethrestime_sec();
1159 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1160 	timestamp = kmem_alloc(size, KM_SLEEP);
1161 	pm_get_timestamps(dip, timestamp);
1162 
1163 	/*
1164 	 * Since we removed support for backwards compatible devices,
1165 	 * (see big comment at top of file)
1166 	 * it is no longer required to deal with component 0 last.
1167 	 */
1168 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1169 		/*
1170 		 * If already off (an optimization, perhaps)
1171 		 */
1172 		cp = PM_CP(dip, i);
1173 		pwrndx = cp->pmc_cur_pwr;
1174 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1175 		    PM_LEVEL_UNKNOWN :
1176 		    cp->pmc_comp.pmc_lvals[pwrndx];
1177 
1178 		if (pwrndx == 0) {
1179 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1180 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1181 			/* skip device if off or at its lowest */
1182 			continue;
1183 		}
1184 
1185 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1186 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1187 			/* were busy or newly became busy by another thread */
1188 			if (timeleft == 0)
1189 				timeleft = max(thresh, pm_min_scan);
1190 			else
1191 				timeleft = min(
1192 				    timeleft, max(thresh, pm_min_scan));
1193 			continue;
1194 		}
1195 
1196 		idletime = now - timestamp[i];		/* idle time */
1197 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1198 		    pmf, PM_DEVICE(dip), i, idletime))
1199 		if (idletime >= thresh || PM_IS_PID(dip)) {
1200 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1201 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1202 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1203 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1204 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1205 			    PM_CURPOWER(dip, i) != nxtpwr) {
1206 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1207 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1208 				    i, curpwr, nxtpwr))
1209 				timeleft = pm_min_scan;
1210 				continue;
1211 			} else {
1212 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1213 				    "%d->%d, GOOD curpwr %d\n", pmf,
1214 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1215 				    cur_power(cp)))
1216 
1217 				if (nxtpwr == 0)	/* component went off */
1218 					continue;
1219 
1220 				/*
1221 				 * scan to next lower level
1222 				 */
1223 				if (timeleft == 0)
1224 					timeleft = max(
1225 					    1, cur_threshold(dip, i));
1226 				else
1227 					timeleft = min(timeleft,
1228 					    max(1, cur_threshold(dip, i)));
1229 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1230 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1231 				    i, timeleft))
1232 			}
1233 		} else {	/* comp not idle long enough */
1234 			if (timeleft == 0)
1235 				timeleft = thresh - idletime;
1236 			else
1237 				timeleft = min(timeleft, (thresh - idletime));
1238 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1239 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1240 		}
1241 	}
1242 	ndi_devi_exit(pdip, circ);
1243 	kmem_free(timestamp, size);
1244 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1245 	    PM_DEVICE(dip), timeleft))
1246 
1247 	/*
1248 	 * if components are already at lowest level, timeleft is left 0
1249 	 */
1250 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1251 }
1252 
1253 /*
1254  * pm_scan_stop - cancel scheduled pm_rescan,
1255  *                wait for termination of dispatched pm_scan thread
1256  *                     and active pm_scan_dev thread.
1257  */
1258 void
1259 pm_scan_stop(dev_info_t *dip)
1260 {
1261 	PMD_FUNC(pmf, "scan_stop")
1262 	pm_scan_t	*scanp;
1263 	timeout_id_t	scanid;
1264 
1265 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1266 	PM_LOCK_DIP(dip);
1267 	scanp = PM_GET_PM_SCAN(dip);
1268 	if (!scanp) {
1269 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1270 		    pmf, PM_DEVICE(dip)))
1271 		PM_UNLOCK_DIP(dip);
1272 		return;
1273 	}
1274 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1275 
1276 	/* cancel scheduled scan taskq */
1277 	while (scanp->ps_scan_id) {
1278 		scanid = scanp->ps_scan_id;
1279 		scanp->ps_scan_id = 0;
1280 		PM_UNLOCK_DIP(dip);
1281 		(void) untimeout(scanid);
1282 		PM_LOCK_DIP(dip);
1283 	}
1284 
1285 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1286 		PM_UNLOCK_DIP(dip);
1287 		delay(1);
1288 		PM_LOCK_DIP(dip);
1289 	}
1290 	PM_UNLOCK_DIP(dip);
1291 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1292 }
1293 
1294 int
1295 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1296 {
1297 	_NOTE(ARGUNUSED(arg))
1298 
1299 	if (!PM_GET_PM_SCAN(dip))
1300 		return (DDI_WALK_CONTINUE);
1301 	ASSERT(!PM_ISBC(dip));
1302 	pm_scan_stop(dip);
1303 	return (DDI_WALK_CONTINUE);
1304 }
1305 
1306 /*
1307  * Converts a power level value to its index
1308  */
1309 static int
1310 power_val_to_index(pm_component_t *cp, int val)
1311 {
1312 	int limit, i, *ip;
1313 
1314 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1315 	    val != PM_LEVEL_EXACT);
1316 	/*  convert power value into index (i) */
1317 	limit = cp->pmc_comp.pmc_numlevels;
1318 	ip = cp->pmc_comp.pmc_lvals;
1319 	for (i = 0; i < limit; i++)
1320 		if (val == *ip++)
1321 			return (i);
1322 	return (-1);
1323 }
1324 
1325 /*
1326  * Converts a numeric power level to a printable string
1327  */
1328 static char *
1329 power_val_to_string(pm_component_t *cp, int val)
1330 {
1331 	int index;
1332 
1333 	if (val == PM_LEVEL_UPONLY)
1334 		return ("<UPONLY>");
1335 
1336 	if (val == PM_LEVEL_UNKNOWN ||
1337 	    (index = power_val_to_index(cp, val)) == -1)
1338 		return ("<LEVEL_UNKNOWN>");
1339 
1340 	return (cp->pmc_comp.pmc_lnames[index]);
1341 }
1342 
1343 /*
1344  * Return true if this node has been claimed by a ppm.
1345  */
1346 static int
1347 pm_ppm_claimed(dev_info_t *dip)
1348 {
1349 	return (PPM(dip) != NULL);
1350 }
1351 
1352 /*
1353  * A node which was voluntarily power managed has just used up its "free cycle"
1354  * and need is volpmd field cleared, and the same done to all its descendents
1355  */
1356 static void
1357 pm_clear_volpm_dip(dev_info_t *dip)
1358 {
1359 	PMD_FUNC(pmf, "clear_volpm_dip")
1360 
1361 	if (dip == NULL)
1362 		return;
1363 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1364 	    PM_DEVICE(dip)))
1365 	DEVI(dip)->devi_pm_volpmd = 0;
1366 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1367 		pm_clear_volpm_dip(dip);
1368 	}
1369 }
1370 
1371 /*
1372  * A node which was voluntarily power managed has used up the "free cycles"
1373  * for the subtree that it is the root of.  Scan through the list of detached
1374  * nodes and adjust the counts of any that are descendents of the node.
1375  */
1376 static void
1377 pm_clear_volpm_list(dev_info_t *dip)
1378 {
1379 	PMD_FUNC(pmf, "clear_volpm_list")
1380 	char	*pathbuf;
1381 	size_t	len;
1382 	pm_noinvol_t *ip;
1383 
1384 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1385 	(void) ddi_pathname(dip, pathbuf);
1386 	len = strlen(pathbuf);
1387 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1388 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1389 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1390 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1391 		    ip->ni_path))
1392 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1393 		    ip->ni_path[len] == '/') {
1394 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1395 			    ip->ni_path))
1396 			ip->ni_volpmd = 0;
1397 			ip->ni_wasvolpmd = 0;
1398 		}
1399 	}
1400 	kmem_free(pathbuf, MAXPATHLEN);
1401 	rw_exit(&pm_noinvol_rwlock);
1402 }
1403 
1404 /*
1405  * Powers a device, suspending or resuming the driver if it is a backward
1406  * compatible device, calling into ppm to change power level.
1407  * Called with the component's power lock held.
1408  */
1409 static int
1410 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1411     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1412 {
1413 	PMD_FUNC(pmf, "power_dev")
1414 	power_req_t power_req;
1415 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1416 	int		resume_needed = 0;
1417 	int		suspended = 0;
1418 	int		result;
1419 	struct pm_component *cp = PM_CP(dip, comp);
1420 	int		bc = PM_ISBC(dip);
1421 	int pm_all_components_off(dev_info_t *);
1422 	int		clearvolpmd = 0;
1423 	char		pathbuf[MAXNAMELEN];
1424 #ifdef DEBUG
1425 	char *ppmname, *ppmaddr;
1426 #endif
1427 	/*
1428 	 * If this is comp 0 of a backwards compat device and we are
1429 	 * going to take the power away, we need to detach it with
1430 	 * DDI_PM_SUSPEND command.
1431 	 */
1432 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1433 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1434 			/* We could not suspend before turning cmpt zero off */
1435 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1436 			    pmf, PM_DEVICE(dip)))
1437 			return (DDI_FAILURE);
1438 		} else {
1439 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1440 			suspended++;
1441 		}
1442 	}
1443 	power_req.request_type = PMR_PPM_SET_POWER;
1444 	power_req.req.ppm_set_power_req.who = dip;
1445 	power_req.req.ppm_set_power_req.cmpt = comp;
1446 	power_req.req.ppm_set_power_req.old_level = old_level;
1447 	power_req.req.ppm_set_power_req.new_level = level;
1448 	power_req.req.ppm_set_power_req.canblock = canblock;
1449 	power_req.req.ppm_set_power_req.cookie = NULL;
1450 #ifdef DEBUG
1451 	if (pm_ppm_claimed(dip)) {
1452 		ppmname = PM_NAME(PPM(dip));
1453 		ppmaddr = PM_ADDR(PPM(dip));
1454 
1455 	} else {
1456 		ppmname = "noppm";
1457 		ppmaddr = "0";
1458 	}
1459 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1460 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1461 	    power_val_to_string(cp, old_level), old_level,
1462 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1463 #endif
1464 	/*
1465 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1466 	 * bc device comp 0 is powering on, then we count it as a power cycle
1467 	 * against its voluntary count.
1468 	 */
1469 	if (DEVI(dip)->devi_pm_volpmd &&
1470 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1471 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1472 		clearvolpmd = 1;
1473 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1474 	    &power_req, &result)) == DDI_SUCCESS) {
1475 		/*
1476 		 * Now do involuntary pm accounting;  If we've just cycled power
1477 		 * on a voluntarily pm'd node, and by inference on its entire
1478 		 * subtree, we need to set the subtree (including those nodes
1479 		 * already detached) volpmd counts to 0, and subtract out the
1480 		 * value of the current node's volpmd count from the ancestors
1481 		 */
1482 		if (clearvolpmd) {
1483 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1484 			pm_clear_volpm_dip(dip);
1485 			pm_clear_volpm_list(dip);
1486 			if (volpmd) {
1487 				(void) ddi_pathname(dip, pathbuf);
1488 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1489 				    volpmd, 0, pathbuf, dip);
1490 			}
1491 		}
1492 	} else {
1493 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1494 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1495 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1496 	}
1497 	/*
1498 	 * If some other devices were also powered up (e.g. other cpus in
1499 	 * the same domain) return a pointer to that list
1500 	 */
1501 	if (devlist) {
1502 		*devlist = (pm_ppm_devlist_t *)
1503 		    power_req.req.ppm_set_power_req.cookie;
1504 	}
1505 	/*
1506 	 * We will have to resume the device if the device is backwards compat
1507 	 * device and either of the following is true:
1508 	 * -This is comp 0 and we have successfully powered it up
1509 	 * -This is comp 0 and we have failed to power it down. Resume is
1510 	 *  needed because we have suspended it above
1511 	 */
1512 
1513 	if (bc && comp == 0) {
1514 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1515 		if (power_op_ret == DDI_SUCCESS) {
1516 			if (POWERING_ON(old_level, level)) {
1517 				/*
1518 				 * It must be either suspended or resumed
1519 				 * via pm_power_has_changed path
1520 				 */
1521 				ASSERT((DEVI(dip)->devi_pm_flags &
1522 				    PMC_SUSPENDED) ||
1523 				    (PM_CP(dip, comp)->pmc_flags &
1524 				    PM_PHC_WHILE_SET_POWER));
1525 
1526 					resume_needed = suspended;
1527 			}
1528 		} else {
1529 			if (POWERING_OFF(old_level, level)) {
1530 				/*
1531 				 * It must be either suspended or resumed
1532 				 * via pm_power_has_changed path
1533 				 */
1534 				ASSERT((DEVI(dip)->devi_pm_flags &
1535 				    PMC_SUSPENDED) ||
1536 				    (PM_CP(dip, comp)->pmc_flags &
1537 				    PM_PHC_WHILE_SET_POWER));
1538 
1539 					resume_needed = suspended;
1540 			}
1541 		}
1542 	}
1543 	if (resume_needed) {
1544 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1545 		/* ppm is not interested in DDI_PM_RESUME */
1546 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1547 		    DDI_SUCCESS) {
1548 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1549 		} else
1550 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1551 			    PM_DEVICE(dip));
1552 	}
1553 	return (power_op_ret);
1554 }
1555 
1556 /*
1557  * Return true if we are the owner or a borrower of the devi lock.  See
1558  * pm_lock_power_single() about borrowing the lock.
1559  */
1560 static int
1561 pm_devi_lock_held(dev_info_t *dip)
1562 {
1563 	lock_loan_t *cur;
1564 
1565 	if (DEVI_BUSY_OWNED(dip))
1566 	    return (1);
1567 
1568 	/* return false if no locks borrowed */
1569 	if (lock_loan_head.pmlk_next == NULL)
1570 		return (0);
1571 
1572 	mutex_enter(&pm_loan_lock);
1573 	/* see if our thread is registered as a lock borrower. */
1574 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1575 		if (cur->pmlk_borrower == curthread)
1576 			break;
1577 	mutex_exit(&pm_loan_lock);
1578 
1579 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1580 }
1581 
1582 /*
1583  * pm_set_power: adjusts power level of device.	 Assumes device is power
1584  * manageable & component exists.
1585  *
1586  * Cases which require us to bring up devices we keep up ("wekeepups") for
1587  * backwards compatible devices:
1588  *	component 0 is off and we're bringing it up from 0
1589  *		bring up wekeepup first
1590  *	and recursively when component 0 is off and we bring some other
1591  *	component up from 0
1592  * For devices which are not backward compatible, our dependency notion is much
1593  * simpler.  Unless all components are off, then wekeeps must be on.
1594  * We don't treat component 0 differently.
1595  * Canblock tells how to deal with a direct pm'd device.
1596  * Scan arg tells us if we were called from scan, in which case we don't need
1597  * to go back to the root node and walk down to change power.
1598  */
1599 int
1600 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1601     pm_canblock_t canblock, int scan, int *retp)
1602 {
1603 	PMD_FUNC(pmf, "set_power")
1604 	char		*pathbuf;
1605 	pm_bp_child_pwrchg_t bpc;
1606 	pm_sp_misc_t	pspm;
1607 	int		ret = DDI_SUCCESS;
1608 	int		unused = DDI_SUCCESS;
1609 	dev_info_t	*pdip = ddi_get_parent(dip);
1610 
1611 #ifdef DEBUG
1612 	int		diverted = 0;
1613 
1614 	/*
1615 	 * This prevents operations on the console from calling prom_printf and
1616 	 * either deadlocking or bringing up the console because of debug
1617 	 * output
1618 	 */
1619 	if (dip == cfb_dip) {
1620 		diverted++;
1621 		mutex_enter(&pm_debug_lock);
1622 		pm_divertdebug++;
1623 		mutex_exit(&pm_debug_lock);
1624 	}
1625 #endif
1626 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1627 	    direction == PM_LEVEL_EXACT);
1628 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1629 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1630 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1631 	(void) ddi_pathname(dip, pathbuf);
1632 	bpc.bpc_dip = dip;
1633 	bpc.bpc_path = pathbuf;
1634 	bpc.bpc_comp = comp;
1635 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1636 	bpc.bpc_nlevel = level;
1637 	pspm.pspm_direction = direction;
1638 	pspm.pspm_errnop = retp;
1639 	pspm.pspm_canblock = canblock;
1640 	pspm.pspm_scan = scan;
1641 	bpc.bpc_private = &pspm;
1642 
1643 	/*
1644 	 * If a config operation is being done (we've locked the parent) or
1645 	 * we already hold the power lock (we've locked the node)
1646 	 * then we can operate directly on the node because we have already
1647 	 * brought up all the ancestors, otherwise, we have to go back to the
1648 	 * top of the tree.
1649 	 */
1650 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1651 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1652 		    (void *)&bpc, (void *)&unused);
1653 	else
1654 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1655 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1656 #ifdef DEBUG
1657 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1658 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1659 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1660 	}
1661 	if (diverted) {
1662 		mutex_enter(&pm_debug_lock);
1663 		pm_divertdebug--;
1664 		mutex_exit(&pm_debug_lock);
1665 	}
1666 #endif
1667 	kmem_free(pathbuf, MAXPATHLEN);
1668 	return (ret);
1669 }
1670 
1671 
1672 static dev_info_t *
1673 find_dip(dev_info_t *dip, char *dev_name, int holddip)
1674 {
1675 	PMD_FUNC(pmf, "find_dip")
1676 	dev_info_t	*cdip;
1677 	char		*child_dev, *addr;
1678 	char		*device;	/* writeable copy of path */
1679 	int		dev_len = strlen(dev_name)+1;
1680 	int		circ;
1681 
1682 	device = kmem_zalloc(dev_len, KM_SLEEP);
1683 	(void) strcpy(device, dev_name);
1684 	addr = strchr(device, '@');
1685 	child_dev = strchr(device, '/');
1686 	if ((addr != NULL) && (child_dev == NULL || addr < child_dev)) {
1687 		/*
1688 		 * We have device = "name@addr..." form
1689 		 */
1690 		*addr++ = '\0';			/* for strcmp (and skip '@') */
1691 		if (child_dev != NULL)
1692 			*child_dev++ = '\0';	/* for strcmp (and skip '/') */
1693 	} else {
1694 		/*
1695 		 * We have device = "name/..." or "name"
1696 		 */
1697 		addr = "";
1698 		if (child_dev != NULL)
1699 			*child_dev++ = '\0';	/* for strcmp (and skip '/') */
1700 	}
1701 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
1702 		if (strcmp(ddi_node_name(dip), device) == 0) {
1703 			/* If the driver isn't loaded, we prune the search */
1704 			if (i_ddi_node_state(dip) < DS_READY) {
1705 				continue;
1706 			}
1707 			if (strcmp(ddi_get_name_addr(dip), addr) == 0) {
1708 				PMD(PMD_NAMETODIP, ("%s: matched %s@%s"
1709 				    "(%s#%d)\n", pmf, PM_DEVICE(dip)))
1710 				if (child_dev != NULL) {
1711 					PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): "
1712 					    "held, call find_dip %s\n", pmf,
1713 					    PM_DEVICE(dip), child_dev))
1714 					ndi_devi_enter(dip, &circ);
1715 					cdip = dip;
1716 					dip = find_dip(ddi_get_child(dip),
1717 					    child_dev, holddip);
1718 					ndi_devi_exit(cdip, circ);
1719 					PMD(PMD_NAMETODIP, ("%s: %s@%s(%s#%d): "
1720 					    "release, find_dip rets %s\n", pmf,
1721 					    PM_DEVICE(cdip), child_dev))
1722 				} else {
1723 					if (holddip) {
1724 						e_ddi_hold_devi(dip);
1725 						PMD(PMD_DHR | PMD_NAMETODIP,
1726 						    ("%s: held %s@%s(%s#%d), "
1727 						    "refcnt=%d\n", pmf,
1728 						    PM_DEVICE(dip),
1729 						    e_ddi_devi_holdcnt(dip)))
1730 					}
1731 				}
1732 				kmem_free(device, dev_len);
1733 				return (dip);
1734 			}
1735 		}
1736 	}
1737 	kmem_free(device, dev_len);
1738 	return (dip);
1739 }
1740 
1741 /*
1742  * If holddip is set, then if a dip is found we return with the node held
1743  */
1744 dev_info_t *
1745 pm_name_to_dip(char *pathname, int holddip)
1746 {
1747 	PMD_FUNC(pmf, "name_to_dip")
1748 	dev_info_t	*dip = NULL;
1749 	char		dev_name[MAXNAMELEN];
1750 	dev_info_t	*first_child;
1751 	int		circular;
1752 
1753 	if (!pathname)
1754 		return (NULL);
1755 
1756 	(void) strncpy(dev_name, pathname, MAXNAMELEN);
1757 
1758 	PMD(PMD_NAMETODIP, ("%s: devname: %s\n", pmf, dev_name))
1759 
1760 	/*
1761 	 * First we attempt to match the node in the tree.  If we succeed
1762 	 * we hold the driver and look up the dip again.
1763 	 * No need to hold the root as that node is always held.
1764 	 */
1765 	if (dev_name[0] == '/') {
1766 		ndi_devi_enter(ddi_root_node(), &circular);
1767 		first_child = ddi_get_child(ddi_root_node());
1768 		dip = find_dip(first_child, dev_name + 1, holddip);
1769 		ndi_devi_exit(ddi_root_node(), circular);
1770 
1771 	} else {
1772 		PMD(PMD_NAMETODIP, ("%s: physpath with unrooted "
1773 		    "search\n", pmf))
1774 		return (NULL);
1775 	}
1776 
1777 	ASSERT(!dip ||
1778 	    (ddi_name_to_major(ddi_binding_name(dip)) != (major_t)-1));
1779 
1780 	return (dip);
1781 }
1782 
1783 /*
1784  * Search for a dependency and mark it unsatisfied
1785  */
1786 static void
1787 pm_unsatisfy(char *keeper, char *kept)
1788 {
1789 	PMD_FUNC(pmf, "unsatisfy")
1790 	pm_pdr_t *dp;
1791 
1792 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1793 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1794 		if (!dp->pdr_isprop) {
1795 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1796 			    (dp->pdr_kept_count > 0) &&
1797 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1798 				if (dp->pdr_satisfied) {
1799 					dp->pdr_satisfied = 0;
1800 					pm_unresolved_deps++;
1801 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1802 					    "pm_unresolved_deps now %d\n", pmf,
1803 					    pm_unresolved_deps))
1804 				}
1805 			}
1806 		}
1807 	}
1808 }
1809 
1810 /*
1811  * Device dip is being un power managed, it keeps up count other devices.
1812  * We need to release any hold we have on the kept devices, and also
1813  * mark the dependency no longer satisfied.
1814  */
1815 static void
1816 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1817 {
1818 	PMD_FUNC(pmf, "unkeeps")
1819 	int i, j;
1820 	dev_info_t *kept;
1821 	dev_info_t *dip;
1822 	struct pm_component *cp;
1823 	int keeper_on = 0, circ;
1824 
1825 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1826 	    keeper, (void *)keptpaths))
1827 	/*
1828 	 * Try to grab keeper. Keeper may have gone away by now,
1829 	 * in this case, used the passed in value pwr
1830 	 */
1831 	dip = pm_name_to_dip(keeper, 1);
1832 	for (i = 0; i < count; i++) {
1833 		/* Release power hold */
1834 		kept = pm_name_to_dip(keptpaths[i], 1);
1835 		if (kept) {
1836 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1837 			    PM_DEVICE(kept), i))
1838 			/*
1839 			 * We need to check if we skipped a bringup here
1840 			 * because we could have failed the bringup
1841 			 * (ie DIRECT PM device) and have
1842 			 * not increment the count.
1843 			 */
1844 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1845 				keeper_on = 0;
1846 				PM_LOCK_POWER(dip, &circ);
1847 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1848 				    cp = &DEVI(dip)->devi_pm_components[j];
1849 					if (cur_power(cp)) {
1850 						keeper_on++;
1851 						break;
1852 					}
1853 				}
1854 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1855 					pm_rele_power(kept);
1856 					DEVI(kept)->devi_pm_flags
1857 						&= ~PMC_SKIP_BRINGUP;
1858 				}
1859 				PM_UNLOCK_POWER(dip, circ);
1860 			} else if (pwr) {
1861 				if (PM_SKBU(kept) == 0) {
1862 					pm_rele_power(kept);
1863 					DEVI(kept)->devi_pm_flags
1864 					    &= ~PMC_SKIP_BRINGUP;
1865 				}
1866 			}
1867 			ddi_release_devi(kept);
1868 		}
1869 		/*
1870 		 * mark this dependency not satisfied
1871 		 */
1872 		pm_unsatisfy(keeper, keptpaths[i]);
1873 	}
1874 	if (dip)
1875 		ddi_release_devi(dip);
1876 }
1877 
1878 /*
1879  * Device kept is being un power managed, it is kept up by keeper.
1880  * We need to mark the dependency no longer satisfied.
1881  */
1882 static void
1883 pm_unkepts(char *kept, char *keeper)
1884 {
1885 	PMD_FUNC(pmf, "unkepts")
1886 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1887 	ASSERT(keeper != NULL);
1888 	/*
1889 	 * mark this dependency not satisfied
1890 	 */
1891 	pm_unsatisfy(keeper, kept);
1892 }
1893 
1894 /*
1895  * Removes dependency information and hold on the kepts, if the path is a
1896  * path of a keeper.
1897  */
1898 static void
1899 pm_free_keeper(char *path, int pwr)
1900 {
1901 	pm_pdr_t *dp;
1902 	int i;
1903 	size_t length;
1904 
1905 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1906 		if (strcmp(dp->pdr_keeper, path) != 0)
1907 			continue;
1908 		/*
1909 		 * Remove all our kept holds and the dependency records,
1910 		 * then free up the kept lists.
1911 		 */
1912 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1913 		if (dp->pdr_kept_count)  {
1914 			for (i = 0; i < dp->pdr_kept_count; i++) {
1915 				length = strlen(dp->pdr_kept_paths[i]);
1916 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1917 			}
1918 			kmem_free(dp->pdr_kept_paths,
1919 			    dp->pdr_kept_count * sizeof (char **));
1920 			dp->pdr_kept_paths = NULL;
1921 			dp->pdr_kept_count = 0;
1922 		}
1923 	}
1924 }
1925 
1926 /*
1927  * Removes the device represented by path from the list of kepts, if the
1928  * path is a path of a kept
1929  */
1930 static void
1931 pm_free_kept(char *path)
1932 {
1933 	pm_pdr_t *dp;
1934 	int i;
1935 	int j, count;
1936 	size_t length;
1937 	char **paths;
1938 
1939 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1940 		if (dp->pdr_kept_count == 0)
1941 			continue;
1942 		count = dp->pdr_kept_count;
1943 		/* Remove this device from the kept path lists */
1944 		for (i = 0; i < count; i++) {
1945 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1946 				pm_unkepts(path, dp->pdr_keeper);
1947 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1948 				kmem_free(dp->pdr_kept_paths[i], length);
1949 				dp->pdr_kept_paths[i] = NULL;
1950 				dp->pdr_kept_count--;
1951 			}
1952 		}
1953 		/* Compact the kept paths array */
1954 		if (dp->pdr_kept_count) {
1955 			length = dp->pdr_kept_count * sizeof (char **);
1956 			paths = kmem_zalloc(length, KM_SLEEP);
1957 			j = 0;
1958 			for (i = 0; i < count; i++) {
1959 				if (dp->pdr_kept_paths[i] != NULL) {
1960 					paths[j] = dp->pdr_kept_paths[i];
1961 					j++;
1962 				}
1963 			}
1964 			ASSERT(j == dp->pdr_kept_count);
1965 		}
1966 		/* Now free the old array and point to the new one */
1967 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1968 		if (dp->pdr_kept_count)
1969 			dp->pdr_kept_paths = paths;
1970 		else
1971 			dp->pdr_kept_paths = NULL;
1972 	}
1973 }
1974 
1975 /*
1976  * Free the dependency information for a device.
1977  */
1978 void
1979 pm_free_keeps(char *path, int pwr)
1980 {
1981 	PMD_FUNC(pmf, "free_keeps")
1982 
1983 #ifdef DEBUG
1984 	int doprdeps = 0;
1985 	void prdeps(char *);
1986 
1987 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
1988 	if (pm_debug & PMD_KEEPS) {
1989 		doprdeps = 1;
1990 		prdeps("pm_free_keeps before");
1991 	}
1992 #endif
1993 	/*
1994 	 * First assume we are a keeper and remove all our kepts.
1995 	 */
1996 	pm_free_keeper(path, pwr);
1997 	/*
1998 	 * Now assume we a kept device, and remove all our records.
1999 	 */
2000 	pm_free_kept(path);
2001 #ifdef	DEBUG
2002 	if (doprdeps) {
2003 		prdeps("pm_free_keeps after");
2004 	}
2005 #endif
2006 }
2007 
2008 static int
2009 pm_is_kept(char *path)
2010 {
2011 	pm_pdr_t *dp;
2012 	int i;
2013 
2014 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2015 		if (dp->pdr_kept_count == 0)
2016 			continue;
2017 		for (i = 0; i < dp->pdr_kept_count; i++) {
2018 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2019 				return (1);
2020 		}
2021 	}
2022 	return (0);
2023 }
2024 
2025 static void
2026 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2027 {
2028 	PMD_FUNC(pmf, "hold_rele_power")
2029 	int circ;
2030 
2031 	if ((dip == NULL) ||
2032 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2033 		return;
2034 	PM_LOCK_POWER(dip, &circ);
2035 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2036 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2037 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2038 	PM_KUC(dip) += cnt;
2039 	ASSERT(PM_KUC(dip) >= 0);
2040 	PM_UNLOCK_POWER(dip, circ);
2041 	if (cnt < 0 && PM_KUC(dip) == 0)
2042 		pm_rescan(dip);
2043 }
2044 
2045 #define	MAX_PPM_HANDLERS	4
2046 
2047 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2048 
2049 struct	ppm_callbacks {
2050 	int (*ppmc_func)(dev_info_t *);
2051 	dev_info_t	*ppmc_dip;
2052 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2053 
2054 
2055 /*
2056  * This routine calls into all the registered ppms to notify them
2057  * that either all components of power-managed devices are at their
2058  * lowest levels or no longer all are at their lowest levels.
2059  */
2060 static void
2061 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2062 {
2063 	struct ppm_callbacks *ppmcp;
2064 	power_req_t power_req;
2065 	int result = 0;
2066 
2067 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2068 	power_req.req.ppm_all_lowest_req.mode = mode;
2069 	mutex_enter(&ppm_lock);
2070 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2071 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2072 		    DDI_CTLOPS_POWER, &power_req, &result);
2073 	mutex_exit(&ppm_lock);
2074 }
2075 
2076 static void
2077 pm_set_pm_info(dev_info_t *dip, void *value)
2078 {
2079 	DEVI(dip)->devi_pm_info = value;
2080 }
2081 
2082 pm_rsvp_t *pm_blocked_list;
2083 
2084 /*
2085  * Look up an entry in the blocked list by dip and component
2086  */
2087 static pm_rsvp_t *
2088 pm_rsvp_lookup(dev_info_t *dip, int comp)
2089 {
2090 	pm_rsvp_t *p;
2091 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2092 	for (p = pm_blocked_list; p; p = p->pr_next)
2093 		if (p->pr_dip == dip && p->pr_comp == comp) {
2094 			return (p);
2095 		}
2096 	return (NULL);
2097 }
2098 
2099 /*
2100  * Called when a device which is direct power managed (or the parent or
2101  * dependent of such a device) changes power, or when a pm clone is closed
2102  * that was direct power managing a device.  This call results in pm_blocked()
2103  * (below) returning.
2104  */
2105 void
2106 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2107 {
2108 	PMD_FUNC(pmf, "proceed")
2109 	pm_rsvp_t *found = NULL;
2110 	pm_rsvp_t *p;
2111 
2112 	mutex_enter(&pm_rsvp_lock);
2113 	switch (cmd) {
2114 	/*
2115 	 * we're giving up control, let any pending op continue
2116 	 */
2117 	case PMP_RELEASE:
2118 		for (p = pm_blocked_list; p; p = p->pr_next) {
2119 			if (dip == p->pr_dip) {
2120 				p->pr_retval = PMP_RELEASE;
2121 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2122 				    pmf, PM_DEVICE(dip)))
2123 				cv_signal(&p->pr_cv);
2124 			}
2125 		}
2126 		break;
2127 
2128 	/*
2129 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2130 	 * succeed and a non-matching request for the same device fail
2131 	 */
2132 	case PMP_SETPOWER:
2133 		found = pm_rsvp_lookup(dip, comp);
2134 		if (!found)	/* if driver not waiting */
2135 			break;
2136 		/*
2137 		 * This cannot be pm_lower_power, since that can only happen
2138 		 * during detach or probe
2139 		 */
2140 		if (found->pr_newlevel <= newlevel) {
2141 			found->pr_retval = PMP_SUCCEED;
2142 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2143 			    PM_DEVICE(dip)))
2144 		} else {
2145 			found->pr_retval = PMP_FAIL;
2146 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2147 			    PM_DEVICE(dip)))
2148 		}
2149 		cv_signal(&found->pr_cv);
2150 		break;
2151 
2152 	default:
2153 		panic("pm_proceed unknown cmd %d", cmd);
2154 	}
2155 	mutex_exit(&pm_rsvp_lock);
2156 }
2157 
2158 /*
2159  * This routine dispatches new work to the dependency thread. Caller must
2160  * be prepared to block for memory if necessary.
2161  */
2162 void
2163 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2164     int *res, int cached_pwr)
2165 {
2166 	pm_dep_wk_t	*new_work;
2167 
2168 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2169 	new_work->pdw_type = cmd;
2170 	new_work->pdw_wait = wait;
2171 	new_work->pdw_done = 0;
2172 	new_work->pdw_ret = 0;
2173 	new_work->pdw_pwr = cached_pwr;
2174 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2175 	if (keeper != NULL) {
2176 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2177 		    KM_SLEEP);
2178 		(void) strcpy(new_work->pdw_keeper, keeper);
2179 	}
2180 	if (kept != NULL) {
2181 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2182 		(void) strcpy(new_work->pdw_kept, kept);
2183 	}
2184 	mutex_enter(&pm_dep_thread_lock);
2185 	if (pm_dep_thread_workq == NULL) {
2186 		pm_dep_thread_workq = new_work;
2187 		pm_dep_thread_tail = new_work;
2188 		new_work->pdw_next = NULL;
2189 	} else {
2190 		pm_dep_thread_tail->pdw_next = new_work;
2191 		pm_dep_thread_tail = new_work;
2192 		new_work->pdw_next = NULL;
2193 	}
2194 	cv_signal(&pm_dep_thread_cv);
2195 	/* If caller asked for it, wait till it is done. */
2196 	if (wait)  {
2197 		while (!new_work->pdw_done)
2198 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2199 		/*
2200 		 * Pass return status, if any, back.
2201 		 */
2202 		if (res != NULL)
2203 			*res = new_work->pdw_ret;
2204 		/*
2205 		 * If we asked to wait, it is our job to free the request
2206 		 * structure.
2207 		 */
2208 		if (new_work->pdw_keeper)
2209 			kmem_free(new_work->pdw_keeper,
2210 			    strlen(new_work->pdw_keeper) + 1);
2211 		if (new_work->pdw_kept)
2212 			kmem_free(new_work->pdw_kept,
2213 			    strlen(new_work->pdw_kept) + 1);
2214 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2215 	}
2216 	mutex_exit(&pm_dep_thread_lock);
2217 }
2218 
2219 /*
2220  * Release the pm resource for this device.
2221  */
2222 void
2223 pm_rem_info(dev_info_t *dip)
2224 {
2225 	PMD_FUNC(pmf, "rem_info")
2226 	int		i, count = 0;
2227 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2228 	dev_info_t	*pdip = ddi_get_parent(dip);
2229 	char		*pathbuf;
2230 	int		work_type = PM_DEP_WK_DETACH;
2231 
2232 	ASSERT(info);
2233 
2234 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2235 	if (PM_ISDIRECT(dip)) {
2236 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2237 		ASSERT(info->pmi_clone);
2238 		info->pmi_clone = 0;
2239 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2240 	}
2241 	ASSERT(!PM_GET_PM_SCAN(dip));
2242 
2243 	/*
2244 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2245 	 * Others we check all components.  BC node that has already
2246 	 * called pm_destroy_components() has zero component count.
2247 	 * Parents that get notification are not adjusted because their
2248 	 * kidsupcnt is always 0 (or 1 during configuration).
2249 	 */
2250 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2251 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2252 
2253 	/* node is detached, so we can examine power without locking */
2254 	if (PM_ISBC(dip)) {
2255 		count = (PM_CURPOWER(dip, 0) != 0);
2256 	} else {
2257 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2258 			count += (PM_CURPOWER(dip, i) != 0);
2259 	}
2260 
2261 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2262 		e_pm_hold_rele_power(pdip, -count);
2263 
2264 	/* Schedule a request to clean up dependency records */
2265 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2266 	(void) ddi_pathname(dip, pathbuf);
2267 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2268 	    PM_DEP_NOWAIT, NULL, (count > 0));
2269 	kmem_free(pathbuf, MAXPATHLEN);
2270 
2271 	/*
2272 	 * Adjust the pm_comps_notlowest count since this device is
2273 	 * not being power-managed anymore.
2274 	 */
2275 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2276 		if (PM_CURPOWER(dip, i) != 0)
2277 			PM_DECR_NOTLOWEST(dip);
2278 	}
2279 	/*
2280 	 * Once we clear the info pointer, it looks like it is not power
2281 	 * managed to everybody else.
2282 	 */
2283 	pm_set_pm_info(dip, NULL);
2284 	kmem_free(info, sizeof (pm_info_t));
2285 }
2286 
2287 int
2288 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2289 {
2290 	int components = PM_NUMCMPTS(dip);
2291 	int *bufp;
2292 	size_t size;
2293 	int i;
2294 
2295 	if (components <= 0) {
2296 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2297 		    "can't get normal power values\n", PM_DEVICE(dip));
2298 		return (DDI_FAILURE);
2299 	} else {
2300 		size = components * sizeof (int);
2301 		bufp = kmem_alloc(size, KM_SLEEP);
2302 		for (i = 0; i < components; i++) {
2303 			bufp[i] = pm_get_normal_power(dip, i);
2304 		}
2305 	}
2306 	*length = size;
2307 	*valuep = bufp;
2308 	return (DDI_SUCCESS);
2309 }
2310 
2311 static int
2312 pm_reset_timestamps(dev_info_t *dip, void *arg)
2313 {
2314 	_NOTE(ARGUNUSED(arg))
2315 
2316 	int components;
2317 	int	i;
2318 
2319 	if (!PM_GET_PM_INFO(dip))
2320 		return (DDI_WALK_CONTINUE);
2321 	components = PM_NUMCMPTS(dip);
2322 	ASSERT(components > 0);
2323 	PM_LOCK_BUSY(dip);
2324 	for (i = 0; i < components; i++) {
2325 		struct pm_component *cp;
2326 		/*
2327 		 * If the component was not marked as busy,
2328 		 * reset its timestamp to now.
2329 		 */
2330 		cp = PM_CP(dip, i);
2331 		if (cp->pmc_timestamp)
2332 			cp->pmc_timestamp = gethrestime_sec();
2333 	}
2334 	PM_UNLOCK_BUSY(dip);
2335 	return (DDI_WALK_CONTINUE);
2336 }
2337 
2338 /*
2339  * Convert a power level to an index into the levels array (or
2340  * just PM_LEVEL_UNKNOWN in that special case).
2341  */
2342 static int
2343 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2344 {
2345 	PMD_FUNC(pmf, "level_to_index")
2346 	int i;
2347 	int limit = cp->pmc_comp.pmc_numlevels;
2348 	int *ip = cp->pmc_comp.pmc_lvals;
2349 
2350 	if (level == PM_LEVEL_UNKNOWN)
2351 		return (level);
2352 
2353 	for (i = 0; i < limit; i++) {
2354 		if (level == *ip++) {
2355 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2356 			    pmf, PM_DEVICE(dip),
2357 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2358 			return (i);
2359 		}
2360 	}
2361 	panic("pm_level_to_index: level %d not found for device "
2362 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2363 	/*NOTREACHED*/
2364 }
2365 
2366 /*
2367  * Internal function to set current power level
2368  */
2369 static void
2370 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2371 {
2372 	PMD_FUNC(pmf, "set_cur_pwr")
2373 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2374 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2375 
2376 	/*
2377 	 * Nothing to adjust if current & new levels are the same.
2378 	 */
2379 	if (curpwr != PM_LEVEL_UNKNOWN &&
2380 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2381 		return;
2382 
2383 	/*
2384 	 * Keep the count for comps doing transition to/from lowest
2385 	 * level.
2386 	 */
2387 	if (curpwr == 0) {
2388 		PM_INCR_NOTLOWEST(dip);
2389 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2390 		PM_DECR_NOTLOWEST(dip);
2391 	}
2392 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2393 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2394 }
2395 
2396 /*
2397  * This is the default method of setting the power of a device if no ppm
2398  * driver has claimed it.
2399  */
2400 int
2401 pm_power(dev_info_t *dip, int comp, int level)
2402 {
2403 	PMD_FUNC(pmf, "power")
2404 	struct dev_ops	*ops;
2405 	int		(*fn)(dev_info_t *, int, int);
2406 	struct pm_component *cp = PM_CP(dip, comp);
2407 	int retval;
2408 	pm_info_t *info = PM_GET_PM_INFO(dip);
2409 	static int pm_phc_impl(dev_info_t *, int, int, int);
2410 
2411 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2412 	    PM_DEVICE(dip), comp, level))
2413 	if (!(ops = ddi_get_driver(dip))) {
2414 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2415 		    PM_DEVICE(dip)))
2416 		return (DDI_FAILURE);
2417 	}
2418 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2419 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2420 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2421 		    (!fn ? " devo_power NULL" : "")))
2422 		return (DDI_FAILURE);
2423 	}
2424 	cp->pmc_flags |= PM_POWER_OP;
2425 	retval = (*fn)(dip, comp, level);
2426 	cp->pmc_flags &= ~PM_POWER_OP;
2427 	if (retval == DDI_SUCCESS) {
2428 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2429 		return (DDI_SUCCESS);
2430 	}
2431 
2432 	/*
2433 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2434 	 * updated only the power level of the component.  If our attempt to
2435 	 * set the device new to a power level above has failed we sync the
2436 	 * total power state via phc code now.
2437 	 */
2438 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2439 		int phc_lvl =
2440 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2441 
2442 		ASSERT(info);
2443 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2444 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2445 			pmf, PM_DEVICE(dip), comp, phc_lvl))
2446 	}
2447 
2448 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2449 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2450 	    level, power_val_to_string(cp, level)));
2451 	return (DDI_FAILURE);
2452 }
2453 
2454 int
2455 pm_unmanage(dev_info_t *dip)
2456 {
2457 	PMD_FUNC(pmf, "unmanage")
2458 	power_req_t power_req;
2459 	int result, retval = 0;
2460 
2461 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2462 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2463 	    PM_DEVICE(dip)))
2464 	power_req.request_type = PMR_PPM_UNMANAGE;
2465 	power_req.req.ppm_config_req.who = dip;
2466 	if (pm_ppm_claimed(dip))
2467 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2468 		    &power_req, &result);
2469 #ifdef DEBUG
2470 	else
2471 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2472 		    &power_req, &result);
2473 #endif
2474 	ASSERT(retval == DDI_SUCCESS);
2475 	pm_rem_info(dip);
2476 	return (retval);
2477 }
2478 
2479 int
2480 pm_raise_power(dev_info_t *dip, int comp, int level)
2481 {
2482 	if (level < 0)
2483 		return (DDI_FAILURE);
2484 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2485 	    !e_pm_valid_power(dip, comp, level))
2486 		return (DDI_FAILURE);
2487 
2488 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2489 }
2490 
2491 int
2492 pm_lower_power(dev_info_t *dip, int comp, int level)
2493 {
2494 	PMD_FUNC(pmf, "pm_lower_power")
2495 
2496 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2497 	    !e_pm_valid_power(dip, comp, level)) {
2498 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2499 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2500 		return (DDI_FAILURE);
2501 	}
2502 
2503 	if (!DEVI_IS_DETACHING(dip)) {
2504 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2505 		    pmf, PM_DEVICE(dip)))
2506 		return (DDI_FAILURE);
2507 	}
2508 
2509 	/*
2510 	 * If we don't care about saving power, or we're treating this node
2511 	 * specially, then this is a no-op
2512 	 */
2513 	if (!autopm_enabled || pm_noinvol(dip)) {
2514 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s\n", pmf, PM_DEVICE(dip),
2515 		    !autopm_enabled ? "!autopm_enabled " : "",
2516 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2517 		return (DDI_SUCCESS);
2518 	}
2519 
2520 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2521 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2522 		    PM_DEVICE(dip)))
2523 		return (DDI_FAILURE);
2524 	}
2525 	return (DDI_SUCCESS);
2526 }
2527 
2528 /*
2529  * Find the entries struct for a given dip in the blocked list, return it locked
2530  */
2531 static psce_t *
2532 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2533 {
2534 	pscc_t *p;
2535 	psce_t *psce;
2536 
2537 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2538 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2539 		if (p->pscc_dip == dip) {
2540 			*psccp = p;
2541 			psce = p->pscc_entries;
2542 			mutex_enter(&psce->psce_lock);
2543 			ASSERT(psce);
2544 			rw_exit(&pm_pscc_direct_rwlock);
2545 			return (psce);
2546 		}
2547 	}
2548 	rw_exit(&pm_pscc_direct_rwlock);
2549 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2550 	/*NOTREACHED*/
2551 }
2552 
2553 /*
2554  * Write an entry indicating a power level change (to be passed to a process
2555  * later) in the given psce.
2556  * If we were called in the path that brings up the console fb in the
2557  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2558  * we create a record that has a size of -1, a physaddr of NULL, and that
2559  * has the overflow flag set.
2560  */
2561 static int
2562 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2563     int old, int which, pm_canblock_t canblock)
2564 {
2565 	char	buf[MAXNAMELEN];
2566 	pm_state_change_t *p;
2567 	size_t	size;
2568 	caddr_t physpath = NULL;
2569 	int	overrun = 0;
2570 
2571 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2572 	(void) ddi_pathname(dip, buf);
2573 	size = strlen(buf) + 1;
2574 	p = psce->psce_in;
2575 	if (canblock == PM_CANBLOCK_BYPASS) {
2576 		physpath = kmem_alloc(size, KM_NOSLEEP);
2577 		if (physpath == NULL) {
2578 			/*
2579 			 * mark current entry as overrun
2580 			 */
2581 			p->flags |= PSC_EVENT_LOST;
2582 			size = (size_t)-1;
2583 		}
2584 	} else
2585 		physpath = kmem_alloc(size, KM_SLEEP);
2586 	if (p->size) {	/* overflow; mark the next entry */
2587 		if (p->size != (size_t)-1)
2588 			kmem_free(p->physpath, p->size);
2589 		ASSERT(psce->psce_out == p);
2590 		if (p == psce->psce_last) {
2591 			psce->psce_first->flags |= PSC_EVENT_LOST;
2592 			psce->psce_out = psce->psce_first;
2593 		} else {
2594 			(p + 1)->flags |= PSC_EVENT_LOST;
2595 			psce->psce_out = (p + 1);
2596 		}
2597 		overrun++;
2598 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2599 		p->flags |= PSC_EVENT_LOST;
2600 		p->size = 0;
2601 		p->physpath = NULL;
2602 	}
2603 	if (which == PSC_INTEREST) {
2604 		mutex_enter(&pm_compcnt_lock);
2605 		if (pm_comps_notlowest == 0)
2606 			p->flags |= PSC_ALL_LOWEST;
2607 		else
2608 			p->flags &= ~PSC_ALL_LOWEST;
2609 		mutex_exit(&pm_compcnt_lock);
2610 	}
2611 	p->event = event;
2612 	p->timestamp = gethrestime_sec();
2613 	p->component = comp;
2614 	p->old_level = old;
2615 	p->new_level = new;
2616 	p->physpath = physpath;
2617 	p->size = size;
2618 	if (physpath != NULL)
2619 		(void) strcpy(p->physpath, buf);
2620 	if (p == psce->psce_last)
2621 		psce->psce_in = psce->psce_first;
2622 	else
2623 		psce->psce_in = ++p;
2624 	mutex_exit(&psce->psce_lock);
2625 	return (overrun);
2626 }
2627 
2628 /*
2629  * Find the next entry on the interest list.  We keep a pointer to the item we
2630  * last returned in the user's cooke.  Returns a locked entries struct.
2631  */
2632 static psce_t *
2633 psc_interest(void **cookie, pscc_t **psccp)
2634 {
2635 	pscc_t *pscc;
2636 	pscc_t **cookiep = (pscc_t **)cookie;
2637 
2638 	if (*cookiep == NULL)
2639 		pscc = pm_pscc_interest;
2640 	else
2641 		pscc = (*cookiep)->pscc_next;
2642 	if (pscc) {
2643 		*cookiep = pscc;
2644 		*psccp = pscc;
2645 		mutex_enter(&pscc->pscc_entries->psce_lock);
2646 		return (pscc->pscc_entries);
2647 	} else {
2648 		return (NULL);
2649 	}
2650 }
2651 
2652 /*
2653  * Create an entry for a process to pick up indicating a power level change.
2654  */
2655 static void
2656 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2657     int newlevel, int oldlevel, pm_canblock_t canblock)
2658 {
2659 	PMD_FUNC(pmf, "enqueue_notify")
2660 	pscc_t	*pscc;
2661 	psce_t	*psce;
2662 	void		*cookie = NULL;
2663 	int	overrun;
2664 
2665 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2666 	switch (cmd) {
2667 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2668 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2669 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2670 		psce = pm_psc_dip_to_direct(dip, &pscc);
2671 		ASSERT(psce);
2672 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2673 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2674 		    pm_poll_cnt[pscc->pscc_clone]))
2675 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2676 		    PSC_DIRECT, canblock);
2677 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2678 		mutex_enter(&pm_clone_lock);
2679 		if (!overrun)
2680 			pm_poll_cnt[pscc->pscc_clone]++;
2681 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2682 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2683 		mutex_exit(&pm_clone_lock);
2684 		break;
2685 	case PSC_HAS_CHANGED:
2686 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2687 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2688 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2689 			psce = pm_psc_dip_to_direct(dip, &pscc);
2690 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2691 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2692 			    pm_poll_cnt[pscc->pscc_clone]))
2693 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2694 			    oldlevel, PSC_DIRECT, canblock);
2695 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2696 			mutex_enter(&pm_clone_lock);
2697 			if (!overrun)
2698 				pm_poll_cnt[pscc->pscc_clone]++;
2699 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2700 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2701 			mutex_exit(&pm_clone_lock);
2702 		}
2703 		mutex_enter(&pm_clone_lock);
2704 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2705 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2706 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2707 			    oldlevel, PSC_INTEREST, canblock);
2708 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2709 		}
2710 		rw_exit(&pm_pscc_interest_rwlock);
2711 		mutex_exit(&pm_clone_lock);
2712 		break;
2713 #ifdef DEBUG
2714 	default:
2715 		ASSERT(0);
2716 #endif
2717 	}
2718 }
2719 
2720 static void
2721 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2722 {
2723 	if (listp) {
2724 		pm_ppm_devlist_t *p, *next = NULL;
2725 
2726 		for (p = *listp; p; p = next) {
2727 			next = p->ppd_next;
2728 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2729 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2730 			    canblock);
2731 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2732 		}
2733 		*listp = NULL;
2734 	}
2735 }
2736 
2737 /*
2738  * Try to get the power locks of the parent node and target (child)
2739  * node.  Return true if successful (with both locks held) or false
2740  * (with no locks held).
2741  */
2742 static int
2743 pm_try_parent_child_locks(dev_info_t *pdip,
2744     dev_info_t *dip, int *pcircp, int *circp)
2745 {
2746 	if (ndi_devi_tryenter(pdip, pcircp))
2747 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2748 			return (1);
2749 		} else {
2750 			ndi_devi_exit(pdip, *pcircp);
2751 		}
2752 	return (0);
2753 }
2754 
2755 /*
2756  * Determine if the power lock owner is blocked by current thread.
2757  * returns :
2758  * 	1 - If the thread owning the effective power lock (the first lock on
2759  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2760  *          a mutex held by the current thread.
2761  *
2762  *	0 - otherwise
2763  *
2764  * Note : This function is called by pm_power_has_changed to determine whether
2765  * it is executing in parallel with pm_set_power.
2766  */
2767 static int
2768 pm_blocked_by_us(dev_info_t *dip)
2769 {
2770 	power_req_t power_req;
2771 	kthread_t *owner;
2772 	int result;
2773 	kmutex_t *mp;
2774 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2775 
2776 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2777 	power_req.req.ppm_power_lock_owner_req.who = dip;
2778 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2779 	    DDI_SUCCESS) {
2780 		/*
2781 		 * It is assumed that if the device is claimed by ppm, ppm
2782 		 * will always implement this request type and it'll always
2783 		 * return success. We panic here, if it fails.
2784 		 */
2785 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2786 		    PM_DEVICE(dip));
2787 		/*NOTREACHED*/
2788 	}
2789 
2790 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2791 	    owner->t_state == TS_SLEEP &&
2792 	    owner->t_sobj_ops &&
2793 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2794 	    (mp = (kmutex_t *)owner->t_wchan) &&
2795 	    mutex_owner(mp) == curthread)
2796 		return (1);
2797 
2798 	return (0);
2799 }
2800 
2801 /*
2802  * Notify parent which wants to hear about a child's power changes.
2803  */
2804 static void
2805 pm_notify_parent(dev_info_t *dip,
2806     dev_info_t *pdip, int comp, int old_level, int level)
2807 {
2808 	pm_bp_has_changed_t bphc;
2809 	pm_sp_misc_t pspm;
2810 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2811 	int result = DDI_SUCCESS;
2812 
2813 	bphc.bphc_dip = dip;
2814 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2815 	bphc.bphc_comp = comp;
2816 	bphc.bphc_olevel = old_level;
2817 	bphc.bphc_nlevel = level;
2818 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2819 	pspm.pspm_scan = 0;
2820 	bphc.bphc_private = &pspm;
2821 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2822 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2823 	kmem_free(pathbuf, MAXPATHLEN);
2824 }
2825 
2826 /*
2827  * Check if we need to resume a BC device, and make the attach call as required.
2828  */
2829 static int
2830 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2831 {
2832 	int ret = DDI_SUCCESS;
2833 
2834 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2835 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2836 		/* ppm is not interested in DDI_PM_RESUME */
2837 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2838 			/* XXX Should we mark it resumed, */
2839 			/* even though it failed? */
2840 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2841 			    PM_NAME(dip), PM_ADDR(dip));
2842 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2843 	}
2844 
2845 	return (ret);
2846 }
2847 
2848 /*
2849  * Tests outside the lock to see if we should bother to enqueue an entry
2850  * for any watching process.  If yes, then caller will take the lock and
2851  * do the full protocol
2852  */
2853 static int
2854 pm_watchers()
2855 {
2856 	if (pm_processes_stopped)
2857 		return (0);
2858 	return (pm_pscc_direct || pm_pscc_interest);
2859 }
2860 
/*
 * A driver is reporting that the power of one of its device's components
 * has changed.  Update the power state accordingly.
 *
 * dip   - device whose component changed
 * comp  - component number
 * level - new power level; negative values are rejected
 *
 * Returns DDI_FAILURE if level is negative or if any of e_pm_valid_info(),
 * e_pm_valid_comp() or e_pm_valid_power() rejects the arguments.  Otherwise
 * returns DDI_SUCCESS from the deadlock shortcut below, or the result of
 * pm_phc_impl().
 */
int
pm_power_has_changed(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_power_has_changed")
	int ret;
	dev_info_t *pdip = ddi_get_parent(dip);
	struct pm_component *cp;
	int blocked, circ, pcirc, old_level;
	static int pm_phc_impl(dev_info_t *, int, int, int);

	if (level < 0) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
		    PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
	    PM_DEVICE(dip), comp, level))

	/* Validate the device's pm info, the component and the level. */
	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
	    !e_pm_valid_power(dip, comp, level))
		return (DDI_FAILURE);

	/*
	 * A driver thread calling pm_power_has_changed and another thread
	 * calling pm_set_power can deadlock.  The problem is not resolvable
	 * by changing lock order, so we use pm_blocked_by_us() to detect
	 * this specific deadlock.  If we can't get the lock immediately
	 * and we are deadlocked, just update the component's level, do
	 * notifications, and return.  We intend to update the total power
	 * state later (if the other thread fails to set power to the
	 * desired level).  If we were called because of a power change on a
	 * component that isn't involved in a set_power op, update all state
	 * immediately.
	 */
	cp = PM_CP(dip, comp);
	/*
	 * Each failed attempt to take both locks ends one of three ways:
	 *   1. blocked by us AND this component has PM_POWER_OP set:
	 *	record the change without any lock and return;
	 *   2. blocked by us, but PM_POWER_OP is not set on this component:
	 *	update state with at most the parent lock held and return;
	 *   3. not blocked by us: delay one tick and try again.
	 */
	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
		    (cp->pmc_flags & PM_POWER_OP)) {
			/* Case 1: tell watchers (new level, then old level). */
			if (pm_watchers()) {
				mutex_enter(&pm_rsvp_lock);
				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
				mutex_exit(&pm_rsvp_lock);
			}
			if (pdip && PM_WANTS_NOTIFICATION(pdip))
				pm_notify_parent(dip,
				    pdip, comp, cur_power(cp), level);
			(void) pm_check_and_resume(dip,
			    comp, cur_power(cp), level);

			/*
			 * Stash the old power index, update curpwr, and flag
			 * that the total power state needs to be synched.
			 */
			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
			/*
			 * Several pm_power_has_changed calls could arrive
			 * while the set power path remains blocked.  Keep the
			 * oldest old power and the newest new power of any
			 * sequence of phc calls which arrive during deadlock.
			 */
			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
			cp->pmc_cur_pwr =
			    pm_level_to_index(dip, cp, level);
			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
			return (DDI_SUCCESS);
		} else
			/*
			 * Case 2.  This 'if' is the body of the 'else' above;
			 * the delay(1) further down runs only when neither
			 * branch returned (case 3).
			 */
			if (blocked) {	/* blocked, but different cmpt? */
				if (!ndi_devi_tryenter(pdip, &pcirc)) {
					/*
					 * Parent lock not available either:
					 * update with no lock held and skip
					 * the parent hold/release accounting.
					 */
					cmn_err(CE_NOTE,
					    "!pm: parent kuc not updated due "
					    "to possible deadlock.\n");
					return (pm_phc_impl(dip,
						    comp, level, 1));
				}
				/* Same sequence as the non-deadlock case. */
				old_level = cur_power(cp);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
				    (!PM_ISBC(dip) || comp == 0) &&
				    POWERING_ON(old_level, level))
					pm_hold_power(pdip);
				ret = pm_phc_impl(dip, comp, level, 1);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
					if ((!PM_ISBC(dip) ||
					    comp == 0) && level == 0 &&
					    old_level != PM_LEVEL_UNKNOWN)
						pm_rele_power(pdip);
				}
				ndi_devi_exit(pdip, pcirc);
				/* child lock not held: deadlock */
				return (ret);
			}
		/* Case 3: back off for one tick, then retry both locks. */
		delay(1);
		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
	}

	/* non-deadlock case */
	/*
	 * Both locks are held.  For a parent that does not want notification,
	 * take a hold on it before a power-on is recorded, and release one
	 * after the component has been recorded at level 0 from a known level.
	 * For BC devices only component 0 takes part in this accounting.
	 */
	old_level = cur_power(cp);
	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
		pm_hold_power(pdip);
	ret = pm_phc_impl(dip, comp, level, 1);
	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
		    old_level != PM_LEVEL_UNKNOWN)
			pm_rele_power(pdip);
	}
	PM_UNLOCK_POWER(dip, circ);
	ndi_devi_exit(pdip, pcirc);
	return (ret);
}
2978 
2979 /*
2980  * Account for power changes to a component of the the console frame buffer.
2981  * If lowering power from full (or "unkown", which is treatd as full)
2982  * we will increment the "components off" count of the fb device.
2983  * Subsequent lowering of the same component doesn't affect the count.  If
2984  * raising a component back to full power, we will decrement the count.
2985  *
2986  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
2987  */
2988 static int
2989 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
2990 {
2991 	struct pm_component *cp = PM_CP(dip, cmpt);
2992 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
2993 	int want_normal = (new == cp->pmc_norm_pwr);
2994 	int incr = 0;
2995 
2996 	if (on && !want_normal)
2997 		incr = 1;
2998 	else if (!on && want_normal)
2999 		incr = -1;
3000 	return (incr);
3001 }
3002 
3003 /*
3004  * Adjust the count of console frame buffer components < full power.
3005  */
3006 static void
3007 update_comps_off(int incr, dev_info_t *dip)
3008 {
3009 		mutex_enter(&pm_cfb_lock);
3010 		pm_cfb_comps_off += incr;
3011 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3012 		mutex_exit(&pm_cfb_lock);
3013 }
3014 
/*
 * Update the power state in the framework (via the ppm).  The 'notify'
 * argument tells whether to notify watchers.  Power lock is already held.
 *
 * When 'notify' is non-zero, a parent that wants notification is told of
 * the change, a BC device is resumed if needed, and watching processes are
 * queued a PSC_HAS_CHANGED event.
 *
 * Returns DDI_SUCCESS if the component is already at 'level' or once the
 * update is complete; DDI_FAILURE if the ppm notification fails (nothing is
 * changed in that case).
 */
static int
pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
{
	PMD_FUNC(pmf, "phc_impl")
	power_req_t power_req;
	int i, dodeps = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int result;
	int old_level;
	struct pm_component *cp;
	int incr = 0;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
	int work_type = 0;
	char *pathbuf;

	/* Must use "official" power level for this test. */
	/*
	 * If power-has-changed calls arrived while a set-power operation was
	 * blocked (PM_PHC_WHILE_SET_POWER), the stashed pmc_phc_pwr index is
	 * the old state; otherwise pmc_cur_pwr is.  A known index is then
	 * translated into a level through pmc_lvals[].
	 */
	cp = PM_CP(dip, comp);
	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
	if (old_level != PM_LEVEL_UNKNOWN)
		old_level = cp->pmc_comp.pmc_lvals[old_level];

	if (level == old_level) {
		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_SUCCESS);
	}

	/*
	 * Tell ppm about this.
	 */
	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
	power_req.req.ppm_notify_level_req.who = dip;
	power_req.req.ppm_notify_level_req.cmpt = comp;
	power_req.req.ppm_notify_level_req.new_level = level;
	power_req.req.ppm_notify_level_req.old_level = old_level;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
	    &result) == DDI_FAILURE) {
		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
		    pmf, PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	/* Console frame buffer: keep the "components off" count current. */
	if (PM_IS_CFB(dip)) {
		incr = calc_cfb_comps_incr(dip, comp, old_level, level);

		if (incr) {
			update_comps_off(incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, old_level, level, pm_cfb_comps_off))
		}
	}
	/* Record the new level as the component's current power. */
	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	/* NOTE(review): 'result' is not read again after this assignment. */
	result = DDI_SUCCESS;

	if (notify) {
		if (pdip && PM_WANTS_NOTIFICATION(pdip))
			pm_notify_parent(dip, pdip, comp, old_level, level);
		(void) pm_check_and_resume(dip, comp, old_level, level);
	}

	/*
	 * Decrement the dependency kidsup count if we turn a device
	 * off.
	 */
	/*
	 * Dependency work is queued only if every component now reports a
	 * current power of 0; any non-zero value cancels it.
	 */
	if (POWERING_OFF(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (cur_power(cp)) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_OFF;
	}

	/*
	 * Increment if we turn it on. Check to see
	 * if other comps are already on, if so,
	 * dont increment.
	 */
	if (POWERING_ON(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (comp == i)
				continue;
			/* -1 also treated as 0 in this case */
			if (cur_power(cp) > 0) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_ON;
	}

	/*
	 * Queue the dependency work keyed by this device's path, without
	 * waiting (PM_DEP_NOWAIT).  The local path buffer is freed right after
	 * the dispatch call returns.
	 */
	if (dodeps) {
		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, pathbuf);
		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
		    PM_DEP_NOWAIT, NULL, 0);
		kmem_free(pathbuf, MAXPATHLEN);
	}

	/*
	 * Queue the event for watching processes.  The level comparison is
	 * always true here because equal levels returned early above.
	 */
	if (notify && (level != old_level) && pm_watchers()) {
		mutex_enter(&pm_rsvp_lock);
		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
		    PM_CANBLOCK_BLOCK);
		mutex_exit(&pm_rsvp_lock);
	}

	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
	pm_rescan(dip);
	return (DDI_SUCCESS);
}
3138 
3139 /*
3140  * This function is called at startup time to notify pm of the existence
3141  * of any platform power managers for this platform.  As a result of
3142  * this registration, each function provided will be called each time
3143  * a device node is attached, until one returns true, and it must claim the
3144  * device node (by returning non-zero) if it wants to be involved in the
3145  * node's power management.  If it does claim the node, then it will
3146  * subsequently be notified of attach and detach events.
3147  *
3148  */
3149 
3150 int
3151 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3152 {
3153 	PMD_FUNC(pmf, "register_ppm")
3154 	struct ppm_callbacks *ppmcp;
3155 	pm_component_t *cp;
3156 	int i, pwr, result, circ;
3157 	power_req_t power_req;
3158 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3159 	void pm_ppm_claim(dev_info_t *);
3160 
3161 	mutex_enter(&ppm_lock);
3162 	ppmcp = ppm_callbacks;
3163 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3164 		if (ppmcp->ppmc_func == NULL) {
3165 			ppmcp->ppmc_func = func;
3166 			ppmcp->ppmc_dip = dip;
3167 			break;
3168 		}
3169 	}
3170 	mutex_exit(&ppm_lock);
3171 
3172 	if (i >= MAX_PPM_HANDLERS)
3173 		return (DDI_FAILURE);
3174 	while ((dip = ddi_get_parent(dip)) != NULL) {
3175 		if (PM_GET_PM_INFO(dip) == NULL)
3176 			continue;
3177 		pm_ppm_claim(dip);
3178 		if (pm_ppm_claimed(dip)) {
3179 			/*
3180 			 * Tell ppm about this.
3181 			 */
3182 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3183 			p->old_level = PM_LEVEL_UNKNOWN;
3184 			p->who = dip;
3185 			PM_LOCK_POWER(dip, &circ);
3186 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3187 				cp = PM_CP(dip, i);
3188 				pwr = cp->pmc_cur_pwr;
3189 				if (pwr != PM_LEVEL_UNKNOWN) {
3190 					p->cmpt = i;
3191 					p->new_level = cur_power(cp);
3192 					p->old_level = PM_LEVEL_UNKNOWN;
3193 					if (pm_ctlops(PPM(dip), dip,
3194 					    DDI_CTLOPS_POWER, &power_req,
3195 					    &result) == DDI_FAILURE) {
3196 						PMD(PMD_FAIL, ("%s: pc "
3197 						    "%s@%s(%s#%d) to %d "
3198 						    "fails\n", pmf,
3199 						    PM_DEVICE(dip), pwr))
3200 					}
3201 				}
3202 			}
3203 			PM_UNLOCK_POWER(dip, circ);
3204 		}
3205 	}
3206 	return (DDI_SUCCESS);
3207 }
3208 
3209 /*
3210  * Call the ppm's that have registered and adjust the devinfo struct as
3211  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3212  * by each ppm are assumed to be disjoint.
3213  */
3214 void
3215 pm_ppm_claim(dev_info_t *dip)
3216 {
3217 	struct ppm_callbacks *ppmcp;
3218 
3219 	if (PPM(dip)) {
3220 		return;
3221 	}
3222 	mutex_enter(&ppm_lock);
3223 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3224 		if ((*ppmcp->ppmc_func)(dip)) {
3225 			DEVI(dip)->devi_pm_ppm =
3226 			    (struct dev_info *)ppmcp->ppmc_dip;
3227 			mutex_exit(&ppm_lock);
3228 			return;
3229 		}
3230 	}
3231 	mutex_exit(&ppm_lock);
3232 }
3233 
3234 /*
3235  * Node is being detached so stop autopm until we see if it succeeds, in which
3236  * case pm_stop will be called.  For backwards compatible devices we bring the
3237  * device up to full power on the assumption the detach will succeed.
3238  */
3239 void
3240 pm_detaching(dev_info_t *dip)
3241 {
3242 	PMD_FUNC(pmf, "detaching")
3243 	pm_info_t *info = PM_GET_PM_INFO(dip);
3244 	int iscons;
3245 
3246 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3247 	    PM_NUMCMPTS(dip)))
3248 	if (info == NULL)
3249 		return;
3250 	ASSERT(DEVI_IS_DETACHING(dip));
3251 	PM_LOCK_DIP(dip);
3252 	info->pmi_dev_pm_state |= PM_DETACHING;
3253 	PM_UNLOCK_DIP(dip);
3254 	if (!PM_ISBC(dip))
3255 		pm_scan_stop(dip);
3256 
3257 	/*
3258 	 * console and old-style devices get brought up when detaching.
3259 	 */
3260 	iscons = PM_IS_CFB(dip);
3261 	if (iscons || PM_ISBC(dip)) {
3262 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3263 		if (iscons) {
3264 			mutex_enter(&pm_cfb_lock);
3265 			while (cfb_inuse) {
3266 				mutex_exit(&pm_cfb_lock);
3267 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3268 				delay(1);
3269 				mutex_enter(&pm_cfb_lock);
3270 			}
3271 			ASSERT(cfb_dip_detaching == NULL);
3272 			ASSERT(cfb_dip);
3273 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3274 			cfb_dip = NULL;
3275 			mutex_exit(&pm_cfb_lock);
3276 		}
3277 	}
3278 }
3279 
3280 /*
3281  * Node failed to detach.  If it used to be autopm'd, make it so again.
3282  */
3283 void
3284 pm_detach_failed(dev_info_t *dip)
3285 {
3286 	PMD_FUNC(pmf, "detach_failed")
3287 	pm_info_t *info = PM_GET_PM_INFO(dip);
3288 	int pm_all_at_normal(dev_info_t *);
3289 
3290 	if (info == NULL)
3291 		return;
3292 	ASSERT(DEVI_IS_DETACHING(dip));
3293 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3294 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3295 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3296 			/* Make sure the operation is still needed */
3297 			if (!pm_all_at_normal(dip)) {
3298 				if (pm_all_to_normal(dip,
3299 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3300 					PMD(PMD_ERROR, ("%s: could not bring "
3301 					    "%s@%s(%s#%d) to normal\n", pmf,
3302 					    PM_DEVICE(dip)))
3303 				}
3304 			}
3305 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3306 		}
3307 	}
3308 	if (!PM_ISBC(dip)) {
3309 		mutex_enter(&pm_scan_lock);
3310 		if (autopm_enabled)
3311 			pm_scan_init(dip);
3312 		mutex_exit(&pm_scan_lock);
3313 		pm_rescan(dip);
3314 	}
3315 }
3316 
3317 /* generic Backwards Compatible component */
3318 static char *bc_names[] = {"off", "on"};
3319 
3320 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3321 
3322 static void
3323 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3324 {
3325 	pm_comp_t *pmc;
3326 	pmc = &cp->pmc_comp;
3327 	pmc->pmc_numlevels = 2;
3328 	pmc->pmc_lvals[0] = 0;
3329 	pmc->pmc_lvals[1] = norm;
3330 	e_pm_set_cur_pwr(dip, cp, norm);
3331 }
3332 
3333 static void
3334 e_pm_default_components(dev_info_t *dip, int cmpts)
3335 {
3336 	int i;
3337 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3338 
3339 	p = DEVI(dip)->devi_pm_components;
3340 	for (i = 0; i < cmpts; i++, p++) {
3341 		p->pmc_comp = bc_comp;	/* struct assignment */
3342 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3343 		    KM_SLEEP);
3344 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3345 		    KM_SLEEP);
3346 		p->pmc_comp.pmc_numlevels = 2;
3347 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3348 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3349 	}
3350 }
3351 
3352 /*
3353  * Called from functions that require components to exist already to allow
3354  * for their creation by parsing the pm-components property.
3355  * Device will not be power managed as a result of this call
3356  * No locking needed because we're single threaded by the ndi_devi_enter
3357  * done while attaching, and the device isn't visible until after it has
3358  * attached
3359  */
3360 int
3361 pm_premanage(dev_info_t *dip, int style)
3362 {
3363 	PMD_FUNC(pmf, "premanage")
3364 	pm_comp_t	*pcp, *compp;
3365 	int		cmpts, i, norm, error;
3366 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3367 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3368 
3369 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3370 	/*
3371 	 * If this dip has already been processed, don't mess with it
3372 	 */
3373 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3374 		return (DDI_SUCCESS);
3375 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3376 		return (DDI_FAILURE);
3377 	}
3378 	/*
3379 	 * Look up pm-components property and create components accordingly
3380 	 * If that fails, fall back to backwards compatibility
3381 	 */
3382 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3383 		/*
3384 		 * If error is set, the property existed but was not well formed
3385 		 */
3386 		if (error || (style == PM_STYLE_NEW)) {
3387 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3388 			return (DDI_FAILURE);
3389 		}
3390 		/*
3391 		 * If they don't have the pm-components property, then we
3392 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3393 		 * behavior driver must have called pm_create_components, and
3394 		 * we need to flesh out dummy components
3395 		 */
3396 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3397 			/*
3398 			 * Not really failure, but we don't want the
3399 			 * caller to treat it as success
3400 			 */
3401 			return (DDI_FAILURE);
3402 		}
3403 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3404 		e_pm_default_components(dip, cmpts);
3405 		for (i = 0; i < cmpts; i++) {
3406 			/*
3407 			 * if normal power not set yet, we don't really know
3408 			 * what *ANY* of the power values are.  If normal
3409 			 * power is set, then we assume for this backwards
3410 			 * compatible case that the values are 0, normal power.
3411 			 */
3412 			norm = pm_get_normal_power(dip, i);
3413 			if (norm == (uint_t)-1) {
3414 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3415 				    PM_DEVICE(dip), i))
3416 				return (DDI_FAILURE);
3417 			}
3418 			/*
3419 			 * Components of BC devices start at their normal power,
3420 			 * so count them to be not at their lowest power.
3421 			 */
3422 			PM_INCR_NOTLOWEST(dip);
3423 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3424 		}
3425 	} else {
3426 		/*
3427 		 * e_pm_create_components was called from pm_autoconfig(), it
3428 		 * creates components with no descriptions (or known levels)
3429 		 */
3430 		cmpts = PM_NUMCMPTS(dip);
3431 		ASSERT(cmpts != 0);
3432 		pcp = compp;
3433 		p = DEVI(dip)->devi_pm_components;
3434 		for (i = 0; i < cmpts; i++, p++) {
3435 			p->pmc_comp = *pcp++;   /* struct assignment */
3436 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3437 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3438 		}
3439 		pm_set_device_threshold(dip, pm_system_idle_threshold,
3440 		    PMC_DEF_THRESH);
3441 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3442 	}
3443 	return (DDI_SUCCESS);
3444 }
3445 
3446 /*
3447  * Called from during or after the device's attach to let us know it is ready
3448  * to play autopm.   Look up the pm model and manage the device accordingly.
3449  * Returns system call errno value.
3450  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3451  * a little cleaner
3452  *
3453  * Called with dip lock held, return with dip lock unheld.
3454  */
3455 
3456 int
3457 e_pm_manage(dev_info_t *dip, int style)
3458 {
3459 	PMD_FUNC(pmf, "e_manage")
3460 	pm_info_t	*info;
3461 	dev_info_t	*pdip = ddi_get_parent(dip);
3462 	int	pm_thresh_specd(dev_info_t *);
3463 	int	count;
3464 	char	*pathbuf;
3465 
3466 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3467 		return (DDI_FAILURE);
3468 	}
3469 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3470 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3471 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3472 
3473 	/*
3474 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3475 	 * out at their normal power, so they are "up", others start out
3476 	 * unknown, which is effectively "up".  Parent which want notification
3477 	 * get kidsupcnt of 0 always.
3478 	 */
3479 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3480 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3481 		e_pm_hold_rele_power(pdip, count);
3482 
3483 	pm_set_pm_info(dip, info);
3484 	/*
3485 	 * Apply any recorded thresholds
3486 	 */
3487 	(void) pm_thresh_specd(dip);
3488 
3489 	/*
3490 	 * Do dependency processing.
3491 	 */
3492 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3493 	(void) ddi_pathname(dip, pathbuf);
3494 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3495 	    PM_DEP_NOWAIT, NULL, 0);
3496 	kmem_free(pathbuf, MAXPATHLEN);
3497 
3498 	if (!PM_ISBC(dip)) {
3499 		mutex_enter(&pm_scan_lock);
3500 		if (autopm_enabled) {
3501 			pm_scan_init(dip);
3502 			mutex_exit(&pm_scan_lock);
3503 			pm_rescan(dip);
3504 		} else {
3505 			mutex_exit(&pm_scan_lock);
3506 		}
3507 	}
3508 	return (0);
3509 }
3510 
3511 /*
3512  * This is the obsolete exported interface for a driver to find out its
3513  * "normal" (max) power.
3514  * We only get components destroyed while no power management is
3515  * going on (and the device is detached), so we don't need a mutex here
3516  */
3517 int
3518 pm_get_normal_power(dev_info_t *dip, int comp)
3519 {
3520 
3521 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3522 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3523 	}
3524 	return (DDI_FAILURE);
3525 }
3526 
3527 /*
3528  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3529  */
3530 int
3531 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3532 {
3533 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3534 		*levelp = PM_CURPOWER(dip, comp);
3535 		return (DDI_SUCCESS);
3536 	}
3537 	return (DDI_FAILURE);
3538 }
3539 
3540 /*
3541  * Returns current threshold of indicated component
3542  */
3543 static int
3544 cur_threshold(dev_info_t *dip, int comp)
3545 {
3546 	pm_component_t *cp = PM_CP(dip, comp);
3547 	int pwr;
3548 
3549 	if (PM_ISBC(dip)) {
3550 		/*
3551 		 * backwards compatible nodes only have one threshold
3552 		 */
3553 		return (cp->pmc_comp.pmc_thresh[1]);
3554 	}
3555 	pwr = cp->pmc_cur_pwr;
3556 	if (pwr == PM_LEVEL_UNKNOWN) {
3557 		int thresh;
3558 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3559 			thresh = pm_default_nexus_threshold;
3560 		else
3561 			thresh = pm_system_idle_threshold;
3562 		return (thresh);
3563 	}
3564 	ASSERT(cp->pmc_comp.pmc_thresh);
3565 	return (cp->pmc_comp.pmc_thresh[pwr]);
3566 }
3567 
3568 /*
3569  * Compute next lower component power level given power index.
3570  */
3571 static int
3572 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3573 {
3574 	int nxt_pwr;
3575 
3576 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3577 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3578 	} else {
3579 		pwrndx--;
3580 		ASSERT(pwrndx >= 0);
3581 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3582 	}
3583 	return (nxt_pwr);
3584 }
3585 
3586 /*
3587  * Bring all components of device to normal power
3588  */
3589 int
3590 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3591 {
3592 	PMD_FUNC(pmf, "all_to_normal")
3593 	int		*normal;
3594 	int		i, ncomps, result;
3595 	size_t		size;
3596 	int		changefailed = 0;
3597 
3598 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3599 	ASSERT(PM_GET_PM_INFO(dip));
3600 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3601 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3602 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3603 		return (DDI_FAILURE);
3604 	}
3605 	ncomps = PM_NUMCMPTS(dip);
3606 	for (i = 0; i < ncomps; i++) {
3607 		if (pm_set_power(dip, i, normal[i],
3608 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3609 			changefailed++;
3610 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3611 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3612 			    PM_DEVICE(dip), i, normal[i], result))
3613 		}
3614 	}
3615 	kmem_free(normal, size);
3616 	if (changefailed) {
3617 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3618 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3619 		return (DDI_FAILURE);
3620 	}
3621 	return (DDI_SUCCESS);
3622 }
3623 
3624 /*
3625  * Returns true if all components of device are at normal power
3626  */
3627 int
3628 pm_all_at_normal(dev_info_t *dip)
3629 {
3630 	PMD_FUNC(pmf, "all_at_normal")
3631 	int		*normal;
3632 	int		i;
3633 	size_t		size;
3634 
3635 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3636 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3637 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3638 		return (DDI_FAILURE);
3639 	}
3640 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3641 		int current = PM_CURPOWER(dip, i);
3642 		if (normal[i] > current) {
3643 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3644 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3645 			    normal[i], current))
3646 			break;
3647 		}
3648 	}
3649 	kmem_free(normal, size);
3650 	if (i != PM_NUMCMPTS(dip)) {
3651 		return (0);
3652 	}
3653 	return (1);
3654 }
3655 
3656 static void
3657 bring_wekeeps_up(char *keeper)
3658 {
3659 	PMD_FUNC(pmf, "bring_wekeeps_up")
3660 	int i;
3661 	pm_pdr_t *dp;
3662 	pm_info_t *wku_info;
3663 	char *kept_path;
3664 	dev_info_t *kept;
3665 	static void bring_pmdep_up(dev_info_t *, int);
3666 
3667 	if (panicstr) {
3668 		return;
3669 	}
3670 	/*
3671 	 * We process the request even if the keeper detaches because
3672 	 * detach processing expects this to increment kidsupcnt of kept.
3673 	 */
3674 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3675 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3676 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3677 			continue;
3678 		for (i = 0; i < dp->pdr_kept_count; i++) {
3679 			kept_path = dp->pdr_kept_paths[i];
3680 			if (kept_path == NULL)
3681 				continue;
3682 			ASSERT(kept_path[0] != '\0');
3683 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3684 				continue;
3685 			wku_info = PM_GET_PM_INFO(kept);
3686 			if (wku_info == NULL) {
3687 				if (kept)
3688 					ddi_release_devi(kept);
3689 				continue;
3690 			}
3691 			/*
3692 			 * Don't mess with it if it is being detached, it isn't
3693 			 * safe to call its power entry point
3694 			 */
3695 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3696 				if (kept)
3697 					ddi_release_devi(kept);
3698 				continue;
3699 			}
3700 			bring_pmdep_up(kept, 1);
3701 			ddi_release_devi(kept);
3702 		}
3703 	}
3704 }
3705 
3706 /*
3707  * Bring up the 'kept' device passed as argument
3708  */
3709 static void
3710 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3711 {
3712 	PMD_FUNC(pmf, "bring_pmdep_up")
3713 	int is_all_at_normal = 0;
3714 
3715 	/*
3716 	 * If the kept device has been unmanaged, do nothing.
3717 	 */
3718 	if (!PM_GET_PM_INFO(kept_dip))
3719 		return;
3720 
3721 	/* Just ignore DIRECT PM device till they are released. */
3722 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3723 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3724 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3725 		    "controlling process did something else\n", pmf,
3726 		    PM_DEVICE(kept_dip)))
3727 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3728 		return;
3729 	}
3730 	/* if we got here the keeper had a transition from OFF->ON */
3731 	if (hold)
3732 		pm_hold_power(kept_dip);
3733 
3734 	if (!is_all_at_normal)
3735 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3736 }
3737 
/*
 * A bunch of stuff that belongs only to the next routine (or two)
 */

/* Literal "NAME=" and its length without the terminating NUL. */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* Name of the property that describes a device's components. */
static const char pmcompstr[] = "pm-components";

/*
 * Singly linked list node carrying one component description.
 * NOTE(review): presumably used while parsing pm-components; the user of
 * this type is outside this part of the file.
 */
struct pm_comp_pkg {
	pm_comp_t		*comp;
	struct pm_comp_pkg	*next;
};
3750 
/*
 * Local ASCII digit classifiers.  Both are macros that evaluate 'ch' more
 * than once, so the argument must not have side effects.
 */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))
3755 
/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * FINISH_COMP completes the component description pointed to by 'compp'.
 * It is not self-contained: it uses these variables from the expanding
 * scope: compp, size, level, tp, j, lvals[], lnames[] and lszs[].
 *
 * It allocates a name buffer of 'size' bytes plus level-name, level-value
 * and threshold arrays of 'level' entries each.  For every level it sets
 * the threshold to INT_MAX, copies the level value, and copies the level
 * name into the buffer, advancing by lszs[j].
 *
 * The expansion is a bare brace block, not a do { } while (0) statement.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3779 
3780 /*
3781  * Create (empty) component data structures.
3782  */
3783 static void
3784 e_pm_create_components(dev_info_t *dip, int num_components)
3785 {
3786 	struct pm_component *compp, *ocompp;
3787 	int i, size = 0;
3788 
3789 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3790 	ASSERT(!DEVI(dip)->devi_pm_components);
3791 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3792 	size = sizeof (struct pm_component) * num_components;
3793 
3794 	compp = kmem_zalloc(size, KM_SLEEP);
3795 	ocompp = compp;
3796 	DEVI(dip)->devi_pm_comp_size = size;
3797 	DEVI(dip)->devi_pm_num_components = num_components;
3798 	PM_LOCK_BUSY(dip);
3799 	for (i = 0; i < num_components;  i++) {
3800 		compp->pmc_timestamp = gethrestime_sec();
3801 		compp->pmc_norm_pwr = (uint_t)-1;
3802 		compp++;
3803 	}
3804 	PM_UNLOCK_BUSY(dip);
3805 	DEVI(dip)->devi_pm_components = ocompp;
3806 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3807 }
3808 
3809 /*
3810  * Parse hex or decimal value from char string
3811  */
3812 static char *
3813 pm_parsenum(char *cp, int *valp)
3814 {
3815 	int ch, offset;
3816 	char numbuf[256];
3817 	char *np = numbuf;
3818 	int value = 0;
3819 
3820 	ch = *cp++;
3821 	if (isdigit(ch)) {
3822 		if (ch == '0') {
3823 			if ((ch = *cp++) == 'x' || ch == 'X') {
3824 				ch = *cp++;
3825 				while (isxdigit(ch)) {
3826 					*np++ = (char)ch;
3827 					ch = *cp++;
3828 				}
3829 				*np = 0;
3830 				cp--;
3831 				goto hexval;
3832 			} else {
3833 				goto digit;
3834 			}
3835 		} else {
3836 digit:
3837 			while (isdigit(ch)) {
3838 				*np++ = (char)ch;
3839 				ch = *cp++;
3840 			}
3841 			*np = 0;
3842 			cp--;
3843 			goto decval;
3844 		}
3845 	} else
3846 		return (NULL);
3847 
3848 hexval:
3849 	for (np = numbuf; *np; np++) {
3850 		if (*np >= 'a' && *np <= 'f')
3851 			offset = 'a' - 10;
3852 		else if (*np >= 'A' && *np <= 'F')
3853 			offset = 'A' - 10;
3854 		else if (*np >= '0' && *np <= '9')
3855 			offset = '0';
3856 		value *= 16;
3857 		value += *np - offset;
3858 	}
3859 	*valp = value;
3860 	return (cp);
3861 
3862 decval:
3863 	offset = '0';
3864 	for (np = numbuf; *np; np++) {
3865 		value *= 10;
3866 		value += *np - offset;
3867 	}
3868 	*valp = value;
3869 	return (cp);
3870 }
3871 
3872 /*
3873  * Set max (previously documented as "normal") power.
3874  */
3875 static void
3876 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3877 {
3878 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3879 }
3880 
3881 /*
3882  * Internal routine for destroying components
3883  * It is called even when there might not be any, so it must be forgiving.
3884  */
3885 static void
3886 e_pm_destroy_components(dev_info_t *dip)
3887 {
3888 	int i;
3889 	struct pm_component *cp;
3890 
3891 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3892 	if (PM_NUMCMPTS(dip) == 0)
3893 		return;
3894 	cp = DEVI(dip)->devi_pm_components;
3895 	ASSERT(cp);
3896 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3897 		int nlevels = cp->pmc_comp.pmc_numlevels;
3898 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3899 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3900 		/*
3901 		 * For BC nodes, the rest is static in bc_comp, so skip it
3902 		 */
3903 		if (PM_ISBC(dip))
3904 			continue;
3905 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
3906 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
3907 		kmem_free(cp->pmc_comp.pmc_lname_buf,
3908 				cp->pmc_comp.pmc_lnames_sz);
3909 	}
3910 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
3911 	DEVI(dip)->devi_pm_components = NULL;
3912 	DEVI(dip)->devi_pm_num_components = 0;
3913 	DEVI(dip)->devi_pm_flags &=
3914 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
3915 }
3916 
3917 /*
3918  * Read the pm-components property (if there is one) and use it to set up
3919  * components.  Returns a pointer to an array of component structures if
3920  * pm-components found and successfully parsed, else returns NULL.
3921  * Sets error return *errp to true to indicate a failure (as opposed to no
3922  * property being present).
3923  */
3924 pm_comp_t *
3925 pm_autoconfig(dev_info_t *dip, int *errp)
3926 {
3927 	PMD_FUNC(pmf, "autoconfig")
3928 	uint_t nelems;
3929 	char **pp;
3930 	pm_comp_t *compp = NULL;
3931 	int i, j, level, components = 0;
3932 	size_t size = 0;
3933 	struct pm_comp_pkg *p, *ptail;
3934 	struct pm_comp_pkg *phead = NULL;
3935 	int *lvals = NULL;
3936 	int *lszs = NULL;
3937 	int *np = NULL;
3938 	int npi = 0;
3939 	char **lnames = NULL;
3940 	char *cp, *tp;
3941 	pm_comp_t *ret = NULL;
3942 
3943 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3944 	*errp = 0;	/* assume success */
3945 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3946 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
3947 		return (NULL);
3948 	}
3949 
3950 	if (nelems < 3) {	/* need at least one name and two levels */
3951 		goto errout;
3952 	}
3953 
3954 	/*
3955 	 * pm_create_components is no longer allowed
3956 	 */
3957 	if (PM_NUMCMPTS(dip) != 0) {
3958 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
3959 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
3960 		goto errout;
3961 	}
3962 
3963 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3964 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3965 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
3966 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
3967 
3968 	level = 0;
3969 	phead = NULL;
3970 	for (i = 0; i < nelems; i++) {
3971 		cp = pp[i];
3972 		if (!isdigit(*cp)) {	/*  must be name */
3973 			if (strncmp(cp, namestr, nameln) != 0) {
3974 				goto errout;
3975 			}
3976 			if (i != 0) {
3977 				if (level == 0) {	/* no level spec'd */
3978 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
3979 					    pmf))
3980 					goto errout;
3981 				}
3982 				np[npi++] = lvals[level - 1];
3983 				/* finish up previous component levels */
3984 				FINISH_COMP;
3985 			}
3986 			cp += nameln;
3987 			if (!*cp) {
3988 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
3989 				goto errout;
3990 			}
3991 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
3992 			if (phead == NULL) {
3993 				phead = ptail = p;
3994 			} else {
3995 				ptail->next = p;
3996 				ptail = p;
3997 			}
3998 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
3999 			    KM_SLEEP);
4000 			compp->pmc_name_sz = strlen(cp) + 1;
4001 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4002 			    KM_SLEEP);
4003 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4004 			components++;
4005 			level = 0;
4006 		} else {	/* better be power level <num>=<name> */
4007 #ifdef DEBUG
4008 			tp = cp;
4009 #endif
4010 			if (i == 0 ||
4011 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4012 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4013 				goto errout;
4014 			}
4015 #ifdef DEBUG
4016 			tp = cp;
4017 #endif
4018 			if (*cp++ != '=' || !*cp) {
4019 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4020 				goto errout;
4021 			}
4022 
4023 			lszs[level] = strlen(cp) + 1;
4024 			size += lszs[level];
4025 			lnames[level] = cp;	/* points into prop string */
4026 			level++;
4027 		}
4028 	}
4029 	np[npi++] = lvals[level - 1];
4030 	if (level == 0) {	/* ended with a name */
4031 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4032 		goto errout;
4033 	}
4034 	FINISH_COMP;
4035 
4036 
4037 	/*
4038 	 * Now we have a list of components--we have to return instead an
4039 	 * array of them, but we can just copy the top level and leave
4040 	 * the rest as is
4041 	 */
4042 	(void) e_pm_create_components(dip, components);
4043 	for (i = 0; i < components; i++)
4044 		e_pm_set_max_power(dip, i, np[i]);
4045 
4046 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4047 	for (i = 0, p = phead; i < components; i++) {
4048 		ASSERT(p);
4049 		/*
4050 		 * Now sanity-check values:  levels must be monotonically
4051 		 * increasing
4052 		 */
4053 		if (p->comp->pmc_numlevels < 2) {
4054 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4055 			    "levels\n", pmf,
4056 			    p->comp->pmc_name, PM_DEVICE(dip),
4057 			    p->comp->pmc_numlevels))
4058 			goto errout;
4059 		}
4060 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4061 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4062 			    (p->comp->pmc_lvals[j] <=
4063 			    p->comp->pmc_lvals[j - 1]))) {
4064 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4065 				    "not mono. incr, %d follows %d\n", pmf,
4066 				    p->comp->pmc_name, PM_DEVICE(dip),
4067 				    p->comp->pmc_lvals[j],
4068 				    p->comp->pmc_lvals[j - 1]))
4069 				goto errout;
4070 			}
4071 		}
4072 		ret[i] = *p->comp;	/* struct assignment */
4073 		for (j = 0; j < i; j++) {
4074 			/*
4075 			 * Test for unique component names
4076 			 */
4077 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4078 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4079 				    "unique\n", pmf, ret[j].pmc_name,
4080 				    PM_DEVICE(dip)))
4081 				goto errout;
4082 			}
4083 		}
4084 		ptail = p;
4085 		p = p->next;
4086 		phead = p;	/* errout depends on phead making sense */
4087 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4088 		kmem_free(ptail, sizeof (*ptail));
4089 	}
4090 out:
4091 	ddi_prop_free(pp);
4092 	if (lvals)
4093 		kmem_free(lvals, nelems * sizeof (int));
4094 	if (lszs)
4095 		kmem_free(lszs, nelems * sizeof (int));
4096 	if (lnames)
4097 		kmem_free(lnames, nelems * sizeof (char *));
4098 	if (np)
4099 		kmem_free(np, nelems * sizeof (int));
4100 	return (ret);
4101 
4102 errout:
4103 	e_pm_destroy_components(dip);
4104 	*errp = 1;	/* signal failure */
4105 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4106 	for (i = 0; i < nelems - 1; i++)
4107 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4108 	if (nelems != 0)
4109 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4110 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4111 	for (p = phead; p; ) {
4112 		pm_comp_t *pp;
4113 		int n;
4114 
4115 		ptail = p;
4116 		/*
4117 		 * Free component data structures
4118 		 */
4119 		pp = p->comp;
4120 		n = pp->pmc_numlevels;
4121 		if (pp->pmc_name_sz) {
4122 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4123 		}
4124 		if (pp->pmc_lnames_sz) {
4125 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4126 		}
4127 		if (pp->pmc_lnames) {
4128 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4129 		}
4130 		if (pp->pmc_thresh) {
4131 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4132 		}
4133 		if (pp->pmc_lvals) {
4134 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4135 		}
4136 		p = ptail->next;
4137 		kmem_free(ptail, sizeof (*ptail));
4138 	}
4139 	if (ret != NULL)
4140 		kmem_free(ret, components * sizeof (pm_comp_t));
4141 	ret = NULL;
4142 	goto out;
4143 }
4144 
4145 /*
4146  * Set threshold values for a devices components by dividing the target
4147  * threshold (base) by the number of transitions and assign each transition
4148  * that threshold.  This will get the entire device down in the target time if
4149  * all components are idle and even if there are dependencies among components.
4150  *
4151  * Devices may well get powered all the way down before the target time, but
4152  * at least the EPA will be happy.
4153  */
4154 void
4155 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4156 {
4157 	PMD_FUNC(pmf, "set_device_threshold")
4158 	int target_threshold = (base * 95) / 100;
4159 	int level, comp;		/* loop counters */
4160 	int transitions = 0;
4161 	int ncomp = PM_NUMCMPTS(dip);
4162 	int thresh;
4163 	int remainder;
4164 	pm_comp_t *pmc;
4165 	int i, circ;
4166 
4167 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4168 	PM_LOCK_DIP(dip);
4169 	/*
4170 	 * First we handle the easy one.  If we're setting the default
4171 	 * threshold for a node with children, then we set it to the
4172 	 * default nexus threshold (currently 0) and mark it as default
4173 	 * nexus threshold instead
4174 	 */
4175 	if (PM_IS_NEXUS(dip)) {
4176 		if (flag == PMC_DEF_THRESH) {
4177 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4178 			    PM_DEVICE(dip)))
4179 			thresh = pm_default_nexus_threshold;
4180 			for (comp = 0; comp < ncomp; comp++) {
4181 				pmc = &PM_CP(dip, comp)->pmc_comp;
4182 				for (level = 1; level < pmc->pmc_numlevels;
4183 				    level++) {
4184 					pmc->pmc_thresh[level] = thresh;
4185 				}
4186 			}
4187 			DEVI(dip)->devi_pm_dev_thresh =
4188 			    pm_default_nexus_threshold;
4189 			/*
4190 			 * If the nexus node is being reconfigured back to
4191 			 * the default threshold, adjust the notlowest count.
4192 			 */
4193 			if (DEVI(dip)->devi_pm_flags &
4194 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4195 				PM_LOCK_POWER(dip, &circ);
4196 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4197 					if (PM_CURPOWER(dip, i) == 0)
4198 						continue;
4199 					mutex_enter(&pm_compcnt_lock);
4200 					ASSERT(pm_comps_notlowest);
4201 					pm_comps_notlowest--;
4202 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4203 					    "notlowest to %d\n", pmf,
4204 					    PM_DEVICE(dip), pm_comps_notlowest))
4205 					if (pm_comps_notlowest == 0)
4206 						pm_ppm_notify_all_lowest(dip,
4207 						    PM_ALL_LOWEST);
4208 					mutex_exit(&pm_compcnt_lock);
4209 				}
4210 				PM_UNLOCK_POWER(dip, circ);
4211 			}
4212 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4213 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4214 			PM_UNLOCK_DIP(dip);
4215 			return;
4216 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4217 			/*
4218 			 * If the nexus node is being configured for a
4219 			 * non-default threshold, include that node in
4220 			 * the notlowest accounting.
4221 			 */
4222 			PM_LOCK_POWER(dip, &circ);
4223 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4224 				if (PM_CURPOWER(dip, i) == 0)
4225 					continue;
4226 				mutex_enter(&pm_compcnt_lock);
4227 				if (pm_comps_notlowest == 0)
4228 					pm_ppm_notify_all_lowest(dip,
4229 					    PM_NOT_ALL_LOWEST);
4230 				pm_comps_notlowest++;
4231 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4232 				    "notlowest to %d\n", pmf,
4233 				    PM_DEVICE(dip), pm_comps_notlowest))
4234 				mutex_exit(&pm_compcnt_lock);
4235 			}
4236 			PM_UNLOCK_POWER(dip, circ);
4237 		}
4238 	}
4239 	/*
4240 	 * Compute the total number of transitions for all components
4241 	 * of the device.  Distribute the threshold evenly over them
4242 	 */
4243 	for (comp = 0; comp < ncomp; comp++) {
4244 		pmc = &PM_CP(dip, comp)->pmc_comp;
4245 		ASSERT(pmc->pmc_numlevels > 1);
4246 		transitions += pmc->pmc_numlevels - 1;
4247 	}
4248 	ASSERT(transitions);
4249 	thresh = target_threshold / transitions;
4250 
4251 	for (comp = 0; comp < ncomp; comp++) {
4252 		pmc = &PM_CP(dip, comp)->pmc_comp;
4253 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4254 			pmc->pmc_thresh[level] = thresh;
4255 		}
4256 	}
4257 
4258 #ifdef DEBUG
4259 	for (comp = 0; comp < ncomp; comp++) {
4260 		pmc = &PM_CP(dip, comp)->pmc_comp;
4261 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4262 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4263 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4264 			    comp, level, pmc->pmc_thresh[level]))
4265 		}
4266 	}
4267 #endif
4268 	/*
4269 	 * Distribute any remainder till they are all gone
4270 	 */
4271 	remainder = target_threshold - thresh * transitions;
4272 	level = 1;
4273 #ifdef DEBUG
4274 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4275 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4276 	    transitions))
4277 #endif
4278 	while (remainder > 0) {
4279 		comp = 0;
4280 		while (remainder && (comp < ncomp)) {
4281 			pmc = &PM_CP(dip, comp)->pmc_comp;
4282 			if (level < pmc->pmc_numlevels) {
4283 				pmc->pmc_thresh[level] += 1;
4284 				remainder--;
4285 			}
4286 			comp++;
4287 		}
4288 		level++;
4289 	}
4290 #ifdef DEBUG
4291 	for (comp = 0; comp < ncomp; comp++) {
4292 		pmc = &PM_CP(dip, comp)->pmc_comp;
4293 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4294 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4295 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4296 			    comp, level, pmc->pmc_thresh[level]))
4297 		}
4298 	}
4299 #endif
4300 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4301 	DEVI(dip)->devi_pm_dev_thresh = base;
4302 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4303 	DEVI(dip)->devi_pm_flags |= flag;
4304 	PM_UNLOCK_DIP(dip);
4305 }
4306 
4307 /*
4308  * Called when there is no old-style platform power management driver
4309  */
4310 static int
4311 ddi_no_platform_power(power_req_t *req)
4312 {
4313 	_NOTE(ARGUNUSED(req))
4314 	return (DDI_FAILURE);
4315 }
4316 
/*
 * Entry point supplied by the platform-specific pm driver to bring the
 * device component 'pm_cmpt' to power level 'pm_level'.  Until something
 * else is stored here it points at ddi_no_platform_power.
 *
 * Using a global to get the function from the platform-specific pm driver
 * is not ideal, but it is simple and efficient.  The property lookup that
 * was used previously was being done in the idle loop on swift systems
 * without pmc chips; it hurt deskbench performance and violated scheduler
 * locking rules.
 */
int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4327 
4328 /*
4329  * Old obsolete interface for a device to request a power change (but only
4330  * an increase in power)
4331  */
4332 int
4333 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4334 {
4335 	return (pm_raise_power(dip, cmpt, level));
4336 }
4337 
4338 /*
4339  * The old obsolete interface to platform power management.  Only used by
4340  * Gypsy platform and APM on X86.
4341  */
4342 int
4343 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4344 {
4345 	power_req_t	request;
4346 
4347 	request.request_type = PMR_SET_POWER;
4348 	request.req.set_power_req.who = dip;
4349 	request.req.set_power_req.cmpt = pm_cmpt;
4350 	request.req.set_power_req.level = pm_level;
4351 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4352 }
4353 
4354 /*
4355  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4356  * passed.  Returns true if subsequent processing could result in power being
4357  * removed from the device.  The arg is not currently used because it is
4358  * implicit in the operation of cpr/DR.
4359  */
4360 int
4361 ddi_removing_power(dev_info_t *dip)
4362 {
4363 	_NOTE(ARGUNUSED(dip))
4364 	return (pm_powering_down);
4365 }
4366 
4367 /*
4368  * Returns true if a device indicates that its parent handles suspend/resume
4369  * processing for it.
4370  */
4371 int
4372 e_ddi_parental_suspend_resume(dev_info_t *dip)
4373 {
4374 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4375 }
4376 
4377 /*
4378  * Called for devices which indicate that their parent does suspend/resume
4379  * handling for them
4380  */
4381 int
4382 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4383 {
4384 	power_req_t	request;
4385 	request.request_type = PMR_SUSPEND;
4386 	request.req.suspend_req.who = dip;
4387 	request.req.suspend_req.cmd = cmd;
4388 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4389 }
4390 
4391 /*
4392  * Called for devices which indicate that their parent does suspend/resume
4393  * handling for them
4394  */
4395 int
4396 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4397 {
4398 	power_req_t	request;
4399 	request.request_type = PMR_RESUME;
4400 	request.req.resume_req.who = dip;
4401 	request.req.resume_req.cmd = cmd;
4402 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4403 }
4404 
4405 /*
4406  * Old obsolete exported interface for drivers to create components.
4407  * This is now handled by exporting the pm-components property.
4408  */
4409 int
4410 pm_create_components(dev_info_t *dip, int num_components)
4411 {
4412 	PMD_FUNC(pmf, "pm_create_components")
4413 
4414 	if (num_components < 1)
4415 		return (DDI_FAILURE);
4416 
4417 	if (!DEVI_IS_ATTACHING(dip)) {
4418 		return (DDI_FAILURE);
4419 	}
4420 
4421 	/* don't need to lock dip because attach is single threaded */
4422 	if (DEVI(dip)->devi_pm_components) {
4423 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4424 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4425 		return (DDI_FAILURE);
4426 	}
4427 	e_pm_create_components(dip, num_components);
4428 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4429 	e_pm_default_components(dip, num_components);
4430 	return (DDI_SUCCESS);
4431 }
4432 
4433 /*
4434  * Obsolete interface previously called by drivers to destroy their components
4435  * at detach time.  This is now done automatically.  However, we need to keep
4436  * this for the old drivers.
4437  */
4438 void
4439 pm_destroy_components(dev_info_t *dip)
4440 {
4441 	PMD_FUNC(pmf, "pm_destroy_components")
4442 	dev_info_t *pdip = ddi_get_parent(dip);
4443 
4444 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4445 	    PM_DEVICE(dip)))
4446 	ASSERT(DEVI_IS_DETACHING(dip));
4447 #ifdef DEBUG
4448 	if (!PM_ISBC(dip))
4449 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4450 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4451 		    PM_ADDR(dip));
4452 #endif
4453 	/*
4454 	 * We ignore this unless this is an old-style driver, except for
4455 	 * printing the message above
4456 	 */
4457 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4458 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4459 		    PM_DEVICE(dip)))
4460 		return;
4461 	}
4462 	ASSERT(PM_GET_PM_INFO(dip));
4463 
4464 	/*
4465 	 * pm_unmanage will clear info pointer later, after dealing with
4466 	 * dependencies
4467 	 */
4468 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4469 	/*
4470 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4471 	 * Parents that get notification are not adjusted because their
4472 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4473 	 */
4474 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4475 		pm_rele_power(pdip);
4476 #ifdef DEBUG
4477 	else {
4478 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4479 		    pmf, PM_DEVICE(dip)))
4480 	}
4481 #endif
4482 	e_pm_destroy_components(dip);
4483 	/*
4484 	 * Forget we ever knew anything about the components of this  device
4485 	 */
4486 	DEVI(dip)->devi_pm_flags &=
4487 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4488 }
4489 
4490 /*
4491  * Exported interface for a driver to set a component busy.
4492  */
4493 int
4494 pm_busy_component(dev_info_t *dip, int cmpt)
4495 {
4496 	struct pm_component *cp;
4497 
4498 	ASSERT(dip != NULL);
4499 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4500 		return (DDI_FAILURE);
4501 	PM_LOCK_BUSY(dip);
4502 	cp->pmc_busycount++;
4503 	cp->pmc_timestamp = 0;
4504 	PM_UNLOCK_BUSY(dip);
4505 	return (DDI_SUCCESS);
4506 }
4507 
4508 /*
4509  * Exported interface for a driver to set a component idle.
4510  */
4511 int
4512 pm_idle_component(dev_info_t *dip, int cmpt)
4513 {
4514 	PMD_FUNC(pmf, "pm_idle_component")
4515 	struct pm_component *cp;
4516 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4517 
4518 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4519 		return (DDI_FAILURE);
4520 
4521 	PM_LOCK_BUSY(dip);
4522 	if (cp->pmc_busycount) {
4523 		if (--(cp->pmc_busycount) == 0)
4524 			cp->pmc_timestamp = gethrestime_sec();
4525 	} else {
4526 		cp->pmc_timestamp = gethrestime_sec();
4527 	}
4528 
4529 	PM_UNLOCK_BUSY(dip);
4530 
4531 	/*
4532 	 * if device becomes idle during idle down period, try scan it down
4533 	 */
4534 	if (scanp && PM_IS_PID(dip)) {
4535 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4536 		    PM_DEVICE(dip)))
4537 		pm_rescan(dip);
4538 		return (DDI_SUCCESS);
4539 	}
4540 
4541 	/*
4542 	 * handle scan not running with nexus threshold == 0
4543 	 */
4544 
4545 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4546 		pm_rescan(dip);
4547 	}
4548 
4549 	return (DDI_SUCCESS);
4550 }
4551 
4552 /*
4553  * This is the old  obsolete interface called by drivers to set their normal
4554  * power.  Thus we can't fix its behavior or return a value.
4555  * This functionality is replaced by the pm-component property.
4556  * We'll only get components destroyed while no power management is
4557  * going on (and the device is detached), so we don't need a mutex here
4558  */
4559 void
4560 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4561 {
4562 	PMD_FUNC(pmf, "set_normal_power")
4563 #ifdef DEBUG
4564 	if (!PM_ISBC(dip))
4565 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4566 		    "(driver exporting pm-components property) ignored",
4567 		    PM_NAME(dip), PM_ADDR(dip));
4568 #endif
4569 	if (PM_ISBC(dip)) {
4570 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4571 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4572 		e_pm_set_max_power(dip, comp, level);
4573 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4574 	}
4575 }
4576 
4577 /*
4578  * Called on a successfully detached driver to free pm resources
4579  */
4580 static void
4581 pm_stop(dev_info_t *dip)
4582 {
4583 	PMD_FUNC(pmf, "stop")
4584 	dev_info_t *pdip = ddi_get_parent(dip);
4585 
4586 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4587 	/* stopping scan, destroy scan data structure */
4588 	if (!PM_ISBC(dip)) {
4589 		pm_scan_stop(dip);
4590 		pm_scan_fini(dip);
4591 	}
4592 
4593 	if (PM_GET_PM_INFO(dip) != NULL) {
4594 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4595 			/*
4596 			 * Old style driver may have called
4597 			 * pm_destroy_components already, but just in case ...
4598 			 */
4599 			e_pm_destroy_components(dip);
4600 		} else {
4601 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4602 			    pmf, PM_DEVICE(dip)))
4603 		}
4604 	} else {
4605 		if (PM_NUMCMPTS(dip))
4606 			e_pm_destroy_components(dip);
4607 		else {
4608 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4609 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4610 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4611 					pm_rele_power(pdip);
4612 				} else if (pdip && MDI_VHCI(pdip)) {
4613 					(void) mdi_power(pdip,
4614 					    MDI_PM_RELE_POWER,
4615 					    (void *)dip, NULL, 0);
4616 				}
4617 			}
4618 		}
4619 	}
4620 }
4621 
4622 /*
4623  * The node is the subject of a reparse pm props ioctl. Throw away the old
4624  * info and start over.
4625  */
4626 int
4627 e_new_pm_props(dev_info_t *dip)
4628 {
4629 	if (PM_GET_PM_INFO(dip) != NULL) {
4630 		pm_stop(dip);
4631 
4632 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4633 			return (DDI_FAILURE);
4634 		}
4635 	}
4636 	e_pm_props(dip);
4637 	return (DDI_SUCCESS);
4638 }
4639 
4640 /*
4641  * Device has been attached, so process its pm properties
4642  */
4643 void
4644 e_pm_props(dev_info_t *dip)
4645 {
4646 	char *pp;
4647 	int len;
4648 	int flags = 0;
4649 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4650 
4651 	/*
4652 	 * It doesn't matter if we do this more than once, we should always
4653 	 * get the same answers, and if not, then the last one in is the
4654 	 * best one.
4655 	 */
4656 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4657 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4658 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4659 			flags = PMC_NEEDS_SR;
4660 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4661 			flags = PMC_NO_SR;
4662 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4663 			flags = PMC_PARENTAL_SR;
4664 		} else {
4665 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4666 			    "%s property value '%s'", PM_NAME(dip),
4667 			    PM_ADDR(dip), "pm-hardware-state", pp);
4668 		}
4669 		kmem_free(pp, len);
4670 	}
4671 	/*
4672 	 * This next segment (PMC_WANTS_NOTIFY) is in
4673 	 * support of nexus drivers which will want to be involved in
4674 	 * (or at least notified of) their child node's power level transitions.
4675 	 * "pm-want-child-notification?" is defined by the parent.
4676 	 */
4677 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4678 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4679 		flags |= PMC_WANTS_NOTIFY;
4680 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4681 	    dip, propflag, "pm-want-child-notification?"));
4682 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4683 	    "no-involuntary-power-cycles"))
4684 		flags |= PMC_NO_INVOL;
4685 	/* devfs single threads us */
4686 	DEVI(dip)->devi_pm_flags |= flags;
4687 }
4688 
4689 /*
4690  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4691  * driver which has claimed a node.
4692  * Sets old_power in arg struct.
4693  */
4694 static int
4695 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4696     ddi_ctl_enum_t ctlop, void *arg, void *result)
4697 {
4698 	_NOTE(ARGUNUSED(dip))
4699 	PMD_FUNC(pmf, "ctlops")
4700 	power_req_t *reqp = (power_req_t *)arg;
4701 	int retval;
4702 	dev_info_t *target_dip;
4703 	int new_level, old_level, cmpt;
4704 #ifdef DEBUG
4705 	char *format;
4706 #endif
4707 
4708 	/*
4709 	 * The interface for doing the actual power level changes is now
4710 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4711 	 * different platform-specific power control drivers.
4712 	 *
4713 	 * This driver implements the "default" version of this interface.
4714 	 * If no ppm driver has been installed then this interface is called
4715 	 * instead.
4716 	 */
4717 	ASSERT(dip == NULL);
4718 	switch (ctlop) {
4719 	case DDI_CTLOPS_POWER:
4720 		switch (reqp->request_type) {
4721 		case PMR_PPM_SET_POWER:
4722 		{
4723 			target_dip = reqp->req.ppm_set_power_req.who;
4724 			ASSERT(target_dip == rdip);
4725 			new_level = reqp->req.ppm_set_power_req.new_level;
4726 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4727 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4728 			old_level = PM_CURPOWER(target_dip, cmpt);
4729 			reqp->req.ppm_set_power_req.old_level = old_level;
4730 			retval = pm_power(target_dip, cmpt, new_level);
4731 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4732 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4733 			    old_level, new_level, (retval == DDI_SUCCESS ?
4734 			    "chd" : "no chg")))
4735 			return (retval);
4736 		}
4737 
4738 		case PMR_PPM_PRE_DETACH:
4739 		case PMR_PPM_POST_DETACH:
4740 		case PMR_PPM_PRE_ATTACH:
4741 		case PMR_PPM_POST_ATTACH:
4742 		case PMR_PPM_PRE_PROBE:
4743 		case PMR_PPM_POST_PROBE:
4744 		case PMR_PPM_PRE_RESUME:
4745 		case PMR_PPM_INIT_CHILD:
4746 		case PMR_PPM_UNINIT_CHILD:
4747 #ifdef DEBUG
4748 			switch (reqp->request_type) {
4749 				case PMR_PPM_PRE_DETACH:
4750 					format = "%s: PMR_PPM_PRE_DETACH "
4751 					    "%s@%s(%s#%d)\n";
4752 					break;
4753 				case PMR_PPM_POST_DETACH:
4754 					format = "%s: PMR_PPM_POST_DETACH "
4755 					    "%s@%s(%s#%d) rets %d\n";
4756 					break;
4757 				case PMR_PPM_PRE_ATTACH:
4758 					format = "%s: PMR_PPM_PRE_ATTACH "
4759 					    "%s@%s(%s#%d)\n";
4760 					break;
4761 				case PMR_PPM_POST_ATTACH:
4762 					format = "%s: PMR_PPM_POST_ATTACH "
4763 					    "%s@%s(%s#%d) rets %d\n";
4764 					break;
4765 				case PMR_PPM_PRE_PROBE:
4766 					format = "%s: PMR_PPM_PRE_PROBE "
4767 					    "%s@%s(%s#%d)\n";
4768 					break;
4769 				case PMR_PPM_POST_PROBE:
4770 					format = "%s: PMR_PPM_POST_PROBE "
4771 					    "%s@%s(%s#%d) rets %d\n";
4772 					break;
4773 				case PMR_PPM_PRE_RESUME:
4774 					format = "%s: PMR_PPM_PRE_RESUME "
4775 					    "%s@%s(%s#%d) rets %d\n";
4776 					break;
4777 				case PMR_PPM_INIT_CHILD:
4778 					format = "%s: PMR_PPM_INIT_CHILD "
4779 					    "%s@%s(%s#%d)\n";
4780 					break;
4781 				case PMR_PPM_UNINIT_CHILD:
4782 					format = "%s: PMR_PPM_UNINIT_CHILD "
4783 					    "%s@%s(%s#%d)\n";
4784 					break;
4785 				default:
4786 					break;
4787 			}
4788 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4789 			    reqp->req.ppm_config_req.result))
4790 #endif
4791 			return (DDI_SUCCESS);
4792 
4793 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4794 			/*
4795 			 * Nothing for us to do
4796 			 */
4797 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4798 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4799 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4800 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4801 			    reqp->req.ppm_notify_level_req.cmpt,
4802 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4803 			    reqp->req.ppm_notify_level_req.cmpt),
4804 			    reqp->req.ppm_notify_level_req.new_level))
4805 			return (DDI_SUCCESS);
4806 
4807 		case PMR_PPM_UNMANAGE:
4808 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4809 			    pmf, PM_DEVICE(rdip)))
4810 			return (DDI_SUCCESS);
4811 
4812 		case PMR_PPM_LOCK_POWER:
4813 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4814 			    reqp->req.ppm_lock_power_req.circp);
4815 			return (DDI_SUCCESS);
4816 
4817 		case PMR_PPM_UNLOCK_POWER:
4818 			pm_unlock_power_single(
4819 			    reqp->req.ppm_unlock_power_req.who,
4820 			    reqp->req.ppm_unlock_power_req.circ);
4821 			return (DDI_SUCCESS);
4822 
4823 		case PMR_PPM_TRY_LOCK_POWER:
4824 			*(int *)result = pm_try_locking_power_single(
4825 			    reqp->req.ppm_lock_power_req.who,
4826 			    reqp->req.ppm_lock_power_req.circp);
4827 			return (DDI_SUCCESS);
4828 
4829 		case PMR_PPM_POWER_LOCK_OWNER:
4830 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4831 			ASSERT(target_dip == rdip);
4832 			reqp->req.ppm_power_lock_owner_req.owner =
4833 			    DEVI(rdip)->devi_busy_thread;
4834 			return (DDI_SUCCESS);
4835 		default:
4836 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4837 			return (DDI_FAILURE);
4838 		}
4839 
4840 	default:
4841 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4842 		return (DDI_FAILURE);
4843 	}
4844 }
4845 
4846 /*
4847  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4848  * power_ops struct for this functionality instead?
4849  * However, we only ever do this on a ppm driver.
4850  */
4851 int
4852 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4853 {
4854 	int (*fp)();
4855 
4856 	/* if no ppm handler, call the default routine */
4857 	if (d == NULL) {
4858 		return (pm_default_ctlops(d, r, op, a, v));
4859 	}
4860 	if (!d || !r)
4861 		return (DDI_FAILURE);
4862 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4863 		DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4864 
4865 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4866 	return ((*fp)(d, r, op, a, v));
4867 }
4868 
4869 /*
4870  * Called on a node when attach completes or the driver makes its first pm
4871  * call (whichever comes first).
4872  * In the attach case, device may not be power manageable at all.
4873  * Don't need to lock the dip because we're single threaded by the devfs code
4874  */
4875 static int
4876 pm_start(dev_info_t *dip)
4877 {
4878 	PMD_FUNC(pmf, "start")
4879 	int ret;
4880 	dev_info_t *pdip = ddi_get_parent(dip);
4881 	int e_pm_manage(dev_info_t *, int);
4882 	void pm_noinvol_specd(dev_info_t *dip);
4883 
4884 	e_pm_props(dip);
4885 	pm_noinvol_specd(dip);
4886 	/*
4887 	 * If this dip has already been processed, don't mess with it
4888 	 * (but decrement the speculative count we did above, as whatever
4889 	 * code put it under pm already will have dealt with it)
4890 	 */
4891 	if (PM_GET_PM_INFO(dip)) {
4892 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
4893 		    pmf, PM_DEVICE(dip)))
4894 		return (0);
4895 	}
4896 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
4897 
4898 	if (PM_GET_PM_INFO(dip) == NULL) {
4899 		/*
4900 		 * keep the kidsupcount increment as is
4901 		 */
4902 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
4903 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4904 			pm_hold_power(pdip);
4905 		} else if (pdip && MDI_VHCI(pdip)) {
4906 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
4907 			    (void *)dip, NULL, 0);
4908 		}
4909 
4910 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
4911 		    "left up\n", pmf, PM_DEVICE(dip)))
4912 	}
4913 
4914 	return (ret);
4915 }
4916 
4917 /*
4918  * Keep a list of recorded thresholds.  For now we just keep a list and
4919  * search it linearly.  We don't expect too many entries.  Can always hash it
4920  * later if we need to.
4921  */
4922 void
4923 pm_record_thresh(pm_thresh_rec_t *rp)
4924 {
4925 	pm_thresh_rec_t *pptr, *ptr;
4926 
4927 	ASSERT(*rp->ptr_physpath);
4928 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
4929 	for (pptr = NULL, ptr = pm_thresh_head;
4930 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
4931 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
4932 			/* replace this one */
4933 			rp->ptr_next = ptr->ptr_next;
4934 			if (pptr) {
4935 				pptr->ptr_next = rp;
4936 			} else {
4937 				pm_thresh_head = rp;
4938 			}
4939 			rw_exit(&pm_thresh_rwlock);
4940 			kmem_free(ptr, ptr->ptr_size);
4941 			return;
4942 		}
4943 		continue;
4944 	}
4945 	/*
4946 	 * There was not a match in the list, insert this one in front
4947 	 */
4948 	if (pm_thresh_head) {
4949 		rp->ptr_next = pm_thresh_head;
4950 		pm_thresh_head = rp;
4951 	} else {
4952 		rp->ptr_next = NULL;
4953 		pm_thresh_head = rp;
4954 	}
4955 	rw_exit(&pm_thresh_rwlock);
4956 }
4957 
4958 /*
4959  * Create a new dependency record and hang a new dependency entry off of it
4960  */
4961 pm_pdr_t *
4962 newpdr(char *kept, char *keeps, int isprop)
4963 {
4964 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
4965 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
4966 	p->pdr_size = size;
4967 	p->pdr_isprop = isprop;
4968 	p->pdr_kept_paths = NULL;
4969 	p->pdr_kept_count = 0;
4970 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
4971 	(void) strcpy(p->pdr_kept, kept);
4972 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
4973 	(void) strcpy(p->pdr_keeper, keeps);
4974 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
4975 	    (intptr_t)p + size);
4976 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
4977 	    (intptr_t)p + size);
4978 	return (p);
4979 }
4980 
4981 /*
4982  * Keep a list of recorded dependencies.  We only keep the
4983  * keeper -> kept list for simplification. At this point We do not
4984  * care about whether the devices are attached or not yet,
4985  * this would be done in pm_keeper() and pm_kept().
4986  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
4987  * and it is up to the user to issue the ioctl again if they want it
4988  * (e.g. pmconfig)
4989  * Returns true if dependency already exists in the list.
4990  */
4991 int
4992 pm_record_keeper(char *kept, char *keeper, int isprop)
4993 {
4994 	PMD_FUNC(pmf, "record_keeper")
4995 	pm_pdr_t *npdr, *ppdr, *pdr;
4996 
4997 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
4998 	ASSERT(kept && keeper);
4999 #ifdef DEBUG
5000 	if (pm_debug & PMD_KEEPS)
5001 		prdeps("pm_record_keeper entry");
5002 #endif
5003 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5004 	    ppdr = pdr, pdr = pdr->pdr_next) {
5005 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5006 		    pdr->pdr_keeper))
5007 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5008 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5009 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5010 			return (1);
5011 		}
5012 	}
5013 	/*
5014 	 * We did not find any match, so we have to make an entry
5015 	 */
5016 	npdr = newpdr(kept, keeper, isprop);
5017 	if (ppdr) {
5018 		ASSERT(ppdr->pdr_next == NULL);
5019 		ppdr->pdr_next = npdr;
5020 	} else {
5021 		ASSERT(pm_dep_head == NULL);
5022 		pm_dep_head = npdr;
5023 	}
5024 #ifdef DEBUG
5025 	if (pm_debug & PMD_KEEPS)
5026 		prdeps("pm_record_keeper after new record");
5027 #endif
5028 	if (!isprop)
5029 		pm_unresolved_deps++;
5030 	else
5031 		pm_prop_deps++;
5032 	return (0);
5033 }
5034 
5035 /*
5036  * Look up this device in the set of devices we've seen ioctls for
5037  * to see if we are holding a threshold spec for it.  If so, make it so.
5038  * At ioctl time, we were given the physical path of the device.
5039  */
5040 int
5041 pm_thresh_specd(dev_info_t *dip)
5042 {
5043 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5044 	char *path = 0;
5045 	char pathbuf[MAXNAMELEN];
5046 	pm_thresh_rec_t *rp;
5047 
5048 	path = ddi_pathname(dip, pathbuf);
5049 
5050 	rw_enter(&pm_thresh_rwlock, RW_READER);
5051 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5052 		if (strcmp(rp->ptr_physpath, path) != 0)
5053 			continue;
5054 		pm_apply_recorded_thresh(dip, rp);
5055 		rw_exit(&pm_thresh_rwlock);
5056 		return (1);
5057 	}
5058 	rw_exit(&pm_thresh_rwlock);
5059 	return (0);
5060 }
5061 
5062 static int
5063 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5064 {
5065 	PMD_FUNC(pmf, "set_keeping")
5066 	pm_info_t *kept_info;
5067 	int j, up = 0, circ;
5068 	void prdeps(char *);
5069 
5070 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5071 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5072 #ifdef DEBUG
5073 	if (pm_debug & PMD_KEEPS)
5074 		prdeps("Before PAD\n");
5075 #endif
5076 	ASSERT(keeper != kept);
5077 	if (PM_GET_PM_INFO(keeper) == NULL) {
5078 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5079 		    "%s@%s(%s#%d), but the latter is not power managed",
5080 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5081 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5082 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5083 		return (0);
5084 	}
5085 	kept_info = PM_GET_PM_INFO(kept);
5086 	ASSERT(kept_info);
5087 	PM_LOCK_POWER(keeper, &circ);
5088 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5089 		if (PM_CURPOWER(keeper, j)) {
5090 			up++;
5091 			break;
5092 		}
5093 	}
5094 	if (up) {
5095 		/* Bringup and maintain a hold on the kept */
5096 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5097 		    PM_DEVICE(kept)))
5098 		bring_pmdep_up(kept, 1);
5099 	}
5100 	PM_UNLOCK_POWER(keeper, circ);
5101 #ifdef DEBUG
5102 	if (pm_debug & PMD_KEEPS)
5103 		prdeps("After PAD\n");
5104 #endif
5105 	return (1);
5106 }
5107 
5108 /*
5109  * Should this device keep up another device?
5110  * Look up this device in the set of devices we've seen ioctls for
5111  * to see if we are holding a dependency spec for it.  If so, make it so.
5112  * Because we require the kept device to be attached already in order to
5113  * make the list entry (and hold it), we only need to look for keepers.
5114  * At ioctl time, we were given the physical path of the device.
5115  */
5116 int
5117 pm_keeper(char *keeper)
5118 {
5119 	PMD_FUNC(pmf, "keeper")
5120 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5121 	dev_info_t *dip;
5122 	pm_pdr_t *dp;
5123 	dev_info_t *kept = NULL;
5124 	int ret = 0;
5125 	int i;
5126 
5127 	if (!pm_unresolved_deps && !pm_prop_deps)
5128 		return (0);
5129 	ASSERT(keeper != NULL);
5130 	dip = pm_name_to_dip(keeper, 1);
5131 	if (dip == NULL)
5132 		return (0);
5133 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5134 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5135 		if (!dp->pdr_isprop) {
5136 			if (!pm_unresolved_deps)
5137 				continue;
5138 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5139 			if (dp->pdr_satisfied) {
5140 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5141 				continue;
5142 			}
5143 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5144 				ret += pm_apply_recorded_dep(dip, dp);
5145 			}
5146 		} else {
5147 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5148 				continue;
5149 			for (i = 0; i < dp->pdr_kept_count; i++) {
5150 				if (dp->pdr_kept_paths[i] == NULL)
5151 					continue;
5152 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5153 				if (kept == NULL)
5154 					continue;
5155 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5156 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5157 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5158 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5159 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5160 				    dp->pdr_kept_count))
5161 				if (kept != dip) {
5162 					ret += pm_set_keeping(dip, kept);
5163 				}
5164 				ddi_release_devi(kept);
5165 			}
5166 
5167 		}
5168 	}
5169 	ddi_release_devi(dip);
5170 	return (ret);
5171 }
5172 
5173 /*
5174  * Should this device be kept up by another device?
5175  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5176  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5177  * kept device lists.
5178  */
5179 static int
5180 pm_kept(char *keptp)
5181 {
5182 	PMD_FUNC(pmf, "kept")
5183 	pm_pdr_t *dp;
5184 	int found = 0;
5185 	int ret = 0;
5186 	dev_info_t *keeper;
5187 	dev_info_t *kept;
5188 	size_t length;
5189 	int i;
5190 	char **paths;
5191 	char *path;
5192 
5193 	ASSERT(keptp != NULL);
5194 	kept = pm_name_to_dip(keptp, 1);
5195 	if (kept == NULL)
5196 		return (0);
5197 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5198 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5199 		if (dp->pdr_isprop) {
5200 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5201 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5202 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5203 				/*
5204 				 * Dont allow self dependency.
5205 				 */
5206 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5207 					continue;
5208 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5209 				if (keeper == NULL)
5210 					continue;
5211 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5212 				    "%p\n", pmf, (void *)kept))
5213 #ifdef DEBUG
5214 				if (pm_debug & PMD_DEP)
5215 					prdeps("Before Adding from pm_kept\n");
5216 #endif
5217 				/*
5218 				 * Add ourselves to the dip list.
5219 				 */
5220 				if (dp->pdr_kept_count == 0) {
5221 					length = strlen(keptp) + 1;
5222 					path =
5223 					    kmem_alloc(length, KM_SLEEP);
5224 					paths = kmem_alloc(sizeof (char **),
5225 						    KM_SLEEP);
5226 					(void) strcpy(path, keptp);
5227 					paths[0] = path;
5228 					dp->pdr_kept_paths = paths;
5229 					dp->pdr_kept_count++;
5230 				} else {
5231 					/* Check to see if already on list */
5232 					for (i = 0; i < dp->pdr_kept_count;
5233 					    i++) {
5234 						if (strcmp(keptp,
5235 						    dp->pdr_kept_paths[i])
5236 						    == 0) {
5237 							found++;
5238 							break;
5239 						}
5240 					}
5241 					if (found) {
5242 						ddi_release_devi(keeper);
5243 						continue;
5244 					}
5245 					length = dp->pdr_kept_count *
5246 					    sizeof (char **);
5247 					paths = kmem_alloc(
5248 					    length + sizeof (char **),
5249 					    KM_SLEEP);
5250 					if (dp->pdr_kept_count) {
5251 						bcopy(dp->pdr_kept_paths,
5252 						    paths, length);
5253 						kmem_free(dp->pdr_kept_paths,
5254 							length);
5255 					}
5256 					dp->pdr_kept_paths = paths;
5257 					length = strlen(keptp) + 1;
5258 					path =
5259 					    kmem_alloc(length, KM_SLEEP);
5260 					(void) strcpy(path, keptp);
5261 					dp->pdr_kept_paths[i] = path;
5262 					dp->pdr_kept_count++;
5263 				}
5264 #ifdef DEBUG
5265 				if (pm_debug & PMD_DEP)
5266 					prdeps("After from pm_kept\n");
5267 #endif
5268 				if (keeper) {
5269 					ret += pm_set_keeping(keeper, kept);
5270 					ddi_release_devi(keeper);
5271 				}
5272 			}
5273 		} else {
5274 			/*
5275 			 * pm_keeper would be called later to do
5276 			 * the actual pm_set_keeping.
5277 			 */
5278 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5279 			    pmf, (void *)kept))
5280 #ifdef DEBUG
5281 			if (pm_debug & PMD_DEP)
5282 				prdeps("Before Adding from pm_kept\n");
5283 #endif
5284 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5285 				if (dp->pdr_kept_paths == NULL) {
5286 					length = strlen(keptp) + 1;
5287 					path =
5288 					    kmem_alloc(length, KM_SLEEP);
5289 					paths = kmem_alloc(sizeof (char **),
5290 						KM_SLEEP);
5291 					(void) strcpy(path, keptp);
5292 					paths[0] = path;
5293 					dp->pdr_kept_paths = paths;
5294 					dp->pdr_kept_count++;
5295 				}
5296 			}
5297 #ifdef DEBUG
5298 			if (pm_debug & PMD_DEP)
5299 			    prdeps("After from pm_kept\n");
5300 #endif
5301 		}
5302 	}
5303 	ddi_release_devi(kept);
5304 	return (ret);
5305 }
5306 
5307 /*
5308  * Apply a recorded dependency.  dp specifies the dependency, and
5309  * keeper is already known to be the device that keeps up the other (kept) one.
5310  * We have to the whole tree for the "kept" device, then apply
5311  * the dependency (which may already be applied).
5312  */
5313 int
5314 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5315 {
5316 	PMD_FUNC(pmf, "apply_recorded_dep")
5317 	dev_info_t *kept = NULL;
5318 	int ret = 0;
5319 	char *keptp = NULL;
5320 
5321 	/*
5322 	 * Device to Device dependency can only be 1 to 1.
5323 	 */
5324 	if (dp->pdr_kept_paths == NULL)
5325 		return (0);
5326 	keptp = dp->pdr_kept_paths[0];
5327 	if (keptp == NULL)
5328 		return (0);
5329 	ASSERT(*keptp != '\0');
5330 	kept = pm_name_to_dip(keptp, 1);
5331 	if (kept == NULL)
5332 		return (0);
5333 	if (kept) {
5334 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5335 		    dp->pdr_keeper, keptp))
5336 		if (pm_set_keeping(keeper, kept)) {
5337 			ASSERT(dp->pdr_satisfied == 0);
5338 			dp->pdr_satisfied = 1;
5339 			ASSERT(pm_unresolved_deps);
5340 			pm_unresolved_deps--;
5341 			ret++;
5342 		}
5343 	}
5344 	ddi_release_devi(kept);
5345 
5346 	return (ret);
5347 }
5348 
5349 /*
5350  * Called from common/io/pm.c
5351  */
5352 int
5353 pm_cur_power(pm_component_t *cp)
5354 {
5355 	return (cur_power(cp));
5356 }
5357 
5358 /*
5359  * External interface to sanity-check a power level.
5360  */
5361 int
5362 pm_valid_power(dev_info_t *dip, int comp, int level)
5363 {
5364 	PMD_FUNC(pmf, "valid_power")
5365 
5366 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5367 		return (e_pm_valid_power(dip, comp, level));
5368 	else {
5369 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5370 		    pmf, comp, PM_NUMCMPTS(dip), level))
5371 		return (0);
5372 	}
5373 }
5374 
5375 /*
5376  * Called when a device that is direct power managed needs to change state.
5377  * This routine arranges to block the request until the process managing
5378  * the device makes the change (or some other incompatible change) or
5379  * the process closes /dev/pm.
5380  */
5381 static int
5382 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5383 {
5384 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5385 	int ret = 0;
5386 	void pm_dequeue_blocked(pm_rsvp_t *);
5387 	void pm_enqueue_blocked(pm_rsvp_t *);
5388 
5389 	ASSERT(!pm_processes_stopped);
5390 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5391 	new->pr_dip = dip;
5392 	new->pr_comp = comp;
5393 	new->pr_newlevel = newpower;
5394 	new->pr_oldlevel = oldpower;
5395 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5396 	mutex_enter(&pm_rsvp_lock);
5397 	pm_enqueue_blocked(new);
5398 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5399 	    PM_CANBLOCK_BLOCK);
5400 	PM_UNLOCK_DIP(dip);
5401 	/*
5402 	 * truss may make the cv_wait_sig return prematurely
5403 	 */
5404 	while (ret == 0) {
5405 		/*
5406 		 * Normally there will be no user context involved, but if
5407 		 * there is (e.g. we are here via an ioctl call to a driver)
5408 		 * then we should allow the process to abort the request,
5409 		 * or we get an unkillable process if the same thread does
5410 		 * PM_DIRECT_PM and pm_raise_power
5411 		 */
5412 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5413 			ret = PMP_FAIL;
5414 		} else {
5415 			ret = new->pr_retval;
5416 		}
5417 	}
5418 	pm_dequeue_blocked(new);
5419 	mutex_exit(&pm_rsvp_lock);
5420 	cv_destroy(&new->pr_cv);
5421 	kmem_free(new, sizeof (*new));
5422 	return (ret);
5423 }
5424 
5425 /*
5426  * Returns true if the process is interested in power level changes (has issued
5427  * PM_GET_STATE_CHANGE ioctl).
5428  */
5429 int
5430 pm_interest_registered(int clone)
5431 {
5432 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5433 	return (pm_interest[clone]);
5434 }
5435 
5436 /*
5437  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5438  * watch all state transitions (dip == NULL).  Set up data
5439  * structs to communicate with process about state changes.
5440  */
5441 void
5442 pm_register_watcher(int clone, dev_info_t *dip)
5443 {
5444 	pscc_t	*p;
5445 	psce_t	*psce;
5446 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5447 
5448 	/*
5449 	 * We definitely need a control struct, then we have to search to see
5450 	 * there is already an entries struct (in the dip != NULL case).
5451 	 */
5452 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5453 	pscc->pscc_clone = clone;
5454 	pscc->pscc_dip = dip;
5455 
5456 	if (dip) {
5457 		int found = 0;
5458 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5459 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5460 			/*
5461 			 * Already an entry for this clone, so just use it
5462 			 * for the new one (for the case where a single
5463 			 * process is watching multiple devices)
5464 			 */
5465 			if (p->pscc_clone == clone) {
5466 				ASSERT(p->pscc_dip != dip);
5467 				pscc->pscc_entries = p->pscc_entries;
5468 				pscc->pscc_entries->psce_references++;
5469 				found++;
5470 			}
5471 		}
5472 		if (!found) {		/* create a new one */
5473 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5474 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5475 			psce->psce_first =
5476 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5477 			    KM_SLEEP);
5478 			psce->psce_in = psce->psce_out = psce->psce_first;
5479 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5480 			psce->psce_references = 1;
5481 			pscc->pscc_entries = psce;
5482 		}
5483 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5484 		rw_exit(&pm_pscc_direct_rwlock);
5485 	} else {
5486 		ASSERT(!pm_interest_registered(clone));
5487 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5488 #ifdef DEBUG
5489 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5490 			/*
5491 			 * Should not be an entry for this clone!
5492 			 */
5493 			ASSERT(p->pscc_clone != clone);
5494 		}
5495 #endif
5496 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5497 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5498 		    PSCCOUNT, KM_SLEEP);
5499 		psce->psce_in = psce->psce_out = psce->psce_first;
5500 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5501 		psce->psce_references = 1;
5502 		pscc->pscc_entries = psce;
5503 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5504 		pm_interest[clone] = 1;
5505 		rw_exit(&pm_pscc_interest_rwlock);
5506 	}
5507 }
5508 
5509 /*
5510  * Remove the given entry from the blocked list
5511  */
5512 void
5513 pm_dequeue_blocked(pm_rsvp_t *p)
5514 {
5515 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5516 	if (pm_blocked_list == p) {
5517 		ASSERT(p->pr_prev == NULL);
5518 		if (p->pr_next != NULL)
5519 			p->pr_next->pr_prev = NULL;
5520 		pm_blocked_list = p->pr_next;
5521 	} else {
5522 		ASSERT(p->pr_prev != NULL);
5523 		p->pr_prev->pr_next = p->pr_next;
5524 		if (p->pr_next != NULL)
5525 			p->pr_next->pr_prev = p->pr_prev;
5526 	}
5527 }
5528 
5529 /*
5530  * Remove the given control struct from the given list
5531  */
5532 static void
5533 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5534 {
5535 	if (*list == p) {
5536 		ASSERT(p->pscc_prev == NULL);
5537 		if (p->pscc_next != NULL)
5538 			p->pscc_next->pscc_prev = NULL;
5539 		*list = p->pscc_next;
5540 	} else {
5541 		ASSERT(p->pscc_prev != NULL);
5542 		p->pscc_prev->pscc_next = p->pscc_next;
5543 		if (p->pscc_next != NULL)
5544 			p->pscc_next->pscc_prev = p->pscc_prev;
5545 	}
5546 }
5547 
5548 /*
5549  * Stick the control struct specified on the front of the list
5550  */
5551 static void
5552 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5553 {
5554 	pscc_t *h;	/* entry at head of list */
5555 	if ((h = *list) == NULL) {
5556 		*list = p;
5557 		ASSERT(p->pscc_next == NULL);
5558 		ASSERT(p->pscc_prev == NULL);
5559 	} else {
5560 		p->pscc_next = h;
5561 		ASSERT(h->pscc_prev == NULL);
5562 		h->pscc_prev = p;
5563 		ASSERT(p->pscc_prev == NULL);
5564 		*list = p;
5565 	}
5566 }
5567 
5568 /*
5569  * If dip is NULL, process is closing "clone" clean up all its registrations.
5570  * Otherwise only clean up those for dip because process is just giving up
5571  * control of a direct device.
5572  */
5573 void
5574 pm_deregister_watcher(int clone, dev_info_t *dip)
5575 {
5576 	pscc_t	*p, *pn;
5577 	psce_t	*psce;
5578 	int found = 0;
5579 
5580 	if (dip == NULL) {
5581 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5582 		for (p = pm_pscc_interest; p; p = pn) {
5583 			pn = p->pscc_next;
5584 			if (p->pscc_clone == clone) {
5585 				pm_dequeue_pscc(p, &pm_pscc_interest);
5586 				psce = p->pscc_entries;
5587 				ASSERT(psce->psce_references == 1);
5588 				mutex_destroy(&psce->psce_lock);
5589 				kmem_free(psce->psce_first,
5590 				    sizeof (pm_state_change_t) * PSCCOUNT);
5591 				kmem_free(psce, sizeof (*psce));
5592 				kmem_free(p, sizeof (*p));
5593 			}
5594 		}
5595 		pm_interest[clone] = 0;
5596 		rw_exit(&pm_pscc_interest_rwlock);
5597 	}
5598 	found = 0;
5599 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5600 	for (p = pm_pscc_direct; p; p = pn) {
5601 		pn = p->pscc_next;
5602 		if ((dip && p->pscc_dip == dip) ||
5603 		    (dip == NULL && clone == p->pscc_clone)) {
5604 			ASSERT(clone == p->pscc_clone);
5605 			found++;
5606 			/*
5607 			 * Remove from control list
5608 			 */
5609 			pm_dequeue_pscc(p, &pm_pscc_direct);
5610 			/*
5611 			 * If we're the last reference, free the
5612 			 * entries struct.
5613 			 */
5614 			psce = p->pscc_entries;
5615 			ASSERT(psce);
5616 			if (psce->psce_references == 1) {
5617 				kmem_free(psce->psce_first,
5618 				    PSCCOUNT * sizeof (pm_state_change_t));
5619 				kmem_free(psce, sizeof (*psce));
5620 			} else {
5621 				psce->psce_references--;
5622 			}
5623 			kmem_free(p, sizeof (*p));
5624 		}
5625 	}
5626 	ASSERT(dip == NULL || found);
5627 	rw_exit(&pm_pscc_direct_rwlock);
5628 }
5629 
5630 /*
5631  * Search the indicated list for an entry that matches clone, and return a
5632  * pointer to it.  To be interesting, the entry must have something ready to
5633  * be passed up to the controlling process.
5634  * The returned entry will be locked upon return from this call.
5635  */
5636 static psce_t *
5637 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5638 {
5639 	pscc_t	*p;
5640 	psce_t	*psce;
5641 	rw_enter(lock, RW_READER);
5642 	for (p = *list; p; p = p->pscc_next) {
5643 		if (clone == p->pscc_clone) {
5644 			psce = p->pscc_entries;
5645 			mutex_enter(&psce->psce_lock);
5646 			if (psce->psce_out->size) {
5647 				rw_exit(lock);
5648 				return (psce);
5649 			} else {
5650 				mutex_exit(&psce->psce_lock);
5651 			}
5652 		}
5653 	}
5654 	rw_exit(lock);
5655 	return (NULL);
5656 }
5657 
5658 /*
5659  * Find an entry for a particular clone in the direct list.
5660  */
5661 psce_t *
5662 pm_psc_clone_to_direct(int clone)
5663 {
5664 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5665 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5666 	    &pm_pscc_direct_rwlock));
5667 }
5668 
5669 /*
5670  * Find an entry for a particular clone in the interest list.
5671  */
5672 psce_t *
5673 pm_psc_clone_to_interest(int clone)
5674 {
5675 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5676 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5677 	    &pm_pscc_interest_rwlock));
5678 }
5679 
5680 /*
5681  * Put the given entry at the head of the blocked list
5682  */
5683 void
5684 pm_enqueue_blocked(pm_rsvp_t *p)
5685 {
5686 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5687 	ASSERT(p->pr_next == NULL);
5688 	ASSERT(p->pr_prev == NULL);
5689 	if (pm_blocked_list != NULL) {
5690 		p->pr_next = pm_blocked_list;
5691 		ASSERT(pm_blocked_list->pr_prev == NULL);
5692 		pm_blocked_list->pr_prev = p;
5693 		pm_blocked_list = p;
5694 	} else {
5695 		pm_blocked_list = p;
5696 	}
5697 }
5698 
5699 /*
5700  * Sets every power managed device back to its default threshold
5701  */
5702 void
5703 pm_all_to_default_thresholds(void)
5704 {
5705 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5706 	    (void *) &pm_system_idle_threshold);
5707 }
5708 
5709 static int
5710 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5711 {
5712 	int thr = (int)(*(int *)arg);
5713 
5714 	if (!PM_GET_PM_INFO(dip))
5715 		return (DDI_WALK_CONTINUE);
5716 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5717 	return (DDI_WALK_CONTINUE);
5718 }
5719 
5720 /*
5721  * Returns the current threshold value (in seconds) for the indicated component
5722  */
5723 int
5724 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5725 {
5726 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5727 		return (DDI_FAILURE);
5728 	} else {
5729 		*threshp = cur_threshold(dip, comp);
5730 		return (DDI_SUCCESS);
5731 	}
5732 }
5733 
5734 /*
5735  * To be called when changing the power level of a component of a device.
5736  * On some platforms, changing power on one device may require that power
5737  * be changed on other, related devices in the same transaction.  Thus, we
5738  * always pass this request to the platform power manager so that all the
5739  * affected devices will be locked.
5740  */
5741 void
5742 pm_lock_power(dev_info_t *dip, int *circp)
5743 {
5744 	power_req_t power_req;
5745 	int result;
5746 
5747 	power_req.request_type = PMR_PPM_LOCK_POWER;
5748 	power_req.req.ppm_lock_power_req.who = dip;
5749 	power_req.req.ppm_lock_power_req.circp = circp;
5750 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5751 }
5752 
5753 /*
5754  * Release the lock (or locks) acquired to change the power of a device.
5755  * See comments for pm_lock_power.
5756  */
5757 void
5758 pm_unlock_power(dev_info_t *dip, int circ)
5759 {
5760 	power_req_t power_req;
5761 	int result;
5762 
5763 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5764 	power_req.req.ppm_unlock_power_req.who = dip;
5765 	power_req.req.ppm_unlock_power_req.circ = circ;
5766 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5767 }
5768 
5769 
5770 /*
5771  * Attempt (without blocking) to acquire the lock(s) needed to change the
5772  * power of a component of a device.  See comments for pm_lock_power.
5773  *
5774  * Return: 1 if lock(s) acquired, 0 if not.
5775  */
5776 int
5777 pm_try_locking_power(dev_info_t *dip, int *circp)
5778 {
5779 	power_req_t power_req;
5780 	int result;
5781 
5782 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5783 	power_req.req.ppm_lock_power_req.who = dip;
5784 	power_req.req.ppm_lock_power_req.circp = circp;
5785 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5786 	return (result);
5787 }
5788 
5789 
5790 /*
5791  * Lock power state of a device.
5792  *
5793  * The implementation handles a special case where another thread may have
5794  * acquired the lock and created/launched this thread to do the work.  If
5795  * the lock cannot be acquired immediately, we check to see if this thread
5796  * is registered as a borrower of the lock.  If so, we may proceed without
5797  * the lock.  This assumes that the lending thread blocks on the completion
5798  * of this thread.
5799  *
5800  * Note 1: for use by ppm only.
5801  *
5802  * Note 2: On failing to get the lock immediately, we search lock_loan list
5803  * for curthread (as borrower of the lock).  On a hit, we check that the
5804  * lending thread already owns the lock we want.  It is safe to compare
5805  * devi_busy_thread and thread id of the lender because in the == case (the
5806  * only one we care about) we know that the owner is blocked.  Similarly,
5807  * If we find that curthread isn't registered as a lock borrower, it is safe
5808  * to use the blocking call (ndi_devi_enter) because we know that if we
5809  * weren't already listed as a borrower (upstream on the call stack) we won't
5810  * become one.
5811  */
5812 void
5813 pm_lock_power_single(dev_info_t *dip, int *circp)
5814 {
5815 	lock_loan_t *cur;
5816 
5817 	/* if the lock is available, we are done. */
5818 	if (ndi_devi_tryenter(dip, circp))
5819 		return;
5820 
5821 	mutex_enter(&pm_loan_lock);
5822 	/* see if our thread is registered as a lock borrower. */
5823 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5824 		if (cur->pmlk_borrower == curthread)
5825 			break;
5826 	mutex_exit(&pm_loan_lock);
5827 
5828 	/* if this thread not already registered, it is safe to block */
5829 	if (cur == NULL)
5830 		ndi_devi_enter(dip, circp);
5831 	else {
5832 		/* registered: does lender own the lock we want? */
5833 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5834 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5835 			cur->pmlk_dip = dip;
5836 		} else /* no: just block for it */
5837 			ndi_devi_enter(dip, circp);
5838 
5839 	}
5840 }
5841 
5842 /*
5843  * Drop the lock on the device's power state.  See comment for
5844  * pm_lock_power_single() for special implementation considerations.
5845  *
5846  * Note: for use by ppm only.
5847  */
5848 void
5849 pm_unlock_power_single(dev_info_t *dip, int circ)
5850 {
5851 	lock_loan_t *cur;
5852 
5853 	/* optimization: mutex not needed to check empty list */
5854 	if (lock_loan_head.pmlk_next == NULL) {
5855 		ndi_devi_exit(dip, circ);
5856 		return;
5857 	}
5858 
5859 	mutex_enter(&pm_loan_lock);
5860 	/* see if our thread is registered as a lock borrower. */
5861 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5862 		if (cur->pmlk_borrower == curthread)
5863 			break;
5864 	mutex_exit(&pm_loan_lock);
5865 
5866 	if (cur == NULL || cur->pmlk_dip != dip)
5867 		/* we acquired the lock directly, so return it */
5868 		ndi_devi_exit(dip, circ);
5869 }
5870 
5871 /*
5872  * Try to take the lock for changing the power level of a component.
5873  *
5874  * Note: for use by ppm only.
5875  */
5876 int
5877 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5878 {
5879 	return (ndi_devi_tryenter(dip, circp));
5880 }
5881 
5882 #ifdef	DEBUG
5883 /*
5884  * The following are used only to print out data structures for debugging
5885  */
5886 void
5887 prdeps(char *msg)
5888 {
5889 
5890 	pm_pdr_t *rp;
5891 	int i;
5892 
5893 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
5894 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
5895 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
5896 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
5897 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
5898 		    (void *)rp->pdr_next);
5899 		if (rp->pdr_kept_count != 0) {
5900 			pm_log("kept list = ");
5901 			i = 0;
5902 			while (i < rp->pdr_kept_count) {
5903 				pm_log("%s ", rp->pdr_kept_paths[i]);
5904 				i++;
5905 			}
5906 			pm_log("\n");
5907 		}
5908 	}
5909 }
5910 
5911 void
5912 pr_noinvol(char *hdr)
5913 {
5914 	pm_noinvol_t *ip;
5915 
5916 	pm_log("%s\n", hdr);
5917 	rw_enter(&pm_noinvol_rwlock, RW_READER);
5918 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
5919 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
5920 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
5921 	rw_exit(&pm_noinvol_rwlock);
5922 }
5923 #endif
5924 
5925 /*
5926  * Attempt to apply the thresholds indicated by rp to the node specified by
5927  * dip.
5928  */
5929 void
5930 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5931 {
5932 	PMD_FUNC(pmf, "apply_recorded_thresh")
5933 	int i, j;
5934 	int comps = PM_NUMCMPTS(dip);
5935 	struct pm_component *cp;
5936 	pm_pte_t *ep;
5937 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
5938 
5939 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
5940 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
5941 	PM_LOCK_DIP(dip);
5942 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
5943 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
5944 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
5945 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
5946 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
5947 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
5948 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
5949 		PM_UNLOCK_DIP(dip);
5950 		return;
5951 	}
5952 
5953 	ep = rp->ptr_entries;
5954 	/*
5955 	 * Here we do the special case of a device threshold
5956 	 */
5957 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
5958 		ASSERT(ep && ep->pte_numthresh == 1);
5959 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
5960 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
5961 		PM_UNLOCK_DIP(dip);
5962 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
5963 		if (autopm_enabled)
5964 			pm_rescan(dip);
5965 		return;
5966 	}
5967 	for (i = 0; i < comps; i++) {
5968 		cp = PM_CP(dip, i);
5969 		for (j = 0; j < ep->pte_numthresh; j++) {
5970 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
5971 			    "to %x\n", pmf, j, PM_DEVICE(dip),
5972 			    i, ep->pte_thresh[j]))
5973 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
5974 		}
5975 		ep++;
5976 	}
5977 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
5978 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
5979 	PM_UNLOCK_DIP(dip);
5980 
5981 	if (autopm_enabled)
5982 		pm_rescan(dip);
5983 }
5984 
5985 /*
5986  * Returns true if the threshold specified by rp could be applied to dip
5987  * (that is, the number of components and transitions are the same)
5988  */
5989 int
5990 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
5991 {
5992 	PMD_FUNC(pmf, "valid_thresh")
5993 	int comps, i;
5994 	pm_component_t *cp;
5995 	pm_pte_t *ep;
5996 
5997 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
5998 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
5999 		    rp->ptr_physpath))
6000 		return (0);
6001 	}
6002 	/*
6003 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6004 	 * an entry with numcomps == 0, (since we don't know how many
6005 	 * components there are in advance).  This is always a valid
6006 	 * spec.
6007 	 */
6008 	if (rp->ptr_numcomps == 0) {
6009 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6010 		return (1);
6011 	}
6012 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6013 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6014 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6015 		return (0);
6016 	}
6017 	ep = rp->ptr_entries;
6018 	for (i = 0; i < comps; i++) {
6019 		cp = PM_CP(dip, i);
6020 		if ((ep + i)->pte_numthresh !=
6021 		    cp->pmc_comp.pmc_numlevels - 1) {
6022 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6023 			    pmf, rp->ptr_physpath, i,
6024 			    cp->pmc_comp.pmc_numlevels - 1,
6025 			    (ep + i)->pte_numthresh))
6026 			return (0);
6027 		}
6028 	}
6029 	return (1);
6030 }
6031 
6032 /*
6033  * Remove any recorded threshold for device physpath
6034  * We know there will be at most one.
6035  */
6036 void
6037 pm_unrecord_threshold(char *physpath)
6038 {
6039 	pm_thresh_rec_t *pptr, *ptr;
6040 
6041 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6042 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6043 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6044 			if (pptr) {
6045 				pptr->ptr_next = ptr->ptr_next;
6046 			} else {
6047 				ASSERT(pm_thresh_head == ptr);
6048 				pm_thresh_head = ptr->ptr_next;
6049 			}
6050 			kmem_free(ptr, ptr->ptr_size);
6051 			break;
6052 		}
6053 		pptr = ptr;
6054 	}
6055 	rw_exit(&pm_thresh_rwlock);
6056 }
6057 
6058 /*
6059  * Discard all recorded thresholds.  We are returning to the default pm state.
6060  */
6061 void
6062 pm_discard_thresholds(void)
6063 {
6064 	pm_thresh_rec_t *rp;
6065 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6066 	while (pm_thresh_head) {
6067 		rp = pm_thresh_head;
6068 		pm_thresh_head = rp->ptr_next;
6069 		kmem_free(rp, rp->ptr_size);
6070 	}
6071 	rw_exit(&pm_thresh_rwlock);
6072 }
6073 
6074 /*
6075  * Discard all recorded dependencies.  We are returning to the default pm state.
6076  */
6077 void
6078 pm_discard_dependencies(void)
6079 {
6080 	pm_pdr_t *rp;
6081 	int i;
6082 	size_t length;
6083 
6084 #ifdef DEBUG
6085 	if (pm_debug & PMD_DEP)
6086 		prdeps("Before discard\n");
6087 #endif
6088 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6089 
6090 #ifdef DEBUG
6091 	if (pm_debug & PMD_DEP)
6092 		prdeps("After discard\n");
6093 #endif
6094 	while (pm_dep_head) {
6095 		rp = pm_dep_head;
6096 		if (!rp->pdr_isprop) {
6097 			ASSERT(rp->pdr_satisfied == 0);
6098 			ASSERT(pm_unresolved_deps);
6099 			pm_unresolved_deps--;
6100 		} else {
6101 			ASSERT(pm_prop_deps);
6102 			pm_prop_deps--;
6103 		}
6104 		pm_dep_head = rp->pdr_next;
6105 		if (rp->pdr_kept_count)  {
6106 			for (i = 0; i < rp->pdr_kept_count; i++) {
6107 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6108 				kmem_free(rp->pdr_kept_paths[i], length);
6109 			}
6110 			kmem_free(rp->pdr_kept_paths,
6111 				rp->pdr_kept_count * sizeof (char **));
6112 		}
6113 		kmem_free(rp, rp->pdr_size);
6114 	}
6115 }
6116 
6117 
6118 static int
6119 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6120 {
6121 	_NOTE(ARGUNUSED(arg))
6122 	char *pathbuf;
6123 
6124 	if (PM_GET_PM_INFO(dip) == NULL)
6125 		return (DDI_WALK_CONTINUE);
6126 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6127 	(void) ddi_pathname(dip, pathbuf);
6128 	pm_free_keeper(pathbuf, 0);
6129 	kmem_free(pathbuf, MAXPATHLEN);
6130 	return (DDI_WALK_CONTINUE);
6131 }
6132 
6133 static int
6134 pm_kept_walk(dev_info_t *dip, void *arg)
6135 {
6136 	_NOTE(ARGUNUSED(arg))
6137 	char *pathbuf;
6138 
6139 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6140 	(void) ddi_pathname(dip, pathbuf);
6141 	(void) pm_kept(pathbuf);
6142 	kmem_free(pathbuf, MAXPATHLEN);
6143 
6144 	return (DDI_WALK_CONTINUE);
6145 }
6146 
6147 static int
6148 pm_keeper_walk(dev_info_t *dip, void *arg)
6149 {
6150 	_NOTE(ARGUNUSED(arg))
6151 	char *pathbuf;
6152 
6153 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6154 	(void) ddi_pathname(dip, pathbuf);
6155 	(void) pm_keeper(pathbuf);
6156 	kmem_free(pathbuf, MAXPATHLEN);
6157 
6158 	return (DDI_WALK_CONTINUE);
6159 }
6160 
6161 static char *
6162 pdw_type_decode(int type)
6163 {
6164 	switch (type) {
6165 	case PM_DEP_WK_POWER_ON:
6166 		return ("power on");
6167 	case PM_DEP_WK_POWER_OFF:
6168 		return ("power off");
6169 	case PM_DEP_WK_DETACH:
6170 		return ("detach");
6171 	case PM_DEP_WK_REMOVE_DEP:
6172 		return ("remove dep");
6173 	case PM_DEP_WK_BRINGUP_SELF:
6174 		return ("bringup self");
6175 	case PM_DEP_WK_RECORD_KEEPER:
6176 		return ("add dependent");
6177 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6178 		return ("add dependent property");
6179 	case PM_DEP_WK_KEPT:
6180 		return ("kept");
6181 	case PM_DEP_WK_KEEPER:
6182 		return ("keeper");
6183 	case PM_DEP_WK_ATTACH:
6184 		return ("attach");
6185 	case PM_DEP_WK_CHECK_KEPT:
6186 		return ("check kept");
6187 	case PM_DEP_WK_CPR_SUSPEND:
6188 		return ("suspend");
6189 	case PM_DEP_WK_CPR_RESUME:
6190 		return ("resume");
6191 	default:
6192 		return ("unknown");
6193 	}
6194 
6195 }
6196 
6197 static void
6198 pm_rele_dep(char *keeper)
6199 {
6200 	PMD_FUNC(pmf, "rele_dep")
6201 	pm_pdr_t *dp;
6202 	char *kept_path = NULL;
6203 	dev_info_t *kept = NULL;
6204 	int count = 0;
6205 
6206 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6207 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6208 			continue;
6209 		for (count = 0; count < dp->pdr_kept_count; count++) {
6210 			kept_path = dp->pdr_kept_paths[count];
6211 			if (kept_path == NULL)
6212 				continue;
6213 			kept = pm_name_to_dip(kept_path, 1);
6214 			if (kept) {
6215 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6216 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6217 				    keeper))
6218 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6219 				pm_rele_power(kept);
6220 				ddi_release_devi(kept);
6221 			}
6222 		}
6223 	}
6224 }
6225 
6226 /*
6227  * Called when we are just released from direct PM.  Bring ourself up
6228  * if our keeper is up since dependency is not honored while a kept
6229  * device is under direct PM.
6230  */
6231 static void
6232 pm_bring_self_up(char *keptpath)
6233 {
6234 	PMD_FUNC(pmf, "bring_self_up")
6235 	dev_info_t *kept;
6236 	dev_info_t *keeper;
6237 	pm_pdr_t *dp;
6238 	int i, j;
6239 	int up = 0, circ;
6240 
6241 	kept = pm_name_to_dip(keptpath, 1);
6242 	if (kept == NULL)
6243 		return;
6244 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6245 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6246 		if (dp->pdr_kept_count == 0)
6247 			continue;
6248 		for (i = 0; i < dp->pdr_kept_count; i++) {
6249 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6250 				continue;
6251 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6252 			if (keeper) {
6253 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6254 				    pmf, PM_DEVICE(keeper)))
6255 				PM_LOCK_POWER(keeper, &circ);
6256 				for (j = 0; j < PM_NUMCMPTS(keeper);
6257 				    j++) {
6258 					if (PM_CURPOWER(keeper, j)) {
6259 						PMD(PMD_KEEPS, ("%s: comp="
6260 						    "%d is up\n", pmf, j))
6261 						up++;
6262 					}
6263 				}
6264 				if (up) {
6265 					if (PM_SKBU(kept))
6266 						DEVI(kept)->devi_pm_flags &=
6267 						    ~PMC_SKIP_BRINGUP;
6268 					bring_pmdep_up(kept, 1);
6269 				}
6270 				PM_UNLOCK_POWER(keeper, circ);
6271 				ddi_release_devi(keeper);
6272 			}
6273 		}
6274 	}
6275 	ddi_release_devi(kept);
6276 }
6277 
6278 static void
6279 pm_process_dep_request(pm_dep_wk_t *work)
6280 {
6281 	PMD_FUNC(pmf, "dep_req")
6282 	int ret;
6283 
6284 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6285 	    pdw_type_decode(work->pdw_type)))
6286 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6287 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6288 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6289 
6290 	switch (work->pdw_type) {
6291 	case PM_DEP_WK_POWER_ON:
6292 		/* Bring up the kept devices and put a hold on them */
6293 		bring_wekeeps_up(work->pdw_keeper);
6294 		break;
6295 	case PM_DEP_WK_POWER_OFF:
6296 		/* Release the kept devices */
6297 		pm_rele_dep(work->pdw_keeper);
6298 		break;
6299 	case PM_DEP_WK_DETACH:
6300 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6301 		break;
6302 	case PM_DEP_WK_REMOVE_DEP:
6303 		pm_discard_dependencies();
6304 		break;
6305 	case PM_DEP_WK_BRINGUP_SELF:
6306 		/*
6307 		 * We deferred satisfying our dependency till now, so satisfy
6308 		 * it again and bring ourselves up.
6309 		 */
6310 		pm_bring_self_up(work->pdw_kept);
6311 		break;
6312 	case PM_DEP_WK_RECORD_KEEPER:
6313 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6314 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6315 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6316 		break;
6317 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6318 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6319 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6320 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6321 		break;
6322 	case PM_DEP_WK_KEPT:
6323 		ret = pm_kept(work->pdw_kept);
6324 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6325 		    ret))
6326 		break;
6327 	case PM_DEP_WK_KEEPER:
6328 		ret = pm_keeper(work->pdw_keeper);
6329 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6330 		    pmf, ret))
6331 		break;
6332 	case PM_DEP_WK_ATTACH:
6333 		ret = pm_keeper(work->pdw_keeper);
6334 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6335 		    pmf, ret))
6336 		ret = pm_kept(work->pdw_kept);
6337 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6338 		    pmf, ret))
6339 		break;
6340 	case PM_DEP_WK_CHECK_KEPT:
6341 		ret = pm_is_kept(work->pdw_kept);
6342 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6343 		    pmf, work->pdw_kept, ret))
6344 		break;
6345 	case PM_DEP_WK_CPR_SUSPEND:
6346 		pm_discard_dependencies();
6347 		break;
6348 	case PM_DEP_WK_CPR_RESUME:
6349 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6350 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6351 		break;
6352 	default:
6353 		ASSERT(0);
6354 		break;
6355 	}
6356 	/*
6357 	 * Free the work structure if the requester is not waiting
6358 	 * Otherwise it is the requester's responsiblity to free it.
6359 	 */
6360 	if (!work->pdw_wait) {
6361 		if (work->pdw_keeper)
6362 			kmem_free(work->pdw_keeper,
6363 			    strlen(work->pdw_keeper) + 1);
6364 		if (work->pdw_kept)
6365 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6366 		kmem_free(work, sizeof (pm_dep_wk_t));
6367 	} else {
6368 		/*
6369 		 * Notify requester if it is waiting for it.
6370 		 */
6371 		work->pdw_ret = ret;
6372 		work->pdw_done = 1;
6373 		cv_signal(&work->pdw_cv);
6374 	}
6375 }
6376 
6377 /*
6378  * Process PM dependency requests.
6379  */
6380 static void
6381 pm_dep_thread(void)
6382 {
6383 	pm_dep_wk_t *work;
6384 	callb_cpr_t cprinfo;
6385 
6386 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6387 	    "pm_dep_thread");
6388 	for (;;) {
6389 		mutex_enter(&pm_dep_thread_lock);
6390 		if (pm_dep_thread_workq == NULL) {
6391 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6392 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6393 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6394 		}
6395 		work = pm_dep_thread_workq;
6396 		pm_dep_thread_workq = work->pdw_next;
6397 		if (pm_dep_thread_tail == work)
6398 			pm_dep_thread_tail = work->pdw_next;
6399 		mutex_exit(&pm_dep_thread_lock);
6400 		pm_process_dep_request(work);
6401 
6402 	}
6403 	/*NOTREACHED*/
6404 }
6405 
6406 /*
6407  * Set the power level of the indicated device to unknown (if it is not a
6408  * backwards compatible device), as it has just been resumed, and it won't
6409  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6410  */
6411 void
6412 pm_forget_power_level(dev_info_t *dip)
6413 {
6414 	dev_info_t *pdip = ddi_get_parent(dip);
6415 	int i, count = 0;
6416 
6417 	if (!PM_ISBC(dip)) {
6418 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6419 			count += (PM_CURPOWER(dip, i) == 0);
6420 
6421 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6422 			e_pm_hold_rele_power(pdip, count);
6423 
6424 		/*
6425 		 * Count this as a power cycle if we care
6426 		 */
6427 		if (DEVI(dip)->devi_pm_volpmd &&
6428 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6429 			DEVI(dip)->devi_pm_volpmd = 0;
6430 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6431 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6432 	}
6433 }
6434 
6435 /*
6436  * This function advises the caller whether it should make a power-off
6437  * transition at this time or not.  If the transition is not advised
6438  * at this time, the time that the next power-off transition can
6439  * be made from now is returned through "intervalp" pointer.
6440  * This function returns:
6441  *
6442  *  1  power-off advised
6443  *  0  power-off not advised, intervalp will point to seconds from
6444  *	  now that a power-off is advised.  If it is passed the number
6445  *	  of years that policy specifies the device should last,
6446  *	  a large number is returned as the time interval.
6447  *  -1  error
6448  */
6449 int
6450 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6451 {
6452 	PMD_FUNC(pmf, "pm_trans_check")
6453 	char dbuf[DC_SCSI_MFR_LEN];
6454 	struct pm_scsi_cycles *scp;
6455 	int service_years, service_weeks, full_years;
6456 	time_t now, service_seconds, tdiff;
6457 	time_t within_year, when_allowed;
6458 	char *ptr;
6459 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6460 	int cycles_diff, cycles_over;
6461 
6462 	if (datap == NULL) {
6463 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6464 		return (-1);
6465 	}
6466 
6467 	if (datap->format == DC_SCSI_FORMAT) {
6468 		/*
6469 		 * Power cycles of the scsi drives are distributed
6470 		 * over 5 years with the following percentage ratio:
6471 		 *
6472 		 *	30%, 25%, 20%, 15%, and 10%
6473 		 *
6474 		 * The power cycle quota for each year is distributed
6475 		 * linearly through out the year.  The equation for
6476 		 * determining the expected cycles is:
6477 		 *
6478 		 *	e = a * (n / y)
6479 		 *
6480 		 * e = expected cycles
6481 		 * a = allocated cycles for this year
6482 		 * n = number of seconds since beginning of this year
6483 		 * y = number of seconds in a year
6484 		 *
6485 		 * Note that beginning of the year starts the day that
6486 		 * the drive has been put on service.
6487 		 *
6488 		 * If the drive has passed its expected cycles, we
6489 		 * can determine when it can start to power cycle
6490 		 * again to keep it on track to meet the 5-year
6491 		 * life expectancy.  The equation for determining
6492 		 * when to power cycle is:
6493 		 *
6494 		 *	w = y * (c / a)
6495 		 *
6496 		 * w = when it can power cycle again
6497 		 * y = number of seconds in a year
6498 		 * c = current number of cycles
6499 		 * a = allocated cycles for the year
6500 		 *
6501 		 */
6502 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6503 
6504 		scp = &datap->un.scsi_cycles;
6505 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6506 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6507 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6508 		if (scp->ncycles < 0 || scp->flag != 0) {
6509 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6510 			return (-1);
6511 		}
6512 
6513 		if (scp->ncycles > scp->lifemax) {
6514 			*intervalp = (LONG_MAX / hz);
6515 			return (0);
6516 		}
6517 
6518 		/*
6519 		 * convert service date to time_t
6520 		 */
6521 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6522 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6523 		ptr = dbuf;
6524 		service_years = stoi(&ptr) - EPOCH_YEAR;
6525 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6526 		    DC_SCSI_WEEK_LEN);
6527 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6528 
6529 		/*
6530 		 * scsi standard does not specify WW data,
6531 		 * could be (00-51) or (01-52)
6532 		 */
6533 		ptr = dbuf;
6534 		service_weeks = stoi(&ptr);
6535 		if (service_years < 0 ||
6536 		    service_weeks < 0 || service_weeks > 52) {
6537 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6538 			    pmf, service_years, service_weeks))
6539 			return (-1);
6540 		}
6541 
6542 		/*
6543 		 * calculate service date in seconds-since-epoch,
6544 		 * adding one day for each leap-year.
6545 		 *
6546 		 * (years-since-epoch + 2) fixes integer truncation,
6547 		 * example: (8) leap-years during [1972, 2000]
6548 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6549 		 */
6550 		service_seconds = (service_years * DC_SPY) +
6551 		    (service_weeks * DC_SPW) +
6552 		    (((service_years + 2) / 4) * DC_SPD);
6553 
6554 		now = gethrestime_sec();
6555 		/*
6556 		 * since the granularity of 'svc_date' is day not second,
6557 		 * 'now' should be rounded up to full day.
6558 		 */
6559 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6560 		if (service_seconds > now) {
6561 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6562 			    "than now (%ld)!\n", pmf, service_seconds, now))
6563 			return (-1);
6564 		}
6565 
6566 		tdiff = now - service_seconds;
6567 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6568 
6569 		/*
6570 		 * NOTE - Leap years are not considered in the calculations
6571 		 * below.
6572 		 */
6573 		full_years = (tdiff / DC_SPY);
6574 		if ((full_years >= DC_SCSI_NPY) &&
6575 		    (scp->ncycles <= scp->lifemax))
6576 			return (1);
6577 
6578 		/*
6579 		 * Determine what is the normal cycle usage for the
6580 		 * device at the beginning and the end of this year.
6581 		 */
6582 		lower_bound_cycles = (!full_years) ? 0 :
6583 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6584 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6585 
6586 		if (scp->ncycles <= lower_bound_cycles)
6587 			return (1);
6588 
6589 		/*
6590 		 * The linear slope that determines how many cycles
6591 		 * are allowed this year is number of seconds
6592 		 * passed this year over total number of seconds in a year.
6593 		 */
6594 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6595 		within_year = (tdiff % DC_SPY);
6596 		cycles_allowed = lower_bound_cycles +
6597 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6598 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6599 		    full_years, within_year))
6600 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6601 		    cycles_allowed))
6602 
6603 		if (scp->ncycles <= cycles_allowed)
6604 			return (1);
6605 
6606 		/*
6607 		 * The transition is not advised now but we can
6608 		 * determine when the next transition can be made.
6609 		 *
6610 		 * Depending on how many cycles the device has been
6611 		 * over-used, we may need to skip years with
6612 		 * different percentage quota in order to determine
6613 		 * when the next transition can be made.
6614 		 */
6615 		cycles_over = (scp->ncycles - lower_bound_cycles);
6616 		while (cycles_over > cycles_diff) {
6617 			full_years++;
6618 			if (full_years >= DC_SCSI_NPY) {
6619 				*intervalp = (LONG_MAX / hz);
6620 				return (0);
6621 			}
6622 			cycles_over -= cycles_diff;
6623 			lower_bound_cycles = upper_bound_cycles;
6624 			upper_bound_cycles =
6625 			    (scp->lifemax * pcnt[full_years]) / 100;
6626 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6627 		}
6628 
6629 		/*
6630 		 * The linear slope that determines when the next transition
6631 		 * can be made is the relative position of used cycles within a
6632 		 * year over total number of cycles within that year.
6633 		 */
6634 		when_allowed = service_seconds + (full_years * DC_SPY) +
6635 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6636 		*intervalp = (when_allowed - now);
6637 		if (*intervalp > (LONG_MAX / hz))
6638 			*intervalp = (LONG_MAX / hz);
6639 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6640 		    *intervalp))
6641 		return (0);
6642 	}
6643 
6644 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6645 	return (-1);
6646 }
6647 
6648 /*
6649  * Nexus drivers call into pm framework to indicate which child driver is about
6650  * to be installed.  In some platforms, ppm may need to configure the hardware
6651  * for successful installation of a driver.
6652  */
6653 int
6654 pm_init_child(dev_info_t *dip)
6655 {
6656 	power_req_t power_req;
6657 
6658 	ASSERT(ddi_binding_name(dip));
6659 	ASSERT(ddi_get_name_addr(dip));
6660 	pm_ppm_claim(dip);
6661 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6662 		power_req.request_type = PMR_PPM_INIT_CHILD;
6663 		power_req.req.ppm_config_req.who = dip;
6664 		ASSERT(PPM(dip) != NULL);
6665 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6666 		    NULL));
6667 	} else {
6668 #ifdef DEBUG
6669 		/* pass it to the default handler so we can debug things */
6670 		power_req.request_type = PMR_PPM_INIT_CHILD;
6671 		power_req.req.ppm_config_req.who = dip;
6672 		(void) pm_ctlops(NULL, dip,
6673 		    DDI_CTLOPS_POWER, &power_req, NULL);
6674 #endif
6675 	}
6676 	return (DDI_SUCCESS);
6677 }
6678 
6679 /*
6680  * Bring parent of a node that is about to be probed up to full power, and
6681  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6682  * it is time to let it go down again
6683  */
6684 void
6685 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6686 {
6687 	int result;
6688 	power_req_t power_req;
6689 
6690 	bzero(cp, sizeof (*cp));
6691 	cp->ppc_dip = dip;
6692 
6693 	pm_ppm_claim(dip);
6694 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6695 		power_req.request_type = PMR_PPM_PRE_PROBE;
6696 		power_req.req.ppm_config_req.who = dip;
6697 		ASSERT(PPM(dip) != NULL);
6698 		(void) pm_ctlops(PPM(dip), dip,
6699 		    DDI_CTLOPS_POWER, &power_req, &result);
6700 		cp->ppc_ppm = PPM(dip);
6701 	} else {
6702 #ifdef DEBUG
6703 		/* pass it to the default handler so we can debug things */
6704 		power_req.request_type = PMR_PPM_PRE_PROBE;
6705 		power_req.req.ppm_config_req.who = dip;
6706 		(void) pm_ctlops(NULL, dip,
6707 		    DDI_CTLOPS_POWER, &power_req, &result);
6708 #endif
6709 		cp->ppc_ppm = NULL;
6710 	}
6711 }
6712 
6713 int
6714 pm_pre_config(dev_info_t *dip, char *devnm)
6715 {
6716 	PMD_FUNC(pmf, "pre_config")
6717 	int ret;
6718 
6719 	if (MDI_VHCI(dip)) {
6720 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6721 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6722 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6723 	} else if (!PM_GET_PM_INFO(dip))
6724 		return (DDI_SUCCESS);
6725 
6726 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6727 	pm_hold_power(dip);
6728 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6729 	if (ret != DDI_SUCCESS)
6730 		pm_rele_power(dip);
6731 	return (ret);
6732 }
6733 
6734 /*
6735  * This routine is called by devfs during its walk to unconfigue a node.
6736  * If the call is due to auto mod_unloads and the dip is not at its
6737  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6738  * return DDI_SUCCESS.
6739  */
6740 int
6741 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6742 {
6743 	PMD_FUNC(pmf, "pre_unconfig")
6744 	int ret;
6745 
6746 	if (MDI_VHCI(dip)) {
6747 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6748 		    PM_DEVICE(dip), flags))
6749 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6750 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6751 	} else if (!PM_GET_PM_INFO(dip))
6752 		return (DDI_SUCCESS);
6753 
6754 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6755 	    flags))
6756 	*held = 0;
6757 
6758 	/*
6759 	 * If the dip is a leaf node, don't power it up.
6760 	 */
6761 	if (!ddi_get_child(dip))
6762 		return (DDI_SUCCESS);
6763 
6764 	/*
6765 	 * Do not power up the node if it is called due to auto-modunload.
6766 	 */
6767 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6768 		return (DDI_FAILURE);
6769 
6770 	pm_hold_power(dip);
6771 	*held = 1;
6772 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6773 	if (ret != DDI_SUCCESS) {
6774 		pm_rele_power(dip);
6775 		*held = 0;
6776 	}
6777 	return (ret);
6778 }
6779 
6780 /*
6781  * Notify ppm of attach action.  Parent is already held at full power by
6782  * probe action.
6783  */
6784 void
6785 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6786 {
6787 	static char *me = "pm_pre_attach";
6788 	power_req_t power_req;
6789 	int result;
6790 
6791 	/*
6792 	 * Initialize and fill in the PPM cookie
6793 	 */
6794 	bzero(cp, sizeof (*cp));
6795 	cp->ppc_cmd = (int)cmd;
6796 	cp->ppc_ppm = PPM(dip);
6797 	cp->ppc_dip = dip;
6798 
6799 	/*
6800 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6801 	 * Power Management stuff. DDI_RESUME also has to purge it's
6802 	 * powerlevel information.
6803 	 */
6804 	switch (cmd) {
6805 	case DDI_ATTACH:
6806 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6807 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6808 			power_req.req.ppm_config_req.who = dip;
6809 			ASSERT(PPM(dip));
6810 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6811 			    &power_req, &result);
6812 		}
6813 #ifdef DEBUG
6814 		else {
6815 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6816 			power_req.req.ppm_config_req.who = dip;
6817 			(void) pm_ctlops(NULL, dip,
6818 			    DDI_CTLOPS_POWER, &power_req, &result);
6819 		}
6820 #endif
6821 		break;
6822 	case DDI_RESUME:
6823 		pm_forget_power_level(dip);
6824 
6825 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6826 			power_req.request_type = PMR_PPM_PRE_RESUME;
6827 			power_req.req.resume_req.who = cp->ppc_dip;
6828 			power_req.req.resume_req.cmd =
6829 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6830 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6831 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6832 			    DDI_CTLOPS_POWER, &power_req, &result);
6833 		}
6834 #ifdef DEBUG
6835 		else {
6836 			power_req.request_type = PMR_PPM_PRE_RESUME;
6837 			power_req.req.resume_req.who = cp->ppc_dip;
6838 			power_req.req.resume_req.cmd =
6839 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6840 			(void) pm_ctlops(NULL, cp->ppc_dip,
6841 			    DDI_CTLOPS_POWER, &power_req, &result);
6842 		}
6843 #endif
6844 		break;
6845 
6846 	case DDI_PM_RESUME:
6847 		break;
6848 
6849 	default:
6850 		panic(me);
6851 	}
6852 }
6853 
6854 /*
6855  * Nexus drivers call into pm framework to indicate which child driver is
6856  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6857  * hardware since the device driver is no longer installed.
6858  */
6859 int
6860 pm_uninit_child(dev_info_t *dip)
6861 {
6862 	power_req_t power_req;
6863 
6864 	ASSERT(ddi_binding_name(dip));
6865 	ASSERT(ddi_get_name_addr(dip));
6866 	pm_ppm_claim(dip);
6867 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6868 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6869 		power_req.req.ppm_config_req.who = dip;
6870 		ASSERT(PPM(dip));
6871 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6872 		    NULL));
6873 	} else {
6874 #ifdef DEBUG
6875 		/* pass it to the default handler so we can debug things */
6876 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6877 		power_req.req.ppm_config_req.who = dip;
6878 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6879 #endif
6880 	}
6881 	return (DDI_SUCCESS);
6882 }
6883 /*
6884  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
6885  * Also notify ppm of result of probe if there is a ppm that cares
6886  */
6887 void
6888 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
6889 {
6890 	_NOTE(ARGUNUSED(probe_failed))
6891 	int result;
6892 	power_req_t power_req;
6893 
6894 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6895 		power_req.request_type = PMR_PPM_POST_PROBE;
6896 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6897 		power_req.req.ppm_config_req.result = ret;
6898 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6899 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
6900 		    &power_req, &result);
6901 	}
6902 #ifdef DEBUG
6903 	else {
6904 		power_req.request_type = PMR_PPM_POST_PROBE;
6905 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6906 		power_req.req.ppm_config_req.result = ret;
6907 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
6908 		    &power_req, &result);
6909 	}
6910 #endif
6911 }
6912 
6913 void
6914 pm_post_config(dev_info_t *dip, char *devnm)
6915 {
6916 	PMD_FUNC(pmf, "post_config")
6917 
6918 	if (MDI_VHCI(dip)) {
6919 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6920 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
6921 		return;
6922 	} else if (!PM_GET_PM_INFO(dip))
6923 		return;
6924 
6925 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6926 	pm_rele_power(dip);
6927 }
6928 
6929 void
6930 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
6931 {
6932 	PMD_FUNC(pmf, "post_unconfig")
6933 
6934 	if (MDI_VHCI(dip)) {
6935 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
6936 		    PM_DEVICE(dip), held))
6937 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
6938 		return;
6939 	} else if (!PM_GET_PM_INFO(dip))
6940 		return;
6941 
6942 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
6943 	    held))
6944 	if (!held)
6945 		return;
6946 	/*
6947 	 * We have held power in pre_unconfig, release it here.
6948 	 */
6949 	pm_rele_power(dip);
6950 }
6951 
6952 /*
6953  * Notify ppm of result of attach if there is a ppm that cares
6954  */
6955 void
6956 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
6957 {
6958 	int result;
6959 	power_req_t power_req;
6960 	dev_info_t	*dip;
6961 
6962 	if (cp->ppc_cmd != DDI_ATTACH)
6963 		return;
6964 
6965 	dip = cp->ppc_dip;
6966 
6967 	if (ret == DDI_SUCCESS) {
6968 		/*
6969 		 * Attach succeeded, so proceed to doing post-attach pm tasks
6970 		 */
6971 		if (PM_GET_PM_INFO(dip) == NULL)
6972 			(void) pm_start(dip);
6973 	} else {
6974 		/*
6975 		 * Attach may have got pm started before failing
6976 		 */
6977 		pm_stop(dip);
6978 	}
6979 
6980 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6981 		power_req.request_type = PMR_PPM_POST_ATTACH;
6982 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6983 		power_req.req.ppm_config_req.result = ret;
6984 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6985 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6986 		    DDI_CTLOPS_POWER, &power_req, &result);
6987 	}
6988 #ifdef DEBUG
6989 	else {
6990 		power_req.request_type = PMR_PPM_POST_ATTACH;
6991 		power_req.req.ppm_config_req.who = cp->ppc_dip;
6992 		power_req.req.ppm_config_req.result = ret;
6993 		(void) pm_ctlops(NULL, cp->ppc_dip,
6994 		    DDI_CTLOPS_POWER, &power_req, &result);
6995 	}
6996 #endif
6997 }
6998 
6999 /*
7000  * Notify ppm of attach action.  Parent is already held at full power by
7001  * probe action.
7002  */
7003 void
7004 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7005 {
7006 	int result;
7007 	power_req_t power_req;
7008 
7009 	bzero(cp, sizeof (*cp));
7010 	cp->ppc_dip = dip;
7011 	cp->ppc_cmd = (int)cmd;
7012 
7013 	switch (cmd) {
7014 	case DDI_DETACH:
7015 		pm_detaching(dip);		/* suspend pm while detaching */
7016 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7017 			power_req.request_type = PMR_PPM_PRE_DETACH;
7018 			power_req.req.ppm_config_req.who = dip;
7019 			ASSERT(PPM(dip));
7020 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7021 			    &power_req, &result);
7022 			cp->ppc_ppm = PPM(dip);
7023 		} else {
7024 #ifdef DEBUG
7025 			/* pass to the default handler so we can debug things */
7026 			power_req.request_type = PMR_PPM_PRE_DETACH;
7027 			power_req.req.ppm_config_req.who = dip;
7028 			(void) pm_ctlops(NULL, dip,
7029 			    DDI_CTLOPS_POWER, &power_req, &result);
7030 #endif
7031 			cp->ppc_ppm = NULL;
7032 		}
7033 		break;
7034 
7035 	default:
7036 		break;
7037 	}
7038 }
7039 
/*
 * Record a "no involuntary power cycles" entry for the device named by
 * path.  The device is either a leaf node that exported the
 * "no-involuntary-power-cycles" prop., (if devi_pm_noinvol count is 0) or
 * an ancestor of such a node.  We need to make an entry to record the
 * details, which includes certain flag settings.
 *
 * path, flags, noinvolpm, volpmd and wasvolpmd are copied into a newly
 * allocated entry.  If major is (major_t)-1 the major number is derived
 * from the path instead.  The entry is linked into the pm_noinvol list,
 * which this routine keeps ordered by strcmp() of the path, under
 * pm_noinvol_rwlock held as writer.  Finding an entry with the same path
 * already on the list is fatal (panic).
 */
static void
pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
    int wasvolpmd, major_t major)
{
	PMD_FUNC(pmf, "record_invol_path")
	major_t pm_path_to_major(char *);
	size_t plen;
	pm_noinvol_t *ip, *np, *pp;
	pp = NULL;

	/* build the new entry before taking the list lock */
	plen = strlen(path) + 1;
	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
	np->ni_size = plen;	/* remembered so the path can be freed later */
	np->ni_path = kmem_alloc(plen, KM_SLEEP);
	np->ni_noinvolpm = noinvolpm;
	np->ni_volpmd = volpmd;
	np->ni_wasvolpmd = wasvolpmd;
	np->ni_flags = flags;
	(void) strcpy(np->ni_path, path);
	/*
	 * If we haven't actually seen the node attached, it is hard to figure
	 * out its major.  If we could hold the node by path, we would be much
	 * happier here.
	 */
	if (major == (major_t)-1) {
		np->ni_major = pm_path_to_major(path);
	} else {
		np->ni_major = major;
	}
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	/* pp trails ip by one entry so we can link in front of ip */
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		int comp = strcmp(path, ip->ni_path);
		if (comp < 0) {
			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
			    pmf, path, ip->ni_path))
			/* insert before current entry */
			np->ni_next = ip;
			if (pp) {
				pp->ni_next = np;
			} else {
				pm_noinvol_head = np;
			}
			rw_exit(&pm_noinvol_rwlock);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("record_invol_path exit0");
#endif
			return;
		} else if (comp == 0) {
			/* duplicate path: the list lock is still held here */
			panic("%s already in pm_noinvol list", path);
		}
	}
	/*
	 * If we did not find an entry in the list that this should go before,
	 * then it must go at the end
	 */
	if (pp) {
		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
		    pp->ni_path))
		ASSERT(pp->ni_next == 0);
		pp->ni_next = np;
	} else {
		/* the loop never ran, so the list was empty */
		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
		ASSERT(!pm_noinvol_head);
		pm_noinvol_head = np;
	}
	rw_exit(&pm_noinvol_rwlock);
#ifdef DEBUG
	if (pm_debug & PMD_NOINVOL)
		pr_noinvol("record_invol_path exit");
#endif
}
7117 
7118 void
7119 pm_record_invol(dev_info_t *dip)
7120 {
7121 	char *pathbuf;
7122 	int pm_all_components_off(dev_info_t *);
7123 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7124 
7125 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7126 	(void) ddi_pathname(dip, pathbuf);
7127 
7128 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7129 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7130 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7131 
7132 	/*
7133 	 * If this child's detach will be holding up its ancestors, then we
7134 	 * allow for an exception to that if all children of this type have
7135 	 * gone down voluntarily.
7136 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7137 	 */
7138 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7139 	    dip);
7140 	kmem_free(pathbuf, MAXPATHLEN);
7141 }
7142 
7143 void
7144 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7145 {
7146 	dev_info_t *dip = cp->ppc_dip;
7147 	int result;
7148 	power_req_t power_req;
7149 
7150 	switch (cp->ppc_cmd) {
7151 	case DDI_DETACH:
7152 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7153 			power_req.request_type = PMR_PPM_POST_DETACH;
7154 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7155 			power_req.req.ppm_config_req.result = ret;
7156 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7157 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7158 			    DDI_CTLOPS_POWER, &power_req, &result);
7159 		}
7160 #ifdef DEBUG
7161 		else {
7162 			power_req.request_type = PMR_PPM_POST_DETACH;
7163 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7164 			power_req.req.ppm_config_req.result = ret;
7165 			(void) pm_ctlops(NULL, cp->ppc_dip,
7166 			    DDI_CTLOPS_POWER, &power_req, &result);
7167 		}
7168 #endif
7169 		if (ret == DDI_SUCCESS) {
7170 			/*
7171 			 * For hotplug detach we assume it is *really* gone
7172 			 */
7173 			if (cp->ppc_cmd == DDI_DETACH &&
7174 			    ((DEVI(dip)->devi_pm_flags &
7175 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7176 			    DEVI(dip)->devi_pm_noinvolpm))
7177 				pm_record_invol(dip);
7178 			DEVI(dip)->devi_pm_flags &=
7179 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7180 
7181 			/*
7182 			 * If console fb is detaching, then we don't need to
7183 			 * worry any more about it going off (pm_detaching has
7184 			 * brought up all components)
7185 			 */
7186 			if (PM_IS_CFB(dip)) {
7187 				mutex_enter(&pm_cfb_lock);
7188 				ASSERT(cfb_dip_detaching);
7189 				ASSERT(cfb_dip == NULL);
7190 				ASSERT(pm_cfb_comps_off == 0);
7191 				cfb_dip_detaching = NULL;
7192 				mutex_exit(&pm_cfb_lock);
7193 			}
7194 			pm_stop(dip);	/* make it permanent */
7195 		} else {
7196 			if (PM_IS_CFB(dip)) {
7197 				mutex_enter(&pm_cfb_lock);
7198 				ASSERT(cfb_dip_detaching);
7199 				ASSERT(cfb_dip == NULL);
7200 				ASSERT(pm_cfb_comps_off == 0);
7201 				cfb_dip = cfb_dip_detaching;
7202 				cfb_dip_detaching = NULL;
7203 				mutex_exit(&pm_cfb_lock);
7204 			}
7205 			pm_detach_failed(dip);	/* resume power management */
7206 		}
7207 		break;
7208 	case DDI_PM_SUSPEND:
7209 		break;
7210 	case DDI_SUSPEND:
7211 		break;				/* legal, but nothing to do */
7212 	default:
7213 #ifdef DEBUG
7214 		panic("pm_post_detach: unrecognized cmd %d for detach",
7215 		    cp->ppc_cmd);
7216 		/*NOTREACHED*/
7217 #else
7218 		break;
7219 #endif
7220 	}
7221 }
7222 
7223 /*
7224  * Called after vfs_mountroot has got the clock started to fix up timestamps
7225  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7226  * devices look busy but have a 0 busycnt
7227  */
7228 int
7229 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7230 {
7231 	_NOTE(ARGUNUSED(arg))
7232 
7233 	pm_info_t *info = PM_GET_PM_INFO(dip);
7234 	struct pm_component *cp;
7235 	int i;
7236 
7237 	if (!info)
7238 		return (DDI_WALK_CONTINUE);
7239 	PM_LOCK_BUSY(dip);
7240 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7241 		cp = PM_CP(dip, i);
7242 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7243 			cp->pmc_timestamp = gethrestime_sec();
7244 	}
7245 	PM_UNLOCK_BUSY(dip);
7246 	return (DDI_WALK_CONTINUE);
7247 }
7248 
/*
 * Called at attach time to see if the device being attached has a record in
 * the no involuntary power cycles list.  If so, we do some bookkeeping on the
 * parents and set a flag in the dip
 *
 * The work is done at most once per dip: PMC_NOINVOL_DONE is set in
 * devi_pm_flags on the first call and later calls return immediately.
 * When a matching entry (same device path) is found, its flags and
 * noinvolpm/volpmd counts are copied into the dip, the entry is unlinked
 * and freed, and pm_noinvol_update() is called with PM_BP_NOINVOL_ATTACH.
 */
void
pm_noinvol_specd(dev_info_t *dip)
{
	PMD_FUNC(pmf, "noinvol_specd")
	char *pathbuf;
	pm_noinvol_t *ip, *pp = NULL;
	int wasvolpmd;
	int found = 0;

	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
		return;
	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);

	PM_LOCK_DIP(dip);
	DEVI(dip)->devi_pm_volpmd = 0;
	DEVI(dip)->devi_pm_noinvolpm = 0;
	/*
	 * First pass: look for the path with the list lock held only as
	 * reader, so the common "no entry" case never takes the writer lock.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			found++;
			break;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	if (!found) {
		PM_UNLOCK_DIP(dip);
		kmem_free(pathbuf, MAXPATHLEN);
		return;
	}
	/*
	 * Second pass: the reader lock was dropped above, so search again
	 * from the head as writer before modifying the list.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	pp = NULL;
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
			/*
			 * Handle special case of console fb
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				cfb_dip = dip;
				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
				mutex_exit(&pm_cfb_lock);
			}
			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
			ASSERT((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm);
			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
			    ip->ni_noinvolpm, ip->ni_volpmd,
			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
			/*
			 * free the entry in hopes the list will now be empty
			 * and we won't have to search it any more until the
			 * device detaches
			 */
			if (pp) {
				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
				    pmf, ip->ni_path, pp->ni_path))
				pp->ni_next = ip->ni_next;
			} else {
				PMD(PMD_NOINVOL, ("%s: free %s head\n",
				    pmf, ip->ni_path))
				ASSERT(pm_noinvol_head == ip);
				pm_noinvol_head = ip->ni_next;
			}
			PM_UNLOCK_DIP(dip);
			/* save what we still need before the entry is freed */
			wasvolpmd = ip->ni_wasvolpmd;
			rw_exit(&pm_noinvol_rwlock);
			/* ip is unlinked, so it can be freed without the lock */
			kmem_free(ip->ni_path, ip->ni_size);
			kmem_free(ip, sizeof (*ip));
			/*
			 * Now walk up the tree decrementing devi_pm_noinvolpm
			 * (and volpmd if appropriate)
			 */
			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
			    wasvolpmd, pathbuf, dip);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("noinvol_specd exit");
#endif
			kmem_free(pathbuf, MAXPATHLEN);
			return;
		}
	}
	/*
	 * The entry seen in the first pass was gone by the time the writer
	 * lock was acquired; nothing to do.
	 */
	kmem_free(pathbuf, MAXPATHLEN);
	rw_exit(&pm_noinvol_rwlock);
	PM_UNLOCK_DIP(dip);
}
7352 
7353 int
7354 pm_all_components_off(dev_info_t *dip)
7355 {
7356 	int i;
7357 	pm_component_t *cp;
7358 
7359 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7360 		cp = PM_CP(dip, i);
7361 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7362 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7363 			return (0);
7364 	}
7365 	return (1);	/* all off */
7366 }
7367 
/*
 * Make sure that all "no involuntary power cycles" devices are attached.
 * Called before doing a cpr suspend to make sure the driver has a say about
 * the power cycle
 *
 * Returns 1 when every eligible list entry has been dealt with, in which
 * case module unloading is left disabled (pm_reattach_noinvol_fini()
 * re-enables it).  Returns 0, after re-enabling module unloading, if a
 * device could not be held or its list entry did not go away after a
 * successful hold.
 */
int
pm_reattach_noinvol(void)
{
	PMD_FUNC(pmf, "reattach_noinvol")
	pm_noinvol_t *ip;
	char *path;
	dev_info_t *dip;

	/*
	 * Prevent the modunload thread from unloading any modules until we
	 * have completely stopped all kernel threads.
	 */
	modunload_disable();
	/*
	 * NOTE(review): this first pass walks and writes the list without
	 * taking pm_noinvol_rwlock -- confirm that callers guarantee no
	 * concurrent list updates at this point.
	 */
	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
		/*
		 * Forget we've ever seen any entry
		 */
		ip->ni_persistent = 0;
	}
restart:
	/*
	 * Each successful hold drops the lock and may change the list, so the
	 * scan starts over from the head every time.  ni_persistent marks
	 * entries that have already been tried once.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
		major_t maj;
		maj = ip->ni_major;
		path = ip->ni_path;
		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
			if (ip->ni_persistent) {
				/*
				 * If we weren't able to make this entry
				 * go away, then we give up, as
				 * holding/attaching the driver ought to have
				 * resulted in this entry being deleted
				 */
				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
				    "(%s|%d)\n", pmf, ip->ni_path,
				    ddi_major_to_name(maj), (int)maj))
				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
				    ip->ni_path);
				modunload_enable();
				rw_exit(&pm_noinvol_rwlock);
				return (0);
			}
			ip->ni_persistent++;
			rw_exit(&pm_noinvol_rwlock);
			/*
			 * NOTE(review): path still points at ip->ni_path, and
			 * the lock is no longer held.  If the hold below
			 * causes the entry to be freed (as the comment above
			 * expects), the later uses of path in the messages
			 * would reference freed memory -- confirm.
			 */
			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
			dip = e_ddi_hold_devi_by_path(path, 0);
			if (dip == NULL) {
				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
				    pmf, path, (int)maj))
				cmn_err(CE_WARN, "cpr: unable to hold %s "
				    "driver", path);
				modunload_enable();
				return (0);
			} else {
				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
				/*
				 * Since the modunload thread is stopped, we
				 * don't have to keep the driver held, which
				 * saves a ton of bookkeeping
				 */
				ddi_release_devi(dip);
				goto restart;
			}
		} else {
			/*
			 * Reached when the entry has no path or its driver
			 * has been removed (PMC_DRIVER_REMOVED); the debug
			 * text below says "unknown major" for both cases.
			 */
			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
			    pmf, ip->ni_path))
			continue;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	return (1);
}
7445 
/*
 * Re-enable module unloading.  pm_reattach_noinvol() leaves it disabled
 * when it returns success.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7451 
/*
 * Display (console frame buffer) pm support code
 */
7455 
7456 
7457 /*
7458  * console frame-buffer power-mgmt gets enabled when debugging
7459  * services are not present or console fbpm override is set
7460  */
7461 void
7462 pm_cfb_setup(const char *stdout_path)
7463 {
7464 	PMD_FUNC(pmf, "cfb_setup")
7465 	extern int obpdebug;
7466 	char *devname;
7467 	dev_info_t *dip;
7468 	int devname_len;
7469 	extern dev_info_t *fbdip;
7470 
7471 	/*
7472 	 * By virtue of this function being called (from consconfig),
7473 	 * we know stdout is a framebuffer.
7474 	 */
7475 	stdout_is_framebuffer = 1;
7476 
7477 	if (obpdebug || (boothowto & RB_DEBUG)) {
7478 		if (pm_cfb_override == 0) {
7479 			/*
7480 			 * Console is frame buffer, but we want to suppress
7481 			 * pm on it because of debugging setup
7482 			 */
7483 			pm_cfb_enabled = 0;
7484 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7485 			    "console power management.");
7486 			/*
7487 			 * however, we still need to know which is the console
7488 			 * fb in order to suppress pm on it
7489 			 */
7490 		} else {
7491 			cmn_err(CE_WARN, "Kernel debugger present: see "
7492 			    "kmdb(1M) for interaction with power management.");
7493 		}
7494 	}
7495 #ifdef DEBUG
7496 	/*
7497 	 * IF console is fb and is power managed, don't do prom_printfs from
7498 	 * pm debug macro
7499 	 */
7500 	if (pm_cfb_enabled) {
7501 		if (pm_debug)
7502 			prom_printf("pm debug output will be to log only\n");
7503 		pm_divertdebug++;
7504 	}
7505 #endif
7506 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7507 	devname_len = strlen(devname) + 1;
7508 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7509 	/* if the driver is attached */
7510 	if ((dip = fbdip) != NULL) {
7511 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7512 		    PM_DEVICE(dip)))
7513 		/*
7514 		 * We set up here as if the driver were power manageable in case
7515 		 * we get a later attach of a pm'able driver (which would result
7516 		 * in a panic later)
7517 		 */
7518 		cfb_dip = dip;
7519 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7520 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7521 		    PM_DEVICE(dip)))
7522 #ifdef DEBUG
7523 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7524 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7525 			    pmf, PM_DEVICE(dip)))
7526 		}
7527 #endif
7528 	} else {
7529 		char *ep;
7530 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7531 		pm_record_invol_path(devname,
7532 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7533 		    (major_t)-1);
7534 		for (ep = strrchr(devname, '/'); ep != devname;
7535 		    ep = strrchr(devname, '/')) {
7536 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7537 			*ep = '\0';
7538 			dip = pm_name_to_dip(devname, 0);
7539 			if (dip != NULL) {
7540 				/*
7541 				 * Walk up the tree incrementing
7542 				 * devi_pm_noinvolpm
7543 				 */
7544 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7545 				    0, 0, devname, dip);
7546 				break;
7547 			} else {
7548 				pm_record_invol_path(devname,
7549 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7550 			}
7551 		}
7552 	}
7553 	kmem_free(devname, devname_len);
7554 }
7555 
7556 void
7557 pm_cfb_rele(void)
7558 {
7559 	mutex_enter(&pm_cfb_lock);
7560 	/*
7561 	 * this call isn't using the console any  more, it is ok to take it
7562 	 * down if the count goes to 0
7563 	 */
7564 	cfb_inuse--;
7565 	mutex_exit(&pm_cfb_lock);
7566 }
7567 
7568 /*
7569  * software interrupt handler for fbpm; this function exists because we can't
7570  * bring up the frame buffer power from above lock level.  So if we need to,
7571  * we instead schedule a softint that runs this routine and takes us into
7572  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7573  */
7574 static uint_t
7575 pm_cfb_softint(caddr_t int_handler_arg)
7576 {
7577 	_NOTE(ARGUNUSED(int_handler_arg))
7578 	int rval = DDI_INTR_UNCLAIMED;
7579 
7580 	mutex_enter(&pm_cfb_lock);
7581 	if (pm_soft_pending) {
7582 		mutex_exit(&pm_cfb_lock);
7583 		debug_enter((char *)NULL);
7584 		/* acquired in debug_enter before calling pm_cfb_trigger */
7585 		pm_cfb_rele();
7586 		mutex_enter(&pm_cfb_lock);
7587 		pm_soft_pending = 0;
7588 		mutex_exit(&pm_cfb_lock);
7589 		rval = DDI_INTR_CLAIMED;
7590 	} else
7591 		mutex_exit(&pm_cfb_lock);
7592 
7593 	return (rval);
7594 }
7595 
7596 void
7597 pm_cfb_setup_intr(void)
7598 {
7599 	PMD_FUNC(pmf, "cfb_setup_intr")
7600 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7601 	void pm_cfb_check_and_powerup(void);
7602 
7603 	if (!stdout_is_framebuffer) {
7604 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7605 		return;
7606 	}
7607 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7608 #ifdef DEBUG
7609 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7610 #endif
7611 	/*
7612 	 * setup software interrupt handler
7613 	 */
7614 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7615 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7616 		panic("pm: unable to register soft intr.");
7617 
7618 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7619 }
7620 
7621 /*
7622  * Checks to see if it is safe to write to the console wrt power management
7623  * (i.e. if the console is a framebuffer, then it must be at full power)
7624  * returns 1 when power is off (power-up is needed)
7625  * returns 0 when power is on (power-up not needed)
7626  */
7627 int
7628 pm_cfb_check_and_hold(void)
7629 {
7630 	/*
7631 	 * cfb_dip is set iff console is a power manageable frame buffer
7632 	 * device
7633 	 */
7634 	extern int modrootloaded;
7635 
7636 	mutex_enter(&pm_cfb_lock);
7637 	cfb_inuse++;
7638 	ASSERT(cfb_inuse);	/* wrap? */
7639 	if (modrootloaded && cfb_dip) {
7640 		/*
7641 		 * don't power down the frame buffer, the prom is using it
7642 		 */
7643 		if (pm_cfb_comps_off) {
7644 			mutex_exit(&pm_cfb_lock);
7645 			return (1);
7646 		}
7647 	}
7648 	mutex_exit(&pm_cfb_lock);
7649 	return (0);
7650 }
7651 
7652 /*
7653  * turn on cfb power (which is known to be off).
7654  * Must be called below lock level!
7655  */
7656 void
7657 pm_cfb_powerup(void)
7658 {
7659 	pm_info_t *info;
7660 	int norm;
7661 	int ccount, ci;
7662 	int unused;
7663 #ifdef DEBUG
7664 	/*
7665 	 * Can't reenter prom_prekern, so suppress pm debug messages
7666 	 * (still go to circular buffer).
7667 	 */
7668 	mutex_enter(&pm_debug_lock);
7669 	pm_divertdebug++;
7670 	mutex_exit(&pm_debug_lock);
7671 #endif
7672 	info = PM_GET_PM_INFO(cfb_dip);
7673 	ASSERT(info);
7674 
7675 	ccount = PM_NUMCMPTS(cfb_dip);
7676 	for (ci = 0; ci < ccount; ci++) {
7677 		norm = pm_get_normal_power(cfb_dip, ci);
7678 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7679 		    PM_CANBLOCK_BYPASS, 0, &unused);
7680 	}
7681 #ifdef DEBUG
7682 	mutex_enter(&pm_debug_lock);
7683 	pm_divertdebug--;
7684 	mutex_exit(&pm_debug_lock);
7685 #endif
7686 }
7687 
/*
 * Make sure the console frame buffer is powered up, powering it up if it
 * is not.
 * Note: pm_cfb_check_and_hold() leaves a hold on the power state, which
 * the caller must drop with pm_cfb_rele() once the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	if (pm_cfb_check_and_hold() != 0) {
		pm_cfb_powerup();
	}
}
7700 
/*
 * Trigger a low level interrupt to power up console frame buffer.
 *
 * Does nothing if there is no console frame buffer dip.  Otherwise the
 * current lbolt is stored in pm_soft_pending (pm_cfb_softint() resets it
 * to 0 once it has run) and the soft interrupt is triggered.
 */
void
pm_cfb_trigger(void)
{
	/* checked before taking the lock */
	if (cfb_dip == NULL)
		return;

	mutex_enter(&pm_cfb_lock);
	/*
	 * If machine appears to be hung, pulling the keyboard connector of
	 * the console will cause a high level interrupt and go to debug_enter.
	 * But, if the fb is powered down, this routine will be called to bring
	 * it up (by generating a softint to do the work).  If soft interrupts
	 * are not running, and the keyboard connector is pulled again, the
	 * following code detects this condition and calls panic which allows
	 * the fb to be brought up from high level.
	 *
	 * If two nearly simultaneous calls to debug_enter occur (both from
	 * high level) the code described above will cause a panic.
	 */
	if (lbolt <= pm_soft_pending) {
		/* pm_cfb_lock is still held when we panic */
		panicstr = "pm_cfb_trigger: lbolt not advancing";
		panic(panicstr);	/* does a power up at any intr level */
		/* NOTREACHED */
	}
	/* non-zero value doubles as the "softint pending" marker */
	pm_soft_pending = lbolt;
	mutex_exit(&pm_cfb_lock);
	ddi_trigger_softintr(pm_soft_id);
}
7732 
7733 major_t
7734 pm_path_to_major(char *path)
7735 {
7736 	PMD_FUNC(pmf, "path_to_major")
7737 	char *np, *ap, *bp;
7738 	major_t ret;
7739 	size_t len;
7740 	static major_t i_path_to_major(char *, char *);
7741 
7742 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7743 
7744 	np = strrchr(path, '/');
7745 	if (np != NULL)
7746 		np++;
7747 	else
7748 		np = path;
7749 	len = strlen(np) + 1;
7750 	bp = kmem_alloc(len, KM_SLEEP);
7751 	(void) strcpy(bp, np);
7752 	if ((ap = strchr(bp, '@')) != NULL) {
7753 		*ap = '\0';
7754 	}
7755 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7756 	ret = i_path_to_major(path, np);
7757 	kmem_free(bp, len);
7758 	return (ret);
7759 }
7760 
#ifdef DEBUG

char *pm_msgp;			/* where the next message will be written */
char *pm_bufend;		/* last byte of the log buffer */
char *pm_msgbuf = NULL;		/* log buffer, allocated on first use */
int   pm_logpages = 2;		/* size of the log buffer, in pages */

#define	PMLOGPGS	pm_logpages

/*
 * Format a debug message into the in-memory pm log buffer and, unless
 * pm_divertdebug is set, also print it with prom_printf().
 *
 * Messages are stored back to back, each with its terminating NUL.  When
 * a message does not fit in the space left, the remainder of the buffer
 * is zeroed and the message is written at the start of the buffer
 * instead.  A message longer than the whole buffer is truncated to fit.
 *
 * NOTE(review): the buffer is allocated with KM_SLEEP while
 * pm_debug_lock is held, and pm_cfb_setup_intr() initializes that lock as
 * a spin mutex -- confirm that the first call cannot happen in a context
 * where sleeping is not allowed.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;
	size_t bufsz;
	char *msg;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}

	/* first pass only measures the message (plus its NUL) */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	/* never write more than the buffer can hold */
	bufsz = (size_t)(pm_bufend - pm_msgbuf);
	if (size > bufsz)
		size = bufsz;

	if (size > (size_t)(pm_bufend - pm_msgp)) {	/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		msg = pm_msgbuf;
	} else {
		msg = pm_msgp;
	}

	va_start(adx, fmt);
	(void) vsnprintf(msg, size, fmt, adx);
	va_end(adx);

	/*
	 * Print from where the message was actually written; after a wrap
	 * that is the start of the buffer, not the old write position.
	 */
	if (!pm_divertdebug)
		prom_printf("%s", msg);
	pm_msgp = msg + size;
	mutex_exit(&pm_debug_lock);
}
#endif	/* DEBUG */
7803 
7804 /*
7805  * We want to save the state of any directly pm'd devices over the suspend/
7806  * resume process so that we can put them back the way the controlling
7807  * process left them.
7808  */
7809 void
7810 pm_save_direct_levels(void)
7811 {
7812 	pm_processes_stopped = 1;
7813 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7814 }
7815 
7816 static int
7817 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7818 {
7819 	_NOTE(ARGUNUSED(arg))
7820 	int i;
7821 	int *ip;
7822 	pm_info_t *info = PM_GET_PM_INFO(dip);
7823 
7824 	if (!info)
7825 		return (DDI_WALK_CONTINUE);
7826 
7827 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7828 		if (PM_NUMCMPTS(dip) > 2) {
7829 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7830 			    sizeof (int), KM_SLEEP);
7831 			ip = info->pmi_lp;
7832 		} else {
7833 			ip = info->pmi_levels;
7834 		}
7835 		/* autopm and processes are stopped, ok not to lock power */
7836 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7837 			*ip++ = PM_CURPOWER(dip, i);
7838 		/*
7839 		 * There is a small window between stopping the
7840 		 * processes and setting pm_processes_stopped where
7841 		 * a driver could get hung up in a pm_raise_power()
7842 		 * call.  Free any such driver now.
7843 		 */
7844 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7845 	}
7846 
7847 	return (DDI_WALK_CONTINUE);
7848 }
7849 
7850 void
7851 pm_restore_direct_levels(void)
7852 {
7853 	/*
7854 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
7855 	 * threads failed) then we don't want to try to restore them
7856 	 */
7857 	if (!pm_processes_stopped)
7858 		return;
7859 
7860 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
7861 	pm_processes_stopped = 0;
7862 }
7863 
7864 static int
7865 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
7866 {
7867 	_NOTE(ARGUNUSED(arg))
7868 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
7869 	int i, nc, result;
7870 	int *ip;
7871 
7872 	pm_info_t *info = PM_GET_PM_INFO(dip);
7873 	if (!info)
7874 		return (DDI_WALK_CONTINUE);
7875 
7876 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7877 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
7878 			ip = &info->pmi_lp[nc - 1];
7879 		} else {
7880 			ip = &info->pmi_levels[nc - 1];
7881 		}
7882 		/*
7883 		 * Because fb drivers fail attempts to turn off the
7884 		 * fb when the monitor is on, but treat a request to
7885 		 * turn on the monitor as a request to turn on the
7886 		 * fb too, we process components in descending order
7887 		 * Because autopm is disabled and processes aren't
7888 		 * running, it is ok to examine current power outside
7889 		 * of the power lock
7890 		 */
7891 		for (i = nc - 1; i >= 0; i--, ip--) {
7892 			if (PM_CURPOWER(dip, i) == *ip)
7893 				continue;
7894 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
7895 			    PM_CANBLOCK_BYPASS, 0, &result) !=
7896 				DDI_SUCCESS) {
7897 				cmn_err(CE_WARN, "cpr: unable "
7898 				    "to restore power level of "
7899 				    "component %d of directly "
7900 				    "power manged device %s@%s"
7901 				    " to %d",
7902 				    i, PM_NAME(dip),
7903 				    PM_ADDR(dip), *ip);
7904 				PMD(PMD_FAIL, ("%s: failed to restore "
7905 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
7906 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
7907 				    PM_CURPOWER(dip, i), *ip, result))
7908 			}
7909 		}
7910 		if (nc > 2) {
7911 			kmem_free(info->pmi_lp, nc * sizeof (int));
7912 			info->pmi_lp = NULL;
7913 		}
7914 	}
7915 	return (DDI_WALK_CONTINUE);
7916 }
7917 
7918 /*
7919  * Stolen from the bootdev module
7920  * attempt to convert a path to a major number
7921  */
7922 static major_t
7923 i_path_to_major(char *path, char *leaf_name)
7924 {
7925 	extern major_t path_to_major(char *pathname);
7926 	major_t maj;
7927 
7928 	if ((maj = path_to_major(path)) == (major_t)-1) {
7929 		maj = ddi_name_to_major(leaf_name);
7930 	}
7931 
7932 	return (maj);
7933 }
7934 
/*
 * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
 * An entry in the list means that the device is detached, so we need to
 * adjust its ancestors as if they had just seen this attach, and any detached
 * ancestors need to have their list entries adjusted.
 *
 * major is the major number of the driver being removed.  The real work
 * is done by i_pm_driver_removed(); this wrapper only serializes it.
 */
void
pm_driver_removed(major_t major)
{
	static void i_pm_driver_removed(major_t major);

	/*
	 * Serialize removal of drivers. This is to keep ancestors of
	 * a node that is being deleted from getting deleted and added back
	 * with different counters.
	 */
	mutex_enter(&pm_remdrv_lock);
	i_pm_driver_removed(major);
	mutex_exit(&pm_remdrv_lock);
}
7955 
7956 /*
7957  * This routine is called recursively by pm_noinvol_process_ancestors()
7958  */
7959 static void
7960 i_pm_driver_removed(major_t major)
7961 {
7962 	PMD_FUNC(pmf, "driver_removed")
7963 	static void adjust_ancestors(char *, int);
7964 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
7965 	static void pm_noinvol_process_ancestors(char *);
7966 	pm_noinvol_t *ip, *pp = NULL;
7967 	int wasvolpmd;
7968 	ASSERT(major != (major_t)-1);
7969 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
7970 again:
7971 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7972 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7973 		if (major != ip->ni_major)
7974 			continue;
7975 		/*
7976 		 * If it is an ancestor of no-invol node, which is
7977 		 * not removed, skip it. This is to cover the case of
7978 		 * ancestor removed without removing its descendants.
7979 		 */
7980 		if (pm_is_noinvol_ancestor(ip)) {
7981 			ip->ni_flags |= PMC_DRIVER_REMOVED;
7982 			continue;
7983 		}
7984 		wasvolpmd = ip->ni_wasvolpmd;
7985 		/*
7986 		 * remove the entry from the list
7987 		 */
7988 		if (pp) {
7989 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
7990 			    pmf, ip->ni_path, pp->ni_path))
7991 			pp->ni_next = ip->ni_next;
7992 		} else {
7993 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
7994 			    ip->ni_path))
7995 			ASSERT(pm_noinvol_head == ip);
7996 			pm_noinvol_head = ip->ni_next;
7997 		}
7998 		rw_exit(&pm_noinvol_rwlock);
7999 		adjust_ancestors(ip->ni_path, wasvolpmd);
8000 		/*
8001 		 * Had an ancestor been removed before this node, it would have
8002 		 * been skipped. Adjust the no-invol counters for such skipped
8003 		 * ancestors.
8004 		 */
8005 		pm_noinvol_process_ancestors(ip->ni_path);
8006 		kmem_free(ip->ni_path, ip->ni_size);
8007 		kmem_free(ip, sizeof (*ip));
8008 		goto again;
8009 	}
8010 	rw_exit(&pm_noinvol_rwlock);
8011 }
8012 
8013 /*
8014  * returns 1, if *aip is a ancestor of a no-invol node
8015  *	   0, otherwise
8016  */
8017 static int
8018 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8019 {
8020 	pm_noinvol_t *ip;
8021 
8022 	ASSERT(strlen(aip->ni_path) != 0);
8023 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8024 		if (ip == aip)
8025 			continue;
8026 		/*
8027 		 * To be an ancestor, the path must be an initial substring of
8028 		 * the descendent, and end just before a '/' in the
8029 		 * descendent's path.
8030 		 */
8031 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8032 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8033 			return (1);
8034 	}
8035 	return (0);
8036 }
8037 
#define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
/*
 * Adjust the no-invol accounting of the parent of the node named by path,
 * after that node's pm_noinvol entry has been dropped.
 *
 * path		device path of the node whose ancestors are to be adjusted
 * wasvolpmd	passed through to pm_noinvol_update() / used to decide whether
 *		the ancestor's ni_volpmd count is decremented as well
 *
 * The parent's path is formed in a private copy of path (truncated at the
 * last '/'); this function itself never writes through path.
 * NOTE(review): path is also handed to pm_noinvol_update(); whether anything
 * downstream modifies it cannot be told from here.
 *
 * If the parent has a dev_info node, the update is delegated to
 * pm_noinvol_update(PM_BP_NOINVOL_REMDRV, ...).  Otherwise the pm_noinvol
 * list is searched for an entry with the parent's path, its counters are
 * decremented, the entry is freed if nothing keeps it alive, and the
 * function recurses on the parent's path.
 */
static void
adjust_ancestors(char *path, int wasvolpmd)
{
	PMD_FUNC(pmf, "adjust_ancestors")
	char *cp;
	pm_noinvol_t *lp;
	pm_noinvol_t *pp = NULL;
	major_t locked = (major_t)UINT_MAX;
	dev_info_t *dip;
	char	*pathbuf;
	size_t pathbuflen = strlen(path) + 1;

	/*
	 * First we look up the ancestor's dip.  If we find it, then we
	 * adjust counts up the tree
	 */
	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
	(void) strcpy(pathbuf, path);
	cp = strrchr(pathbuf, '/');
	if (cp == NULL)	{
		/* if no ancestors, then nothing to do */
		kmem_free(pathbuf, pathbuflen);
		return;
	}
	/* chop off the last component: pathbuf now names the parent */
	*cp = '\0';
	dip = pm_name_to_dip(pathbuf, 1);
	if (dip != NULL) {
		locked = PM_MAJOR(dip);

		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
		    path, dip);

		/*
		 * NOTE(review): presumably pm_name_to_dip(..., 1) returned
		 * the node held, and the hold is only dropped when its
		 * binding name maps to a major number -- confirm.
		 */
		if (locked != (major_t)UINT_MAX)
			ddi_release_devi(dip);
	} else {
		char *apath;
		size_t len = strlen(pathbuf) + 1;
		int  lock_held = 1;

		/*
		 * Now check for ancestors that exist only in the list
		 */
		apath = kmem_alloc(len, KM_SLEEP);
		(void) strcpy(apath, pathbuf);
		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
			/*
			 * This can only happen once.  Since we have to drop
			 * the lock, we need to extract the relevant info.
			 */
			if (strcmp(pathbuf, lp->ni_path) == 0) {
				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
				    lp->ni_path, lp->ni_noinvolpm,
				    lp->ni_noinvolpm - 1))
				lp->ni_noinvolpm--;
				if (wasvolpmd && lp->ni_volpmd) {
					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
					    "%d\n", pmf, lp->ni_path,
					    lp->ni_volpmd, lp->ni_volpmd - 1))
					lp->ni_volpmd--;
				}
				/*
				 * remove the entry from the list, if there
				 * are no more no-invol descendants and node
				 * itself is not a no-invol node.
				 */
				if (!(lp->ni_noinvolpm ||
				    (lp->ni_flags & PMC_NO_INVOL))) {
					ASSERT(lp->ni_volpmd == 0);
					if (pp) {
						PMD(PMD_NOINVOL, ("%s: freeing "
						    "%s, prev is %s\n", pmf,
						    lp->ni_path, pp->ni_path))
						pp->ni_next = lp->ni_next;
					} else {
						PMD(PMD_NOINVOL, ("%s: free %s "
						    "head\n", pmf, lp->ni_path))
						ASSERT(pm_noinvol_head == lp);
						pm_noinvol_head = lp->ni_next;
					}
					/*
					 * The entry is unlinked, so the lock
					 * can be dropped before recursing.
					 * NOTE(review): on this path the
					 * recursion on apath runs here and
					 * again after the loop -- confirm the
					 * double adjustment is intended.
					 */
					lock_held = 0;
					rw_exit(&pm_noinvol_rwlock);
					adjust_ancestors(apath, wasvolpmd);
					/* restore apath */
					(void) strcpy(apath, pathbuf);
					kmem_free(lp->ni_path, lp->ni_size);
					kmem_free(lp, sizeof (*lp));
				}
				break;
			}
		}
		if (lock_held)
			rw_exit(&pm_noinvol_rwlock);
		/* continue up the tree from the parent */
		adjust_ancestors(apath, wasvolpmd);
		kmem_free(apath, len);
	}
	kmem_free(pathbuf, pathbuflen);
}
8142 
8143 /*
8144  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8145  * which were skipped even though their drivers were removed.
8146  */
8147 static void
8148 pm_noinvol_process_ancestors(char *path)
8149 {
8150 	pm_noinvol_t *lp;
8151 
8152 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8153 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8154 		if (strstr(path, lp->ni_path) &&
8155 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8156 			rw_exit(&pm_noinvol_rwlock);
8157 			i_pm_driver_removed(lp->ni_major);
8158 			return;
8159 		}
8160 	}
8161 	rw_exit(&pm_noinvol_rwlock);
8162 }
8163 
8164 /*
8165  * Returns true if (detached) device needs to be kept up because it exported the
8166  * "no-involuntary-power-cycles" property or we're pretending it did (console
8167  * fb case) or it is an ancestor of such a device and has used up the "one
8168  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8169  * upon detach.  In any event, we need an exact hit on the path or we return
8170  * false.
8171  */
8172 int
8173 pm_noinvol_detached(char *path)
8174 {
8175 	PMD_FUNC(pmf, "noinvol_detached")
8176 	pm_noinvol_t *ip;
8177 	int ret = 0;
8178 
8179 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8180 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8181 		if (strcmp(path, ip->ni_path) == 0) {
8182 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8183 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8184 				    "%s\n", pmf, path))
8185 				ret = 1;
8186 				break;
8187 			}
8188 #ifdef	DEBUG
8189 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8190 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8191 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8192 				    path))
8193 #endif
8194 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8195 			break;
8196 		}
8197 	}
8198 	rw_exit(&pm_noinvol_rwlock);
8199 	return (ret);
8200 }
8201 
8202 int
8203 pm_is_cfb(dev_info_t *dip)
8204 {
8205 	return (dip == cfb_dip);
8206 }
8207 
#ifdef	DEBUG
/*
 * DEBUG-only check that the console frame buffer is fully powered: true
 * (nonzero) when no console fb component is counted as off in
 * pm_cfb_comps_off.  That is also the answer when the console is not a
 * frame buffer.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8220 
8221 /*
8222  * Preventing scan from powering down the node by incrementing the
8223  * kidsupcnt.
8224  */
8225 void
8226 pm_hold_power(dev_info_t *dip)
8227 {
8228 	e_pm_hold_rele_power(dip, 1);
8229 }
8230 
8231 /*
8232  * Releasing the hold by decrementing the kidsupcnt allowing scan
8233  * to power down the node if all conditions are met.
8234  */
8235 void
8236 pm_rele_power(dev_info_t *dip)
8237 {
8238 	e_pm_hold_rele_power(dip, -1);
8239 }
8240 
8241 /*
8242  * A wrapper of pm_all_to_normal() to power up a dip
8243  * to its normal level
8244  */
8245 int
8246 pm_powerup(dev_info_t *dip)
8247 {
8248 	PMD_FUNC(pmf, "pm_powerup")
8249 
8250 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8251 	ASSERT(!(servicing_interrupt()));
8252 
8253 	/*
8254 	 * in case this node is not already participating pm
8255 	 */
8256 	if (!PM_GET_PM_INFO(dip)) {
8257 		if (!DEVI_IS_ATTACHING(dip))
8258 			return (DDI_SUCCESS);
8259 		if (pm_start(dip) != DDI_SUCCESS)
8260 			return (DDI_FAILURE);
8261 		if (!PM_GET_PM_INFO(dip))
8262 			return (DDI_SUCCESS);
8263 	}
8264 
8265 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8266 }
8267 
8268 int
8269 pm_rescan_walk(dev_info_t *dip, void *arg)
8270 {
8271 	_NOTE(ARGUNUSED(arg))
8272 
8273 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8274 		return (DDI_WALK_CONTINUE);
8275 
8276 	/*
8277 	 * Currently pm_cpr_callb/resume code is the only caller
8278 	 * and it needs to make sure that stopped scan get
8279 	 * reactivated. Otherwise, rescan walk needn't reactive
8280 	 * stopped scan.
8281 	 */
8282 	pm_scan_init(dip);
8283 
8284 	(void) pm_rescan(dip);
8285 	return (DDI_WALK_CONTINUE);
8286 }
8287 
8288 static dev_info_t *
8289 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8290 {
8291 	dev_info_t *wdip, *pdip;
8292 
8293 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8294 		pdip = ddi_get_parent(wdip);
8295 		if (pdip == dip)
8296 			return (wdip);
8297 	}
8298 	return (NULL);
8299 }
8300 
8301 int
8302 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8303     void *arg, void *result)
8304 {
8305 	PMD_FUNC(pmf, "bp_bus_power")
8306 	dev_info_t	*cdip;
8307 	pm_info_t	*cinfo;
8308 	pm_bp_child_pwrchg_t	*bpc;
8309 	pm_sp_misc_t		*pspm;
8310 	pm_bp_nexus_pwrup_t *bpn;
8311 	pm_bp_child_pwrchg_t new_bpc;
8312 	pm_bp_noinvol_t *bpi;
8313 	dev_info_t *tdip;
8314 	char *pathbuf;
8315 	int		ret = DDI_SUCCESS;
8316 	int		errno = 0;
8317 	pm_component_t *cp;
8318 
8319 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8320 	    pm_decode_op(op)))
8321 	switch (op) {
8322 	case BUS_POWER_CHILD_PWRCHG:
8323 		bpc = (pm_bp_child_pwrchg_t *)arg;
8324 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8325 		tdip = bpc->bpc_dip;
8326 		cdip = pm_get_next_descendent(dip, tdip);
8327 		cinfo = PM_GET_PM_INFO(cdip);
8328 		if (cdip != tdip) {
8329 			/*
8330 			 * If the node is an involved parent, it needs to
8331 			 * power up the node as it is needed.  There is nothing
8332 			 * else the framework can do here.
8333 			 */
8334 			if (PM_WANTS_NOTIFICATION(cdip)) {
8335 				PMD(PMD_SET, ("%s: call bus_power for "
8336 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8337 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8338 				    impl_arg, op, arg, result));
8339 			}
8340 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8341 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8342 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8343 			/*
8344 			 * we presume that the parent needs to be up in
8345 			 * order for the child to change state (either
8346 			 * because it must already be on if the child is on
8347 			 * (and the pm_all_to_normal_nexus() will be a nop)
8348 			 * or because it will need to be on for the child
8349 			 * to come on; so we make the call regardless
8350 			 */
8351 			pm_hold_power(cdip);
8352 			if (cinfo) {
8353 				pm_canblock_t canblock = pspm->pspm_canblock;
8354 				ret = pm_all_to_normal_nexus(cdip, canblock);
8355 				if (ret != DDI_SUCCESS) {
8356 					pm_rele_power(cdip);
8357 					return (ret);
8358 				}
8359 			}
8360 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8361 			    PM_DEVICE(cdip)))
8362 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8363 			    result);
8364 			pm_rele_power(cdip);
8365 		} else {
8366 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8367 			    result);
8368 		}
8369 		return (ret);
8370 
8371 	case BUS_POWER_NEXUS_PWRUP:
8372 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8373 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8374 
8375 		if (!e_pm_valid_info(dip, NULL) ||
8376 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8377 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8378 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8379 			    pmf, PM_DEVICE(dip)))
8380 			*pspm->pspm_errnop = EIO;
8381 			*(int *)result = DDI_FAILURE;
8382 			return (DDI_FAILURE);
8383 		}
8384 
8385 		ASSERT(bpn->bpn_dip == dip);
8386 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8387 		    PM_DEVICE(dip)))
8388 		new_bpc.bpc_dip = dip;
8389 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8390 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8391 		new_bpc.bpc_comp = bpn->bpn_comp;
8392 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8393 		new_bpc.bpc_nlevel = bpn->bpn_level;
8394 		new_bpc.bpc_private = bpn->bpn_private;
8395 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8396 		    PM_LEVEL_UPONLY;
8397 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8398 		    &errno;
8399 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8400 		    (void *)&new_bpc, result);
8401 		kmem_free(pathbuf, MAXPATHLEN);
8402 		return (ret);
8403 
8404 	case BUS_POWER_NOINVOL:
8405 		bpi = (pm_bp_noinvol_t *)arg;
8406 		tdip = bpi->bpni_dip;
8407 		cdip = pm_get_next_descendent(dip, tdip);
8408 
8409 		/* In case of rem_drv, the leaf node has been removed */
8410 		if (cdip == NULL)
8411 			return (DDI_SUCCESS);
8412 
8413 		cinfo = PM_GET_PM_INFO(cdip);
8414 		if (cdip != tdip) {
8415 			if (PM_WANTS_NOTIFICATION(cdip)) {
8416 				PMD(PMD_NOINVOL,
8417 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8418 				    pmf, PM_DEVICE(cdip)))
8419 				ret = (*PM_BUS_POWER_FUNC(cdip))
8420 				    (cdip, NULL, op, arg, result);
8421 				if ((cinfo) && (ret == DDI_SUCCESS))
8422 					(void) pm_noinvol_update_node(cdip,
8423 					    bpi);
8424 				return (ret);
8425 			} else {
8426 				PMD(PMD_NOINVOL,
8427 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8428 				    PM_DEVICE(cdip)))
8429 				ret = pm_busop_bus_power(cdip, NULL, op,
8430 				    arg, result);
8431 				/*
8432 				 * Update the current node.
8433 				 */
8434 				if ((cinfo) && (ret == DDI_SUCCESS))
8435 					(void) pm_noinvol_update_node(cdip,
8436 					    bpi);
8437 				return (ret);
8438 			}
8439 		} else {
8440 			/*
8441 			 * For attach, detach, power up:
8442 			 * Do nothing for leaf node since its
8443 			 * counts are already updated.
8444 			 * For CFB and driver removal, since the
8445 			 * path and the target dip passed in is up to and incl.
8446 			 * the immediate ancestor, need to do the update.
8447 			 */
8448 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8449 			    "reached\n", pmf, PM_DEVICE(cdip)))
8450 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8451 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8452 				(void) pm_noinvol_update_node(cdip, bpi);
8453 			return (DDI_SUCCESS);
8454 		}
8455 
8456 	default:
8457 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8458 		return (DDI_FAILURE);
8459 	}
8460 }
8461 
/*
 * Carry out a BUS_POWER_CHILD_PWRCHG request on dip itself: try to move
 * component bpc_comp of dip to level bpc_nlevel, subject to the direction
 * (PM_LEVEL_UPONLY, PM_LEVEL_DOWNONLY or PM_LEVEL_EXACT) given in the
 * request.
 *
 * dip		node whose component is to change power
 * impl_arg	unused
 * op		must be BUS_POWER_CHILD_PWRCHG (asserted)
 * arg		pm_bp_child_pwrchg_t; its bpc_private is a pm_sp_misc_t that
 *		supplies canblock, scan, direction and the errno pointer
 * resultp	treated as int *; receives DDI_SUCCESS or DDI_FAILURE
 *
 * *pspm_errnop is cleared on entry and set to EINVAL, EINTR, EBUSY or EIO
 * on the failures detected here.  When the change is attempted and does not
 * fail, bpc_olevel is overwritten with the level found under the power lock.
 *
 * Outline:
 *  - while a process directly controls the device (and blocking is allowed),
 *    let pm_busop_match_request() decide the outcome;
 *  - for a BC device, first bring component 0 to normal power when another
 *    component is being raised;
 *  - pre-notify the parent if it wants notification, otherwise put a power
 *    hold on it;
 *  - under the power lock, filter the request by direction, call
 *    power_dev(), queue dependency work and notify watchers;
 *  - post_notify: post-notify the parent, or settle the parent's power hold
 *    according to the transition that actually happened.
 *
 * Returns DDI_SUCCESS only if both *resultp and the parent notification (if
 * any) are DDI_SUCCESS, else DDI_FAILURE.  The direct-pm path returns the
 * value produced by pm_busop_match_request().
 */
static int
pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *resultp)
{
	_NOTE(ARGUNUSED(impl_arg))
	PMD_FUNC(pmf, "bp_set_power")
	pm_ppm_devlist_t *devl;
	int clevel, circ;
#ifdef	DEBUG
	int circ_db, ccirc_db;
#endif
	int ret = DDI_SUCCESS;
	dev_info_t *cdip;
	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
	pm_canblock_t canblock = pspm->pspm_canblock;
	int scan = pspm->pspm_scan;
	int comp = bpc->bpc_comp;
	int olevel = bpc->bpc_olevel;
	int nlevel = bpc->bpc_nlevel;
	int comps_off_incr = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int dodeps;
	int direction = pspm->pspm_direction;
	int *errnop = pspm->pspm_errnop;
	char *dir = pm_decode_direction(direction);
	int *iresp = (int *)resultp;
	time_t	idletime, thresh;
	pm_component_t *cp = PM_CP(dip, comp);
	int work_type;

	*iresp = DDI_SUCCESS;
	*errnop = 0;
	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
	    pm_decode_op(op)))

	/*
	 * The following set of conditions indicate we are here to handle a
	 * driver's pm_[raise|lower]_power request, but the device is being
	 * power managed (PM_DIRECT_PM) by a user process.  For that case
	 * we want to pm_block and pass a status back to the caller based
	 * on whether the controlling process's next activity on the device
	 * matches the current request or not.  This distinction tells
	 * downstream functions to avoid calling into a driver or changing
	 * the framework's power state.  To actually block, we need:
	 *
	 * PM_ISDIRECT(dip)
	 *	no reason to block unless a process is directly controlling dev
	 * direction != PM_LEVEL_EXACT
	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
	 * !pm_processes_stopped
	 *	don't block if controlling proc already be stopped for cpr
	 * canblock != PM_CANBLOCK_BYPASS
	 *	our caller must not have explicitly prevented blocking
	 */
	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
		PM_LOCK_DIP(dip);
		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
			/* releases dip lock */
			ret = pm_busop_match_request(dip, bpc);
			if (ret == EAGAIN) {
				/* owner let go; retake the lock and recheck */
				PM_LOCK_DIP(dip);
				continue;
			}
			return (*iresp = ret);
		}
		PM_UNLOCK_DIP(dip);
	}
	/* BC device is never scanned, so power will stick until we are done */
	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
	    direction != PM_LEVEL_DOWNONLY) {
		int nrmpwr0 = pm_get_normal_power(dip, 0);
		if (pm_set_power(dip, 0, nrmpwr0, direction,
		    canblock, 0, resultp) != DDI_SUCCESS) {
			/* *resultp set by pm_set_power */
			return (DDI_FAILURE);
		}
	}
	if (PM_WANTS_NOTIFICATION(pdip)) {
		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
		if (ret != DDI_SUCCESS) {
			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
			    pmf, PM_DEVICE(pdip)))
			return (DDI_FAILURE);
		}
	} else {
		/*
		 * Since we don't know what the actual power level is,
		 * we place a power hold on the parent no matter what
		 * component and level is changing.
		 */
		pm_hold_power(pdip);
	}
	PM_LOCK_POWER(dip, &circ);
	/* from here on every exit goes through post_notify */
	clevel = PM_CURPOWER(dip, comp);
	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
	    clevel, dir))
	switch (direction) {
	case PM_LEVEL_UPONLY:
		/* Powering up */
		if (clevel >= nlevel) {
			PMD(PMD_SET, ("%s: current level is already "
			    "at or above the requested level.\n", pmf))
			*iresp = DDI_SUCCESS;
			ret = DDI_SUCCESS;
			goto post_notify;
		}
		break;
	case PM_LEVEL_EXACT:
		/* specific level request */
		if (clevel == nlevel && !PM_ISBC(dip)) {
			PMD(PMD_SET, ("%s: current level is already "
			    "at the requested level.\n", pmf))
			*iresp = DDI_SUCCESS;
			ret = DDI_SUCCESS;
			goto post_notify;
		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
			if (!pm_cfb_enabled) {
				PMD(PMD_ERROR | PMD_CFB,
				    ("%s: !pm_cfb_enabled, fails\n", pmf))
				*errnop = EINVAL;
				*iresp = DDI_FAILURE;
				ret = DDI_FAILURE;
				goto post_notify;
			}
			/*
			 * Poll, one delay_sig(1) at a time, until cfb_inuse
			 * is clear; give up with EINTR if the wait is
			 * interrupted.
			 */
			mutex_enter(&pm_cfb_lock);
			while (cfb_inuse) {
				mutex_exit(&pm_cfb_lock);
				if (delay_sig(1) == EINTR) {
					ret = DDI_FAILURE;
					*iresp = DDI_FAILURE;
					*errnop = EINTR;
					goto post_notify;
				}
				mutex_enter(&pm_cfb_lock);
			}
			mutex_exit(&pm_cfb_lock);
		}
		break;
	case PM_LEVEL_DOWNONLY:
		/* Powering down */
		thresh = cur_threshold(dip, comp);
		idletime = gethrestime_sec() - cp->pmc_timestamp;
		/*
		 * A scan-initiated power down is refused with EBUSY if the
		 * node has kids up, the component is busy, or it has not
		 * been idle for its threshold.
		 */
		if (scan && ((PM_KUC(dip) != 0) ||
		    (cp->pmc_busycount > 0) || (idletime < thresh))) {
#ifdef	DEBUG
			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
				PMD(PMD_SET, ("%s: scan failed: "
				    "kidsupcnt != 0\n", pmf))
			if (cp->pmc_busycount > 0)
				PMD(PMD_SET, ("%s: scan failed: "
				    "device become busy\n", pmf))
			if (idletime < thresh)
				PMD(PMD_SET, ("%s: scan failed: device "
				    "hasn't been idle long enough\n", pmf))
#endif
			*iresp = DDI_FAILURE;
			*errnop = EBUSY;
			ret = DDI_FAILURE;
			goto post_notify;
		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
			PMD(PMD_SET, ("%s: current level is already at "
			    "or below the requested level.\n", pmf))
			*iresp = DDI_SUCCESS;
			ret = DDI_SUCCESS;
			goto post_notify;
		}
		break;
	}

	if (PM_IS_CFB(dip) && (comps_off_incr =
	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
		/*
		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
		 * component from full power.  Remember that we tried to
		 * lower power in case it fails and we need to back out
		 * the adjustment.
		 */
		update_comps_off(comps_off_incr, dip);
		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
		    pm_cfb_comps_off))
	}

	/* the actual power change */
	if ((*iresp = power_dev(dip,
	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
#ifdef DEBUG
		/*
		 * All descendents of this node should already be powered off.
		 */
		if (PM_CURPOWER(dip, comp) == 0) {
			pm_desc_pwrchk_t pdpchk;
			pdpchk.pdpc_dip = dip;
			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
			ndi_devi_enter(dip, &circ_db);
			for (cdip = ddi_get_child(dip); cdip != NULL;
			    cdip = ddi_get_next_sibling(cdip)) {
				ndi_devi_enter(cdip, &ccirc_db);
				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
				    (void *)&pdpchk);
				ndi_devi_exit(cdip, ccirc_db);
			}
			ndi_devi_exit(dip, circ_db);
		}
#endif
		/*
		 * Post-adjust pm_cfb_comps_off if we brought an fb component
		 * back up to full power.
		 */
		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
			update_comps_off(comps_off_incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, clevel, nlevel, pm_cfb_comps_off))
		}
		/*
		 * Decide whether dependency work must be queued: for a BC
		 * device only component 0 counts; otherwise power-off counts
		 * once no component is left on, and power-on counts when no
		 * other component was already on.
		 */
		dodeps = 0;
		if (POWERING_OFF(clevel, nlevel)) {
			if (PM_ISBC(dip)) {
				dodeps = (comp == 0);
			} else {
				int i;
				dodeps = 1;
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					/* if some component still on */
					if (PM_CURPOWER(dip, i)) {
						dodeps = 0;
						break;
					}
				}
			}
			if (dodeps)
				work_type = PM_DEP_WK_POWER_OFF;
		} else if (POWERING_ON(clevel, nlevel)) {
			if (PM_ISBC(dip)) {
				dodeps = (comp == 0);
			} else {
				int i;
				dodeps = 1;
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					if (i == comp)
						continue;
					if (PM_CURPOWER(dip, i) > 0) {
						dodeps = 0;
						break;
					}
				}
			}
			if (dodeps)
				work_type = PM_DEP_WK_POWER_ON;
		}

		/* work_type is only read when dodeps is set, see above */
		if (dodeps) {
			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			(void) ddi_pathname(dip, pathbuf);
			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
			    PM_DEP_NOWAIT, NULL, 0);
			kmem_free(pathbuf, MAXPATHLEN);
		}
		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
			int old;

			/* If old power cached during deadlock, use it. */
			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
			    cp->pmc_phc_pwr : olevel);
			mutex_enter(&pm_rsvp_lock);
			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
			    old, canblock);
			pm_enqueue_notify_others(&devl, canblock);
			mutex_exit(&pm_rsvp_lock);
		}

		/*
		 * If we are coming from a scan, don't do it again,
		 * else we can have infinite loops.
		 */
		if (!scan)
			pm_rescan(dip);
	} else {
		/* if we incremented pm_comps_off_count, but failed */
		if (comps_off_incr > 0) {
			update_comps_off(-comps_off_incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, clevel, nlevel, pm_cfb_comps_off))
		}
		*errnop = EIO;
	}

post_notify:
	/*
	 * This thread may have been in deadlock with pm_power_has_changed.
	 * Before releasing power lock, clear the flag which marks this
	 * condition.
	 */
	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;

	/*
	 * Update the old power level in the bus power structure with the
	 * actual power level before the transition was made to the new level.
	 * Some involved parents depend on this information to keep track of
	 * their children's power transition.
	 */
	if (*iresp != DDI_FAILURE)
		bpc->bpc_olevel = clevel;

	if (PM_WANTS_NOTIFICATION(pdip)) {
		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
		PM_UNLOCK_POWER(dip, circ);
		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
		    PM_DEVICE(dip), ret))
	} else {
		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
		PM_UNLOCK_POWER(dip, circ);
		/*
		 * Now that we know what power transition has occurred
		 * (if any), release the power hold.  Leave the hold
		 * in effect in the case of OFF->ON transition.
		 */
		if (!(clevel == 0 && nlevel > 0 &&
		    (!PM_ISBC(dip) || comp == 0)))
			pm_rele_power(pdip);
		/*
		 * If the power transition was an ON->OFF transition,
		 * remove the power hold from the parent.
		 */
		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
			pm_rele_power(pdip);
	}
	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
		return (DDI_FAILURE);
	else
		return (DDI_SUCCESS);
}
8805 
8806 /*
8807  * If an app (SunVTS or Xsun) has taken control, then block until it
8808  * gives it up or makes the requested power level change, unless
8809  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8810  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8811  */
8812 static int
8813 pm_busop_match_request(dev_info_t *dip, void *arg)
8814 {
8815 	PMD_FUNC(pmf, "bp_match_request")
8816 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8817 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8818 	int comp = bpc->bpc_comp;
8819 	int nlevel = bpc->bpc_nlevel;
8820 	pm_canblock_t canblock = pspm->pspm_canblock;
8821 	int direction = pspm->pspm_direction;
8822 	int clevel, circ;
8823 
8824 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8825 	PM_LOCK_POWER(dip, &circ);
8826 	clevel = PM_CURPOWER(dip, comp);
8827 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8828 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8829 	if (direction == PM_LEVEL_UPONLY) {
8830 		if (clevel >= nlevel) {
8831 			PM_UNLOCK_POWER(dip, circ);
8832 			PM_UNLOCK_DIP(dip);
8833 			return (DDI_SUCCESS);
8834 		}
8835 	} else if (clevel == nlevel) {
8836 		PM_UNLOCK_POWER(dip, circ);
8837 		PM_UNLOCK_DIP(dip);
8838 		return (DDI_SUCCESS);
8839 	}
8840 	if (canblock == PM_CANBLOCK_FAIL) {
8841 		PM_UNLOCK_POWER(dip, circ);
8842 		PM_UNLOCK_DIP(dip);
8843 		return (DDI_FAILURE);
8844 	}
8845 	if (canblock == PM_CANBLOCK_BLOCK) {
8846 		/*
8847 		 * To avoid a deadlock, we must not hold the
8848 		 * power lock when we pm_block.
8849 		 */
8850 		PM_UNLOCK_POWER(dip, circ);
8851 		PMD(PMD_SET, ("%s: blocking\n", pmf))
8852 		    /* pm_block releases dip lock */
8853 		    switch (pm_block(dip, comp, nlevel, clevel)) {
8854 		    case PMP_RELEASE:
8855 				return (EAGAIN);
8856 		    case PMP_SUCCEED:
8857 				return (DDI_SUCCESS);
8858 		    case PMP_FAIL:
8859 				return (DDI_FAILURE);
8860 		    }
8861 	} else {
8862 		ASSERT(0);
8863 	}
8864 	_NOTE(NOTREACHED);
8865 	return (DDI_FAILURE);	/* keep gcc happy */
8866 }
8867 
8868 static int
8869 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
8870 {
8871 	PMD_FUNC(pmf, "all_to_normal_nexus")
8872 	int		*normal;
8873 	int		i, ncomps;
8874 	size_t		size;
8875 	int		changefailed = 0;
8876 	int		ret, result = DDI_SUCCESS;
8877 	pm_bp_nexus_pwrup_t	bpn;
8878 	pm_sp_misc_t	pspm;
8879 
8880 	ASSERT(PM_GET_PM_INFO(dip));
8881 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8882 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
8883 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
8884 		return (DDI_FAILURE);
8885 	}
8886 	ncomps = PM_NUMCMPTS(dip);
8887 	for (i = 0; i < ncomps; i++) {
8888 		bpn.bpn_dip = dip;
8889 		bpn.bpn_comp = i;
8890 		bpn.bpn_level = normal[i];
8891 		pspm.pspm_canblock = canblock;
8892 		pspm.pspm_scan = 0;
8893 		bpn.bpn_private = &pspm;
8894 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
8895 		    (void *)&bpn, (void *)&result);
8896 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
8897 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
8898 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
8899 			    i, normal[i], result))
8900 			changefailed++;
8901 		}
8902 	}
8903 	kmem_free(normal, size);
8904 	if (changefailed) {
8905 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
8906 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
8907 		return (DDI_FAILURE);
8908 	}
8909 	return (DDI_SUCCESS);
8910 }
8911 
8912 int
8913 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
8914     dev_info_t *tdip)
8915 {
8916 	PMD_FUNC(pmf, "noinvol_update")
8917 	pm_bp_noinvol_t args;
8918 	int ret;
8919 	int result = DDI_SUCCESS;
8920 
8921 	args.bpni_path = path;
8922 	args.bpni_dip = tdip;
8923 	args.bpni_cmd = subcmd;
8924 	args.bpni_wasvolpmd = wasvolpmd;
8925 	args.bpni_volpmd = volpmd;
8926 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
8927 	    "volpmd %d wasvolpmd %d\n", pmf,
8928 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
8929 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
8930 	    &args, &result);
8931 	return (ret);
8932 }
8933 
8934 void
8935 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
8936 {
8937 	PMD_FUNC(pmf, "noinvol_update_node")
8938 
8939 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8940 	switch (req->bpni_cmd) {
8941 	case PM_BP_NOINVOL_ATTACH:
8942 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
8943 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8944 		    DEVI(dip)->devi_pm_noinvolpm,
8945 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8946 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8947 		PM_LOCK_DIP(dip);
8948 		DEVI(dip)->devi_pm_noinvolpm--;
8949 		if (req->bpni_wasvolpmd) {
8950 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
8951 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8952 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8953 			    DEVI(dip)->devi_pm_volpmd - 1))
8954 			if (DEVI(dip)->devi_pm_volpmd)
8955 				DEVI(dip)->devi_pm_volpmd--;
8956 		}
8957 		PM_UNLOCK_DIP(dip);
8958 		break;
8959 
8960 	case PM_BP_NOINVOL_DETACH:
8961 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
8962 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
8963 		    DEVI(dip)->devi_pm_noinvolpm,
8964 		    DEVI(dip)->devi_pm_noinvolpm + 1))
8965 		PM_LOCK_DIP(dip);
8966 		DEVI(dip)->devi_pm_noinvolpm++;
8967 		if (req->bpni_wasvolpmd) {
8968 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
8969 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
8970 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
8971 			    DEVI(dip)->devi_pm_volpmd + 1))
8972 			DEVI(dip)->devi_pm_volpmd++;
8973 		}
8974 		PM_UNLOCK_DIP(dip);
8975 		break;
8976 
8977 	case PM_BP_NOINVOL_REMDRV:
8978 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8979 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
8980 		    DEVI(dip)->devi_pm_noinvolpm,
8981 		    DEVI(dip)->devi_pm_noinvolpm - 1))
8982 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
8983 		PM_LOCK_DIP(dip);
8984 		DEVI(dip)->devi_pm_noinvolpm--;
8985 		if (req->bpni_wasvolpmd) {
8986 			PMD(PMD_NOINVOL,
8987 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
8988 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
8989 			    DEVI(dip)->devi_pm_volpmd,
8990 			    DEVI(dip)->devi_pm_volpmd - 1))
8991 			/*
8992 			 * A power up could come in between and
8993 			 * clear the volpmd, if that's the case,
8994 			 * volpmd would be clear.
8995 			 */
8996 			if (DEVI(dip)->devi_pm_volpmd)
8997 				DEVI(dip)->devi_pm_volpmd--;
8998 		}
8999 		PM_UNLOCK_DIP(dip);
9000 		break;
9001 
9002 	case PM_BP_NOINVOL_CFB:
9003 		PMD(PMD_NOINVOL,
9004 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9005 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9006 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9007 		PM_LOCK_DIP(dip);
9008 		DEVI(dip)->devi_pm_noinvolpm++;
9009 		PM_UNLOCK_DIP(dip);
9010 		break;
9011 
9012 	case PM_BP_NOINVOL_POWER:
9013 		PMD(PMD_NOINVOL,
9014 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9015 		    pmf, PM_DEVICE(dip),
9016 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9017 		    req->bpni_volpmd))
9018 		PM_LOCK_DIP(dip);
9019 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9020 		PM_UNLOCK_DIP(dip);
9021 		break;
9022 
9023 	default:
9024 		break;
9025 	}
9026 
9027 }
9028 
#ifdef DEBUG
/*
 * Device tree walk callback (DEBUG kernels only).
 *
 * arg points to a pm_desc_pwrchk_t whose pdpc_dip is the node being
 * powered off.  For every component of dip whose current power level is
 * non-zero, a debug trace and a cmn_err() message are emitted.  The
 * message is issued at CE_PANIC when pdpc_par_involved is zero and at
 * CE_WARN otherwise.
 *
 * Nodes without pm info are skipped.  Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	int comp, level, severity;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			/* a powered component below a node going off */
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off "
			    "while desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip),
			    comp, level))
			cmn_err(severity,
			    "!device %s@%s(%s#%d) is powered on, "
			    "while its ancestor, %s@%s(%s#%d), "
			    "is powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9057 
9058 /*
9059  * Record the fact that one thread is borrowing the lock on a device node.
9060  * Use is restricted to the case where the lending thread will block until
9061  * the borrowing thread (always curthread) completes.
9062  */
9063 void
9064 pm_borrow_lock(kthread_t *lender)
9065 {
9066 	lock_loan_t *prev = &lock_loan_head;
9067 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9068 
9069 	cur->pmlk_borrower = curthread;
9070 	cur->pmlk_lender = lender;
9071 	mutex_enter(&pm_loan_lock);
9072 	cur->pmlk_next = prev->pmlk_next;
9073 	prev->pmlk_next = cur;
9074 	mutex_exit(&pm_loan_lock);
9075 }
9076 
9077 /*
9078  * Return the borrowed lock.  A thread can borrow only one.
9079  */
9080 void
9081 pm_return_lock(void)
9082 {
9083 	lock_loan_t *cur;
9084 	lock_loan_t *prev = &lock_loan_head;
9085 
9086 	mutex_enter(&pm_loan_lock);
9087 	ASSERT(prev->pmlk_next != NULL);
9088 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9089 		if (cur->pmlk_borrower == curthread)
9090 			break;
9091 
9092 	ASSERT(cur != NULL);
9093 	prev->pmlk_next = cur->pmlk_next;
9094 	mutex_exit(&pm_loan_lock);
9095 	kmem_free(cur, sizeof (*cur));
9096 }
9097