xref: /titanic_44/usr/src/uts/common/os/sunpm.c (revision edcc07547a39d6570197493a9836083bd6b2a197)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sunpm.c builds sunpm.o	"power management framework"
30  *	kernel-resident power management code.  Implements power management
31  *	policy
32  *	Assumes: all backwards compat. device components wake up on &
33  *		 the pm_info pointer in dev_info is initially NULL
34  *
35  * PM - (device) Power Management
36  *
37  * Each device may have 0 or more components.  If a device has no components,
38  * then it can't be power managed.  Each component has 2 or more
39  * power states.
40  *
41  * "Backwards Compatible" (bc) devices:
42  * There are two different types of devices from the point of view of this
43  * code.  The original type, left over from the original PM implementation on
44  * the voyager platform are known in this code as "backwards compatible"
45  * devices (PM_ISBC(dip) returns true).
46  * They are recognized by the pm code by the lack of a pm-components property
47  * and a call made by the driver to pm_create_components(9F).
48  * For these devices, component 0 is special, and represents the power state
49  * of the device.  If component 0 is to be set to power level 0 (off), then
50  * the framework must first call into the driver's detach(9E) routine with
51  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
52  * After setting component 0 from 0 to a non-zero power level, a call must be
53  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
54  *
55  * Currently, the only way to get a bc device power managed is via a set of
56  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
57  *
58  * For non-bc devices, the driver describes the components by exporting a
59  * pm-components(9P) property that tells how many components there are,
60  * tells what each component's power state values are, and provides human
61  * readable strings (currently unused) for each component name and power state.
62  * Devices which export pm-components(9P) are automatically power managed
63  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
64  * after parsing power.conf(4)). The exception to this rule is that power
65  * manageable CPU devices may be automatically managed independently of autopm
66  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
67  * ioctls) cpupm. If the CPU devices are not managed independently, then they
68  * are managed by autopm. In either case, for automatically power managed
69  * devices, all components are considered independent of each other, and it is
70  * up to the driver to decide when a transition requires saving or restoring
71  * hardware state.
72  *
73  * Each device component also has a threshold time associated with each power
74  * transition (see power.conf(4)), and a busy/idle state maintained by the
75  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
76  * Components are created idle.
77  *
78  * The PM framework provides several functions:
79  * -implement PM policy as described in power.conf(4)
80  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
81  *  Policies consist of:
82  *    -set threshold values (defaults if none provided by pmconfig)
83  *    -set dependencies among devices
84  *    -enable/disable autopm
85  *    -enable/disable cpupm
86  *    -turn down idle components based on thresholds (if autopm or cpupm is
87  *     enabled) (aka scanning)
88  *    -maintain power states based on dependencies among devices
89  *    -upon request, or when the frame buffer powers off, attempt to turn off
90  *     all components that are idle or become idle over the next (10 sec)
91  *     period in an attempt to get down to an EnergyStar compliant state
92  *    -prevent powering off of a device which exported the
93  *     pm-no-involuntary-power-cycles property without active involvement of
94  *     the device's driver (so no removing power when the device driver is
95  *     not attached)
96  * -provide a mechanism for a device driver to request that a device's component
97  *  be brought back to the power level necessary for the use of the device
98  * -allow a process to directly control the power levels of device components
99  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
100  * -ensure that the console frame buffer is powered up before being referenced
101  *  via prom_printf() or other prom calls that might generate console output
102  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
103  * -provide "backwards compatible" behavior for devices without pm-components
104  *  property
105  *
106  * Scanning:
107  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
108  * component of each managed device to its lowest power based on the threshold
109  * of idleness associated with each transition and the busy/idle state of the
110  * component.
111  *
112  * The actual work of this is done by pm_scan_dev(), which cycles through each
113  * component of a device, checking its idleness against its current threshold,
114  * and calling pm_set_power() as appropriate to change the power level.
115  * This function also indicates when it would next be profitable to scan the
116  * device again, and a new scan is scheduled after that time.
117  *
118  * Dependencies:
119  * It is possible to establish a dependency between the power states of two
120  * otherwise unrelated devices.  This is currently done to ensure that the
121  * cdrom is always up whenever the console framebuffer is up, so that the user
122  * can insert a cdrom and see a popup as a result.
123  *
124  * The dependency terminology used in power.conf(4) is not easy to understand,
125  * so we've adopted a different terminology in the implementation.  We write
126  * of a "keeps up" and a "kept up" device.  A relationship can be established
127  * where one device keeps up another.  That means that if the keepsup device
128  * has any component that is at a non-zero power level, all components of the
129  * "kept up" device must be brought to full power.  This relationship is
130  * asynchronous.  When the keeping device is powered up, a request is queued
131  * to a worker thread to bring up the kept device.  The caller does not wait.
132  * Scan will not turn down a kept up device.
133  *
134  * Direct PM:
135  * A device may be directly power managed by a process.  If a device is
136  * directly pm'd, then it will not be scanned, and dependencies will not be
136  * enforced.  If a directly pm'd device's driver requests a power change (via
138  * pm_raise_power(9F)), then the request is blocked and notification is sent
139  * to the controlling process, which must issue the requested power change for
140  * the driver to proceed.
141  *
142  */
143 
144 #include <sys/types.h>
145 #include <sys/errno.h>
146 #include <sys/callb.h>		/* callback registration during CPR */
147 #include <sys/conf.h>		/* driver flags and functions */
148 #include <sys/open.h>		/* OTYP_CHR definition */
149 #include <sys/stat.h>		/* S_IFCHR definition */
150 #include <sys/pathname.h>	/* name -> dev_info xlation */
151 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
152 #include <sys/kmem.h>		/* memory alloc stuff */
153 #include <sys/debug.h>
154 #include <sys/archsystm.h>
155 #include <sys/pm.h>
156 #include <sys/ddi.h>
157 #include <sys/sunddi.h>
158 #include <sys/sunndi.h>
159 #include <sys/sunpm.h>
160 #include <sys/epm.h>
161 #include <sys/vfs.h>
162 #include <sys/mode.h>
163 #include <sys/mkdev.h>
164 #include <sys/promif.h>
165 #include <sys/consdev.h>
166 #include <sys/esunddi.h>
167 #include <sys/modctl.h>
168 #include <sys/fs/ufs_fs.h>
169 #include <sys/note.h>
170 #include <sys/taskq.h>
171 #include <sys/bootconf.h>
172 #include <sys/reboot.h>
173 #include <sys/spl.h>
174 #include <sys/disp.h>
175 #include <sys/sobject.h>
176 #include <sys/sunmdi.h>
177 #include <sys/systm.h>
178 #include <sys/cpuvar.h>
179 #include <sys/cyclic.h>
180 #include <sys/uadmin.h>
181 #include <sys/srn.h>
182 
183 
184 /*
185  * PM LOCKING
186  *	The list of locks:
187  * Global pm mutex locks.
188  *
189  * pm_scan_lock:
190  *		It protects the timeout id of the scan thread, and the value
191  *		of autopm_enabled and cpupm.  This lock is not held
192  *		concurrently with any other PM locks.
193  *
194  * pm_clone_lock:	Protects the clone list and count of poll events
195  *		pending for the pm driver.
196  *		Lock ordering:
197  *			pm_clone_lock -> pm_pscc_interest_rwlock,
198  *			pm_clone_lock -> pm_pscc_direct_rwlock.
199  *
200  * pm_rsvp_lock:
201  *		Used to synchronize the data structures used for processes
202  *		to rendezvous with state change information when doing
203  *		direct PM.
204  *		Lock ordering:
205  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
206  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
207  *			pm_rsvp_lock -> pm_clone_lock.
208  *
209  * ppm_lock:	protects the list of registered ppm drivers
210  *		Lock ordering:
211  *			ppm_lock -> ppm driver unit_lock
212  *
213  * pm_compcnt_lock:
214  *		Protects count of components that are not at their lowest
215  *		power level.
216  *		Lock ordering:
217  *			pm_compcnt_lock -> ppm_lock.
218  *
219  * pm_dep_thread_lock:
220  *		Protects work list for pm_dep_thread.  Not taken concurrently
221  *		with any other pm lock.
222  *
223  * pm_remdrv_lock:
224  *		Serializes the operation of removing noinvol data structure
225  *		entries for a branch of the tree when a driver has been
226  *		removed from the system (modctl_rem_major).
227  *		Lock ordering:
228  *			pm_remdrv_lock -> pm_noinvol_rwlock.
229  *
230  * pm_cfb_lock: (High level spin lock)
231  *		Protects the count of how many components of the console
232  *		frame buffer are off (so we know if we have to bring up the
233  *		console as a result of a prom_printf, etc.
234  *		No other locks are taken while holding this lock.
235  *
236  * pm_loan_lock:
237  *		Protects the lock_loan list.  List is used to record that one
238  *		thread has acquired a power lock but has launched another thread
239  *		to complete its processing.  An entry in the list indicates that
240  *		the worker thread can borrow the lock held by the other thread,
241  *		which must block on the completion of the worker.  Use is
242  *		specific to module loading.
243  *		No other locks are taken while holding this lock.
244  *
245  * Global PM rwlocks
246  *
247  * pm_thresh_rwlock:
248  *		Protects the list of thresholds recorded for future use (when
249  *		devices attach).
250  *		Lock ordering:
251  *			pm_thresh_rwlock -> devi_pm_lock
252  *
253  * pm_noinvol_rwlock:
254  *		Protects list of detached nodes that had noinvol registered.
255  *		No other PM locks are taken while holding pm_noinvol_rwlock.
256  *
257  * pm_pscc_direct_rwlock:
258  *		Protects the list that maps devices being directly power
259  *		managed to the processes that manage them.
260  *		Lock ordering:
261  *			pm_pscc_direct_rwlock -> psce_lock
262  *
262  * pm_pscc_interest_rwlock:
264  *		Protects the list that maps state change events to processes
265  *		that want to know about them.
266  *		Lock ordering:
267  *			pm_pscc_interest_rwlock -> psce_lock
268  *
269  * per-dip locks:
270  *
271  * Each node has these per-dip locks, which are only used if the device is
272  * a candidate for power management (e.g. has pm components)
273  *
274  * devi_pm_lock:
275  *		Protects all power management state of the node except for
276  *		power level, which is protected by ndi_devi_enter().
277  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
278  *		Lock ordering:
279  *			devi_pm_lock -> pm_rsvp_lock,
280  *			devi_pm_lock -> pm_dep_thread_lock,
281  *			devi_pm_lock -> pm_noinvol_rwlock,
282  *			devi_pm_lock -> power lock
283  *
284  * power lock (ndi_devi_enter()):
285  *		Since changing power level is possibly a slow operation (30
286  *		seconds to spin up a disk drive), this is locked separately.
287  *		Since a call into the driver to change the power level of one
288  *		component may result in a call back into the framework to change
289  *		the power level of another, this lock allows re-entrancy by
290  *		the same thread (ndi_devi_enter is used for this because
291  *		the USB framework uses ndi_devi_enter in its power entry point,
292  *		and use of any other lock would produce a deadlock.
293  *
294  * devi_pm_busy_lock:
295  *		This lock protects the integrity of the busy count.  It is
296  *		only taken by pm_busy_component() and pm_idle_component and
297  *		some code that adjust the busy time after the timer gets set
298  *		up or after a CPR operation.  It is per-dip to keep from
299  *		single-threading all the disk drivers on a system.
300  *		It could be per component instead, but most devices have
301  *		only one component.
302  *		No other PM locks are taken while holding this lock.
303  *
304  */
305 
306 static int stdout_is_framebuffer;
307 static kmutex_t	e_pm_power_lock;
308 static kmutex_t pm_loan_lock;
309 kmutex_t	pm_scan_lock;
310 callb_id_t	pm_cpr_cb_id;
311 callb_id_t	pm_panic_cb_id;
312 callb_id_t	pm_halt_cb_id;
313 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
314 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
315 
316 clock_t pm_min_scan = PM_MIN_SCAN;
317 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
318 
319 static int pm_busop_set_power(dev_info_t *,
320     void *, pm_bus_power_op_t, void *, void *);
321 static int pm_busop_match_request(dev_info_t *, void *);
322 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
323 static void e_pm_set_max_power(dev_info_t *, int, int);
324 static int e_pm_get_max_power(dev_info_t *, int);
325 
326 /*
327  * Dependency processing is done through a separate thread.
328  */
329 kmutex_t	pm_dep_thread_lock;
330 kcondvar_t	pm_dep_thread_cv;
331 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
332 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
333 
334 /*
335  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
336  * power managing things in single user mode that have been suppressed via
337  * power.conf entries.  Protected by pm_scan_lock.
338  */
339 int		autopm_enabled;
340 
341 /*
342  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
343  * to define the power management behavior of CPU devices separate from
344  * autopm. Protected by pm_scan_lock.
345  */
346 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
347 
348 /*
349  * AutoS3 depends on autopm being enabled, and must be enabled by
350  * PM_START_AUTOS3 command.
351  */
352 int		autoS3_enabled;
353 
354 #if !defined(__sparc)
355 /*
356  * on sparc these live in fillsysinfo.c
357  *
358  * If this variable is non-zero, cpr should return "not supported" when
359  * it is queried even though it would normally be supported on this platform.
360  */
361 int cpr_supported_override;
362 
363 /*
364  * Some platforms may need to support CPR even in the absence of
365  * having the correct platform id information.  If this
366  * variable is non-zero, cpr should proceed even in the absence
367  * of otherwise being qualified.
368  */
369 int cpr_platform_enable = 0;
370 
371 #endif
372 
373 /*
374  * pm_S3_enabled indicates that we believe the platform can support S3,
375  * which we get from pmconfig(1M)
376  */
377 int		pm_S3_enabled;
378 
379 /*
380  * This flag is true while processes are stopped for a checkpoint/resume.
381  * Controlling processes of direct pm'd devices are not available to
382  * participate in power level changes, so we bypass them when this is set.
383  */
384 static int	pm_processes_stopped;
385 
386 #ifdef	DEBUG
387 
388 /*
389  * see common/sys/epm.h for PMD_* values
390  */
391 
392 uint_t		pm_debug = 0;
393 
394 /*
395  * If pm_divertdebug is set, then no prom_printf calls will be made by
396  * PMD(), which will prevent debug output from bringing up the console
397  * frame buffer.  Clearing this variable before setting pm_debug will result
398  * in PMD output going to the console.
399  *
400  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
401  * deadlocks and decremented at the end of pm_set_power()
402  */
403 uint_t		pm_divertdebug = 1;
404 volatile uint_t pm_debug_to_console = 0;
405 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
406 
407 void prdeps(char *);
408 #endif
409 
410 /* Globals */
411 
412 /*
413  * List of recorded thresholds and dependencies
414  */
415 pm_thresh_rec_t *pm_thresh_head;
416 krwlock_t pm_thresh_rwlock;
417 
418 pm_pdr_t *pm_dep_head;
419 static int pm_unresolved_deps = 0;
420 static int pm_prop_deps = 0;
421 
422 /*
423  * List of devices that exported no-involuntary-power-cycles property
424  */
425 pm_noinvol_t *pm_noinvol_head;
426 
427 /*
428  * Locks used in noinvol processing
429  */
430 krwlock_t pm_noinvol_rwlock;
431 kmutex_t pm_remdrv_lock;
432 
433 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
434 int pm_system_idle_threshold;
435 int pm_cpu_idle_threshold;
436 
437 /*
438  * By default nexus has 0 threshold, and depends on its children to keep it up
439  */
440 int pm_default_nexus_threshold = 0;
441 
442 /*
443  * Data structures shared with common/io/pm.c
444  */
445 kmutex_t	pm_clone_lock;
446 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
447 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
448 unsigned char	pm_interest[PM_MAX_CLONE];
449 struct pollhead	pm_pollhead;
450 
451 /*
452  * Data structures shared with common/io/srn.c
453  */
454 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
455 void (*srn_signal)(int type, int event);
456 int srn_inuse;				/* stop srn detach */
457 
458 extern int	hz;
459 extern char	*platform_module_list[];
460 
461 /*
462  * Wrappers for use in ddi_walk_devs
463  */
464 
465 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
466 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
467 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
468 static int		pm_discard_dep_walk(dev_info_t *, void *);
469 #ifdef DEBUG
470 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
471 #endif
472 
473 /*
474  * Routines for managing noinvol devices
475  */
476 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
477 void			pm_noinvol_update_node(dev_info_t *,
478 			    pm_bp_noinvol_t *req);
479 
480 kmutex_t pm_rsvp_lock;
481 kmutex_t pm_compcnt_lock;
482 krwlock_t pm_pscc_direct_rwlock;
483 krwlock_t pm_pscc_interest_rwlock;
484 
485 #define	PSC_INTEREST	0	/* belongs to interest psc list */
486 #define	PSC_DIRECT	1	/* belongs to direct psc list */
487 
488 pscc_t *pm_pscc_interest;
489 pscc_t *pm_pscc_direct;
490 
491 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
492 #define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
493 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
494 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
495 
496 #define	PM_INCR_NOTLOWEST(dip) {					\
497 	mutex_enter(&pm_compcnt_lock);					\
498 	if (!PM_IS_NEXUS(dip) ||					\
499 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
500 		if (pm_comps_notlowest == 0)				\
501 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
502 		pm_comps_notlowest++;					\
503 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
504 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
505 	}								\
506 	mutex_exit(&pm_compcnt_lock);					\
507 }
508 #define	PM_DECR_NOTLOWEST(dip) {					\
509 	mutex_enter(&pm_compcnt_lock);					\
510 	if (!PM_IS_NEXUS(dip) ||					\
511 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
512 		ASSERT(pm_comps_notlowest);				\
513 		pm_comps_notlowest--;					\
514 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
515 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
516 		if (pm_comps_notlowest == 0)				\
517 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
518 	}								\
519 	mutex_exit(&pm_compcnt_lock);					\
520 }
521 
522 /*
523  * console frame-buffer power-management is not enabled when
524  * debugging services are present.  to override, set pm_cfb_override
525  * to non-zero.
526  */
527 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
528 kmutex_t pm_cfb_lock;
529 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
530 #ifdef DEBUG
531 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
532 #else
533 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
534 #endif
535 
536 static dev_info_t *cfb_dip = 0;
537 static dev_info_t *cfb_dip_detaching = 0;
538 uint_t cfb_inuse = 0;
539 static ddi_softintr_t pm_soft_id;
540 static clock_t pm_soft_pending;
541 int	pm_scans_disabled = 0;
542 
543 /*
544  * A structure to record the fact that one thread has borrowed a lock held
545  * by another thread.  The context requires that the lender block on the
546  * completion of the borrower.
547  */
548 typedef struct lock_loan {
549 	struct lock_loan	*pmlk_next;
550 	kthread_t		*pmlk_borrower;
551 	kthread_t		*pmlk_lender;
552 	dev_info_t		*pmlk_dip;
553 } lock_loan_t;
554 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
555 
556 #ifdef	DEBUG
557 #ifdef	PMDDEBUG
558 #define	PMD_FUNC(func, name)	char *(func) = (name);
559 #else	/* !PMDDEBUG */
560 #define	PMD_FUNC(func, name)
561 #endif	/* PMDDEBUG */
562 #else	/* !DEBUG */
563 #define	PMD_FUNC(func, name)
564 #endif	/* DEBUG */
565 
566 
567 /*
568  * Must be called before first device (including pseudo) attach
569  */
570 void
571 pm_init_locks(void)
572 {
573 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
574 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
575 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
576 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
577 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
578 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
579 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
580 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
581 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
582 }
583 
584 static boolean_t
585 pm_cpr_callb(void *arg, int code)
586 {
587 	_NOTE(ARGUNUSED(arg))
588 	static int auto_save;
589 	static pm_cpupm_t cpupm_save;
590 	static int pm_reset_timestamps(dev_info_t *, void *);
591 
592 	switch (code) {
593 	case CB_CODE_CPR_CHKPT:
594 		/*
595 		 * Cancel scan or wait for scan in progress to finish
596 		 * Other threads may be trying to restart the scan, so we
597 		 * have to keep at it unil it sticks
598 		 */
599 		mutex_enter(&pm_scan_lock);
600 		ASSERT(!pm_scans_disabled);
601 		pm_scans_disabled = 1;
602 		auto_save = autopm_enabled;
603 		autopm_enabled = 0;
604 		cpupm_save = cpupm;
605 		cpupm = PM_CPUPM_NOTSET;
606 		mutex_exit(&pm_scan_lock);
607 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
608 		break;
609 
610 	case CB_CODE_CPR_RESUME:
611 		ASSERT(!autopm_enabled);
612 		ASSERT(cpupm == PM_CPUPM_NOTSET);
613 		ASSERT(pm_scans_disabled);
614 		pm_scans_disabled = 0;
615 		/*
616 		 * Call pm_reset_timestamps to reset timestamps of each
617 		 * device to the time when the system is resumed so that their
618 		 * idleness can be re-calculated. That's to avoid devices from
619 		 * being powered down right after resume if the system was in
620 		 * suspended mode long enough.
621 		 */
622 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
623 
624 		autopm_enabled = auto_save;
625 		cpupm = cpupm_save;
626 		/*
627 		 * If there is any auto-pm device, get the scanning
628 		 * going. Otherwise don't bother.
629 		 */
630 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
631 		break;
632 	}
633 	return (B_TRUE);
634 }
635 
636 /*
637  * This callback routine is called when there is a system panic.  This function
638  * exists for prototype matching.
639  */
640 static boolean_t
641 pm_panic_callb(void *arg, int code)
642 {
643 	_NOTE(ARGUNUSED(arg, code))
644 	void pm_cfb_check_and_powerup(void);
645 	PMD(PMD_CFB, ("pm_panic_callb\n"))
646 	pm_cfb_check_and_powerup();
647 	return (B_TRUE);
648 }
649 
650 static boolean_t
651 pm_halt_callb(void *arg, int code)
652 {
653 	_NOTE(ARGUNUSED(arg, code))
654 	return (B_TRUE);
655 }
656 
657 /*
658  * This needs to be called after the root and platform drivers are loaded
659  * and be single-threaded with respect to driver attach/detach
660  */
661 void
662 pm_init(void)
663 {
664 	PMD_FUNC(pmf, "pm_init")
665 	char **mod;
666 	extern pri_t minclsyspri;
667 	static void pm_dep_thread(void);
668 
669 	pm_comps_notlowest = 0;
670 	pm_system_idle_threshold = pm_default_idle_threshold;
671 	pm_cpu_idle_threshold = 0;
672 
673 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
674 	    CB_CL_CPR_PM, "pm_cpr");
675 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
676 	    CB_CL_PANIC, "pm_panic");
677 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
678 	    CB_CL_HALT, "pm_halt");
679 
680 	/*
681 	 * Create a thread to do dependency processing.
682 	 */
683 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
684 	    TS_RUN, minclsyspri);
685 
686 	/*
687 	 * loadrootmodules already loaded these ppm drivers, now get them
688 	 * attached so they can claim the root drivers as they attach
689 	 */
690 	for (mod = platform_module_list; *mod; mod++) {
691 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
692 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
693 			    *mod);
694 		} else {
695 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
696 			    ddi_major_to_name(ddi_name_to_major(*mod))))
697 		}
698 	}
699 }
700 
701 /*
702  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
703  * enabled) when device becomes power managed or after a failed detach and
704  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
705  * a CPR resume to get all the devices scanning again.
706  */
707 void
708 pm_scan_init(dev_info_t *dip)
709 {
710 	PMD_FUNC(pmf, "scan_init")
711 	pm_scan_t	*scanp;
712 
713 	ASSERT(!PM_ISBC(dip));
714 
715 	PM_LOCK_DIP(dip);
716 	scanp = PM_GET_PM_SCAN(dip);
717 	if (!scanp) {
718 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
719 		    pmf, PM_DEVICE(dip)))
720 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
721 		DEVI(dip)->devi_pm_scan = scanp;
722 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
723 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
724 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
725 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
726 	}
727 	PM_UNLOCK_DIP(dip);
728 }
729 
730 /*
731  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
732  */
733 void
734 pm_scan_fini(dev_info_t *dip)
735 {
736 	PMD_FUNC(pmf, "scan_fini")
737 	pm_scan_t	*scanp;
738 
739 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
740 	ASSERT(!PM_ISBC(dip));
741 	PM_LOCK_DIP(dip);
742 	scanp = PM_GET_PM_SCAN(dip);
743 	if (!scanp) {
744 		PM_UNLOCK_DIP(dip);
745 		return;
746 	}
747 
748 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
749 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
750 
751 	kmem_free(scanp, sizeof (pm_scan_t));
752 	DEVI(dip)->devi_pm_scan = NULL;
753 	PM_UNLOCK_DIP(dip);
754 }
755 
756 /*
757  * Given a pointer to a component struct, return the current power level
758  * (struct contains index unless it is a continuous level).
759  * Located here in hopes of getting both this and dev_is_needed into the
760  * cache together
761  */
762 static int
763 cur_power(pm_component_t *cp)
764 {
765 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
766 		return (cp->pmc_cur_pwr);
767 
768 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
769 }
770 
771 static char *
772 pm_decode_direction(int direction)
773 {
774 	switch (direction) {
775 	case PM_LEVEL_UPONLY:
776 		return ("up");
777 
778 	case PM_LEVEL_EXACT:
779 		return ("exact");
780 
781 	case PM_LEVEL_DOWNONLY:
782 		return ("down");
783 
784 	default:
785 		return ("INVALID DIRECTION");
786 	}
787 }
788 
789 char *
790 pm_decode_op(pm_bus_power_op_t op)
791 {
792 	switch (op) {
793 	case BUS_POWER_CHILD_PWRCHG:
794 		return ("CHILD_PWRCHG");
795 	case BUS_POWER_NEXUS_PWRUP:
796 		return ("NEXUS_PWRUP");
797 	case BUS_POWER_PRE_NOTIFICATION:
798 		return ("PRE_NOTIFICATION");
799 	case BUS_POWER_POST_NOTIFICATION:
800 		return ("POST_NOTIFICATION");
801 	case BUS_POWER_HAS_CHANGED:
802 		return ("HAS_CHANGED");
803 	case BUS_POWER_NOINVOL:
804 		return ("NOINVOL");
805 	default:
806 		return ("UNKNOWN OP");
807 	}
808 }
809 
810 /*
811  * Returns true if level is a possible (valid) power level for component
812  */
813 int
814 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
815 {
816 	PMD_FUNC(pmf, "e_pm_valid_power")
817 	pm_component_t *cp = PM_CP(dip, cmpt);
818 	int i;
819 	int *ip = cp->pmc_comp.pmc_lvals;
820 	int limit = cp->pmc_comp.pmc_numlevels;
821 
822 	if (level < 0)
823 		return (0);
824 	for (i = 0; i < limit; i++) {
825 		if (level == *ip++)
826 			return (1);
827 	}
828 #ifdef DEBUG
829 	if (pm_debug & PMD_FAIL) {
830 		ip = cp->pmc_comp.pmc_lvals;
831 
832 		for (i = 0; i < limit; i++)
833 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
834 			    pmf, i, *ip++))
835 	}
836 #endif
837 	return (0);
838 }
839 
840 /*
841  * Returns true if device is pm'd (after calling pm_start if need be)
842  */
843 int
844 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
845 {
846 	pm_info_t *info;
847 	static int pm_start(dev_info_t *dip);
848 
849 	/*
850 	 * Check if the device is power managed if not.
851 	 * To make the common case (device is power managed already)
852 	 * fast, we check without the lock.  If device is not already
853 	 * power managed, then we take the lock and the long route through
854 	 * go get it managed.  Devices never go unmanaged until they
855 	 * detach.
856 	 */
857 	info = PM_GET_PM_INFO(dip);
858 	if (!info) {
859 		if (!DEVI_IS_ATTACHING(dip)) {
860 			return (0);
861 		}
862 		if (pm_start(dip) != DDI_SUCCESS) {
863 			return (0);
864 		}
865 		info = PM_GET_PM_INFO(dip);
866 	}
867 	ASSERT(info);
868 	if (infop != NULL)
869 		*infop = info;
870 	return (1);
871 }
872 
873 int
874 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
875 {
876 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
877 		if (cpp != NULL)
878 			*cpp = PM_CP(dip, cmpt);
879 		return (1);
880 	} else {
881 		return (0);
882 	}
883 }
884 
885 /*
886  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
887  */
888 static int
889 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
890 {
891 	PMD_FUNC(pmf, "din")
892 	pm_component_t *cp;
893 	char *pathbuf;
894 	int result;
895 
896 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
897 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
898 	    !e_pm_valid_power(dip, cmpt, level))
899 		return (DDI_FAILURE);
900 
901 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
902 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
903 	    level, cur_power(cp)))
904 
905 	if (pm_set_power(dip, cmpt, level,  direction,
906 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
907 		if (direction == PM_LEVEL_UPONLY) {
908 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
909 			(void) ddi_pathname(dip, pathbuf);
910 			cmn_err(CE_WARN, "Device %s failed to power up.",
911 			    pathbuf);
912 			kmem_free(pathbuf, MAXPATHLEN);
913 		}
914 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
915 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
916 		    pm_decode_direction(direction), level, result))
917 		return (DDI_FAILURE);
918 	}
919 
920 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
921 	    PM_DEVICE(dip)))
922 	pm_rescan(dip);
923 	return (DDI_SUCCESS);
924 }
925 
/*
 * We can get multiple pm_rescan() threads, if one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t pointer of the device to rescan.  Besides being
 * called directly, this function is also installed as a timeout()
 * callback (see the timeout() calls below and in pm_scan()).
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	/*
	 * Nothing to do if scanning is globally disabled, the device is not
	 * scanable, it has no pm info or scan state, or its scan has been
	 * told to stop.
	 */
	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is in progress: just ask it to go round again */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it.  The id is cleared
		 * under the lock and the lock is dropped around untimeout().
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		if (scanp->ps_scan_id) {
			/*
			 * Cancel any timeout that is recorded at this point,
			 * again dropping the lock around untimeout().
			 */
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			/*
			 * While the lock was dropped another thread may have
			 * scheduled its own rescan; if so, leave it in place.
			 */
			if (scanp->ps_scan_id) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/*
		 * Retry later via timeout: after pm_id_ticks when idle-down
		 * is in effect, otherwise after pm_min_scan * hz ticks.
		 */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (pm_min_scan * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* taskq_dispatch() accepted pm_scan; remember it is queued */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
1014 
/*
 * Runs one scan pass (or several, while PM_SCAN_AGAIN keeps being set) over
 * the device passed in arg by calling pm_scan_dev(), then schedules the
 * next pm_rescan() timeout from the value pm_scan_dev() returned.
 * pm_rescan() dispatches this function via taskq_dispatch().
 *
 * arg is the dev_info_t pointer of the device to scan.
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/*
	 * Scanning is off for this device: clear the "again" and
	 * "dispatched" flags and give up.
	 */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* let the thread already scanning do another pass for us */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	/* we are now the scanning thread; the dispatch has been consumed */
	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	/*
	 * Scan with the dip lock dropped, and repeat for as long as somebody
	 * set PM_SCAN_AGAIN while the scan was running.
	 */
	do {
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot exceed LONG_MAX */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* a rescan timeout is already recorded; keep it */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			/* nextscan is in seconds; timeout() takes ticks */
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1094 
1095 void
1096 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1097 {
1098 	int components = PM_NUMCMPTS(dip);
1099 	int i;
1100 
1101 	ASSERT(components > 0);
1102 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1103 	for (i = 0; i < components; i++) {
1104 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1105 	}
1106 	PM_UNLOCK_BUSY(dip);
1107 }
1108 
1109 /*
1110  * Returns true if device needs to be kept up because it exported the
1111  * "no-involuntary-power-cycles" property or we're pretending it did (console
1112  * fb case) or it is an ancestor of such a device and has used up the "one
1113  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1114  * upon detach
1115  */
1116 int
1117 pm_noinvol(dev_info_t *dip)
1118 {
1119 	PMD_FUNC(pmf, "noinvol")
1120 
1121 	/*
1122 	 * This doesn't change over the life of a driver, so no locking needed
1123 	 */
1124 	if (PM_IS_CFB(dip)) {
1125 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1126 		    pmf, PM_DEVICE(dip)))
1127 		return (1);
1128 	}
1129 	/*
1130 	 * Not an issue if no such kids
1131 	 */
1132 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1133 #ifdef DEBUG
1134 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1135 			dev_info_t *pdip = dip;
1136 			do {
1137 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1138 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1139 				    DEVI(pdip)->devi_pm_noinvolpm,
1140 				    DEVI(pdip)->devi_pm_volpmd))
1141 				pdip = ddi_get_parent(pdip);
1142 			} while (pdip);
1143 		}
1144 #endif
1145 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1146 		return (0);
1147 	}
1148 
1149 	/*
1150 	 * Since we now maintain the counts correct at every node, we no longer
1151 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1152 	 * without the children getting their counts adjusted.
1153 	 */
1154 
1155 #ifdef	DEBUG
1156 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1157 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1158 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1159 		    PM_DEVICE(dip)))
1160 #endif
1161 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1162 }
1163 
1164 /*
1165  * This function performs the actual scanning of the device.
1166  * It attempts to power off the indicated device's components if they have
1167  * been idle and other restrictions are met.
1168  * pm_scan_dev calculates and returns when the next scan should happen for
1169  * this device.
1170  */
1171 time_t
1172 pm_scan_dev(dev_info_t *dip)
1173 {
1174 	PMD_FUNC(pmf, "scan_dev")
1175 	pm_scan_t	*scanp;
1176 	time_t		*timestamp, idletime, now, thresh;
1177 	time_t		timeleft = 0;
1178 #ifdef PMDDEBUG
1179 	int		curpwr;
1180 #endif
1181 	int		i, nxtpwr, pwrndx, unused;
1182 	size_t		size;
1183 	pm_component_t	 *cp;
1184 	dev_info_t	*pdip = ddi_get_parent(dip);
1185 	int		circ;
1186 	static int	cur_threshold(dev_info_t *, int);
1187 	static int	pm_next_lower_power(pm_component_t *, int);
1188 
1189 	/*
1190 	 * skip attaching device
1191 	 */
1192 	if (DEVI_IS_ATTACHING(dip)) {
1193 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1194 		    pmf, PM_DEVICE(dip), pm_min_scan))
1195 		return (pm_min_scan);
1196 	}
1197 
1198 	PM_LOCK_DIP(dip);
1199 	scanp = PM_GET_PM_SCAN(dip);
1200 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1201 
1202 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1203 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1204 	    PM_KUC(dip)))
1205 
1206 	/* no scan under the following conditions */
1207 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1208 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1209 	    (PM_KUC(dip) != 0) ||
1210 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1211 		PM_UNLOCK_DIP(dip);
1212 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1213 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1214 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1215 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1216 		    cpupm, PM_KUC(dip),
1217 		    PM_ISDIRECT(dip) ? "is" : "is not",
1218 		    pm_noinvol(dip) ? "is" : "is not"))
1219 		return (LONG_MAX);
1220 	}
1221 	PM_UNLOCK_DIP(dip);
1222 
1223 	if (!ndi_devi_tryenter(pdip, &circ)) {
1224 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1225 		    pmf, PM_DEVICE(pdip)))
1226 		return ((time_t)1);
1227 	}
1228 	now = gethrestime_sec();
1229 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1230 	timestamp = kmem_alloc(size, KM_SLEEP);
1231 	pm_get_timestamps(dip, timestamp);
1232 
1233 	/*
1234 	 * Since we removed support for backwards compatible devices,
1235 	 * (see big comment at top of file)
1236 	 * it is no longer required to deal with component 0 last.
1237 	 */
1238 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1239 		/*
1240 		 * If already off (an optimization, perhaps)
1241 		 */
1242 		cp = PM_CP(dip, i);
1243 		pwrndx = cp->pmc_cur_pwr;
1244 #ifdef PMDDEBUG
1245 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1246 		    PM_LEVEL_UNKNOWN :
1247 		    cp->pmc_comp.pmc_lvals[pwrndx];
1248 #endif
1249 
1250 		if (pwrndx == 0) {
1251 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1252 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1253 			/* skip device if off or at its lowest */
1254 			continue;
1255 		}
1256 
1257 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1258 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1259 			/* were busy or newly became busy by another thread */
1260 			if (timeleft == 0)
1261 				timeleft = max(thresh, pm_min_scan);
1262 			else
1263 				timeleft = min(
1264 				    timeleft, max(thresh, pm_min_scan));
1265 			continue;
1266 		}
1267 
1268 		idletime = now - timestamp[i];		/* idle time */
1269 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1270 		    pmf, PM_DEVICE(dip), i, idletime))
1271 		if (idletime >= thresh || PM_IS_PID(dip)) {
1272 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1273 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1274 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1275 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1276 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1277 			    PM_CURPOWER(dip, i) != nxtpwr) {
1278 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1279 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1280 				    i, curpwr, nxtpwr))
1281 				timeleft = pm_min_scan;
1282 				continue;
1283 			} else {
1284 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1285 				    "%d->%d, GOOD curpwr %d\n", pmf,
1286 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1287 				    cur_power(cp)))
1288 
1289 				if (nxtpwr == 0)	/* component went off */
1290 					continue;
1291 
1292 				/*
1293 				 * scan to next lower level
1294 				 */
1295 				if (timeleft == 0)
1296 					timeleft = max(
1297 					    1, cur_threshold(dip, i));
1298 				else
1299 					timeleft = min(timeleft,
1300 					    max(1, cur_threshold(dip, i)));
1301 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1302 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1303 				    i, timeleft))
1304 			}
1305 		} else {	/* comp not idle long enough */
1306 			if (timeleft == 0)
1307 				timeleft = thresh - idletime;
1308 			else
1309 				timeleft = min(timeleft, (thresh - idletime));
1310 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1311 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1312 		}
1313 	}
1314 	ndi_devi_exit(pdip, circ);
1315 	kmem_free(timestamp, size);
1316 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1317 	    PM_DEVICE(dip), timeleft))
1318 
1319 	/*
1320 	 * if components are already at lowest level, timeleft is left 0
1321 	 */
1322 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1323 }
1324 
1325 /*
1326  * pm_scan_stop - cancel scheduled pm_rescan,
1327  *                wait for termination of dispatched pm_scan thread
1328  *                     and active pm_scan_dev thread.
1329  */
1330 void
1331 pm_scan_stop(dev_info_t *dip)
1332 {
1333 	PMD_FUNC(pmf, "scan_stop")
1334 	pm_scan_t	*scanp;
1335 	timeout_id_t	scanid;
1336 
1337 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1338 	PM_LOCK_DIP(dip);
1339 	scanp = PM_GET_PM_SCAN(dip);
1340 	if (!scanp) {
1341 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1342 		    pmf, PM_DEVICE(dip)))
1343 		PM_UNLOCK_DIP(dip);
1344 		return;
1345 	}
1346 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1347 
1348 	/* cancel scheduled scan taskq */
1349 	while (scanp->ps_scan_id) {
1350 		scanid = scanp->ps_scan_id;
1351 		scanp->ps_scan_id = 0;
1352 		PM_UNLOCK_DIP(dip);
1353 		(void) untimeout(scanid);
1354 		PM_LOCK_DIP(dip);
1355 	}
1356 
1357 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1358 		PM_UNLOCK_DIP(dip);
1359 		delay(1);
1360 		PM_LOCK_DIP(dip);
1361 	}
1362 	PM_UNLOCK_DIP(dip);
1363 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1364 }
1365 
1366 int
1367 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1368 {
1369 	_NOTE(ARGUNUSED(arg))
1370 
1371 	if (!PM_GET_PM_SCAN(dip))
1372 		return (DDI_WALK_CONTINUE);
1373 	ASSERT(!PM_ISBC(dip));
1374 	pm_scan_stop(dip);
1375 	return (DDI_WALK_CONTINUE);
1376 }
1377 
1378 /*
1379  * Converts a power level value to its index
1380  */
1381 static int
1382 power_val_to_index(pm_component_t *cp, int val)
1383 {
1384 	int limit, i, *ip;
1385 
1386 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1387 	    val != PM_LEVEL_EXACT);
1388 	/*  convert power value into index (i) */
1389 	limit = cp->pmc_comp.pmc_numlevels;
1390 	ip = cp->pmc_comp.pmc_lvals;
1391 	for (i = 0; i < limit; i++)
1392 		if (val == *ip++)
1393 			return (i);
1394 	return (-1);
1395 }
1396 
1397 /*
1398  * Converts a numeric power level to a printable string
1399  */
1400 static char *
1401 power_val_to_string(pm_component_t *cp, int val)
1402 {
1403 	int index;
1404 
1405 	if (val == PM_LEVEL_UPONLY)
1406 		return ("<UPONLY>");
1407 
1408 	if (val == PM_LEVEL_UNKNOWN ||
1409 	    (index = power_val_to_index(cp, val)) == -1)
1410 		return ("<LEVEL_UNKNOWN>");
1411 
1412 	return (cp->pmc_comp.pmc_lnames[index]);
1413 }
1414 
1415 /*
1416  * Return true if this node has been claimed by a ppm.
1417  */
1418 static int
1419 pm_ppm_claimed(dev_info_t *dip)
1420 {
1421 	return (PPM(dip) != NULL);
1422 }
1423 
1424 /*
1425  * A node which was voluntarily power managed has just used up its "free cycle"
1426  * and need is volpmd field cleared, and the same done to all its descendents
1427  */
1428 static void
1429 pm_clear_volpm_dip(dev_info_t *dip)
1430 {
1431 	PMD_FUNC(pmf, "clear_volpm_dip")
1432 
1433 	if (dip == NULL)
1434 		return;
1435 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1436 	    PM_DEVICE(dip)))
1437 	DEVI(dip)->devi_pm_volpmd = 0;
1438 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1439 		pm_clear_volpm_dip(dip);
1440 	}
1441 }
1442 
1443 /*
1444  * A node which was voluntarily power managed has used up the "free cycles"
1445  * for the subtree that it is the root of.  Scan through the list of detached
1446  * nodes and adjust the counts of any that are descendents of the node.
1447  */
1448 static void
1449 pm_clear_volpm_list(dev_info_t *dip)
1450 {
1451 	PMD_FUNC(pmf, "clear_volpm_list")
1452 	char	*pathbuf;
1453 	size_t	len;
1454 	pm_noinvol_t *ip;
1455 
1456 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1457 	(void) ddi_pathname(dip, pathbuf);
1458 	len = strlen(pathbuf);
1459 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1460 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1461 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1462 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1463 		    ip->ni_path))
1464 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1465 		    ip->ni_path[len] == '/') {
1466 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1467 			    ip->ni_path))
1468 			ip->ni_volpmd = 0;
1469 			ip->ni_wasvolpmd = 0;
1470 		}
1471 	}
1472 	kmem_free(pathbuf, MAXPATHLEN);
1473 	rw_exit(&pm_noinvol_rwlock);
1474 }
1475 
1476 /*
1477  * Powers a device, suspending or resuming the driver if it is a backward
1478  * compatible device, calling into ppm to change power level.
1479  * Called with the component's power lock held.
1480  */
1481 static int
1482 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1483     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1484 {
1485 	PMD_FUNC(pmf, "power_dev")
1486 	power_req_t power_req;
1487 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1488 	int		resume_needed = 0;
1489 	int		suspended = 0;
1490 	int		result;
1491 #ifdef PMDDEBUG
1492 	struct pm_component *cp = PM_CP(dip, comp);
1493 #endif
1494 	int		bc = PM_ISBC(dip);
1495 	int pm_all_components_off(dev_info_t *);
1496 	int		clearvolpmd = 0;
1497 	char		pathbuf[MAXNAMELEN];
1498 #ifdef PMDDEBUG
1499 	char *ppmname, *ppmaddr;
1500 #endif
1501 	/*
1502 	 * If this is comp 0 of a backwards compat device and we are
1503 	 * going to take the power away, we need to detach it with
1504 	 * DDI_PM_SUSPEND command.
1505 	 */
1506 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1507 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1508 			/* We could not suspend before turning cmpt zero off */
1509 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1510 			    pmf, PM_DEVICE(dip)))
1511 			return (DDI_FAILURE);
1512 		} else {
1513 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1514 			suspended++;
1515 		}
1516 	}
1517 	power_req.request_type = PMR_PPM_SET_POWER;
1518 	power_req.req.ppm_set_power_req.who = dip;
1519 	power_req.req.ppm_set_power_req.cmpt = comp;
1520 	power_req.req.ppm_set_power_req.old_level = old_level;
1521 	power_req.req.ppm_set_power_req.new_level = level;
1522 	power_req.req.ppm_set_power_req.canblock = canblock;
1523 	power_req.req.ppm_set_power_req.cookie = NULL;
1524 #ifdef PMDDEBUG
1525 	if (pm_ppm_claimed(dip)) {
1526 		ppmname = PM_NAME(PPM(dip));
1527 		ppmaddr = PM_ADDR(PPM(dip));
1528 
1529 	} else {
1530 		ppmname = "noppm";
1531 		ppmaddr = "0";
1532 	}
1533 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1534 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1535 	    power_val_to_string(cp, old_level), old_level,
1536 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1537 #endif
1538 	/*
1539 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1540 	 * bc device comp 0 is powering on, then we count it as a power cycle
1541 	 * against its voluntary count.
1542 	 */
1543 	if (DEVI(dip)->devi_pm_volpmd &&
1544 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1545 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1546 		clearvolpmd = 1;
1547 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1548 	    &power_req, &result)) == DDI_SUCCESS) {
1549 		/*
1550 		 * Now do involuntary pm accounting;  If we've just cycled power
1551 		 * on a voluntarily pm'd node, and by inference on its entire
1552 		 * subtree, we need to set the subtree (including those nodes
1553 		 * already detached) volpmd counts to 0, and subtract out the
1554 		 * value of the current node's volpmd count from the ancestors
1555 		 */
1556 		if (clearvolpmd) {
1557 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1558 			pm_clear_volpm_dip(dip);
1559 			pm_clear_volpm_list(dip);
1560 			if (volpmd) {
1561 				(void) ddi_pathname(dip, pathbuf);
1562 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1563 				    volpmd, 0, pathbuf, dip);
1564 			}
1565 		}
1566 	} else {
1567 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1568 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1569 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1570 	}
1571 	/*
1572 	 * If some other devices were also powered up (e.g. other cpus in
1573 	 * the same domain) return a pointer to that list
1574 	 */
1575 	if (devlist) {
1576 		*devlist = (pm_ppm_devlist_t *)
1577 		    power_req.req.ppm_set_power_req.cookie;
1578 	}
1579 	/*
1580 	 * We will have to resume the device if the device is backwards compat
1581 	 * device and either of the following is true:
1582 	 * -This is comp 0 and we have successfully powered it up
1583 	 * -This is comp 0 and we have failed to power it down. Resume is
1584 	 *  needed because we have suspended it above
1585 	 */
1586 
1587 	if (bc && comp == 0) {
1588 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1589 		if (power_op_ret == DDI_SUCCESS) {
1590 			if (POWERING_ON(old_level, level)) {
1591 				/*
1592 				 * It must be either suspended or resumed
1593 				 * via pm_power_has_changed path
1594 				 */
1595 				ASSERT((DEVI(dip)->devi_pm_flags &
1596 				    PMC_SUSPENDED) ||
1597 				    (PM_CP(dip, comp)->pmc_flags &
1598 				    PM_PHC_WHILE_SET_POWER));
1599 
1600 					resume_needed = suspended;
1601 			}
1602 		} else {
1603 			if (POWERING_OFF(old_level, level)) {
1604 				/*
1605 				 * It must be either suspended or resumed
1606 				 * via pm_power_has_changed path
1607 				 */
1608 				ASSERT((DEVI(dip)->devi_pm_flags &
1609 				    PMC_SUSPENDED) ||
1610 				    (PM_CP(dip, comp)->pmc_flags &
1611 				    PM_PHC_WHILE_SET_POWER));
1612 
1613 					resume_needed = suspended;
1614 			}
1615 		}
1616 	}
1617 	if (resume_needed) {
1618 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1619 		/* ppm is not interested in DDI_PM_RESUME */
1620 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1621 		    DDI_SUCCESS) {
1622 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1623 		} else
1624 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1625 			    PM_DEVICE(dip));
1626 	}
1627 	return (power_op_ret);
1628 }
1629 
1630 /*
1631  * Return true if we are the owner or a borrower of the devi lock.  See
1632  * pm_lock_power_single() about borrowing the lock.
1633  */
1634 static int
1635 pm_devi_lock_held(dev_info_t *dip)
1636 {
1637 	lock_loan_t *cur;
1638 
1639 	if (DEVI_BUSY_OWNED(dip))
1640 		return (1);
1641 
1642 	/* return false if no locks borrowed */
1643 	if (lock_loan_head.pmlk_next == NULL)
1644 		return (0);
1645 
1646 	mutex_enter(&pm_loan_lock);
1647 	/* see if our thread is registered as a lock borrower. */
1648 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1649 		if (cur->pmlk_borrower == curthread)
1650 			break;
1651 	mutex_exit(&pm_loan_lock);
1652 
1653 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1654 }
1655 
1656 /*
1657  * pm_set_power: adjusts power level of device.	 Assumes device is power
1658  * manageable & component exists.
1659  *
1660  * Cases which require us to bring up devices we keep up ("wekeepups") for
1661  * backwards compatible devices:
1662  *	component 0 is off and we're bringing it up from 0
1663  *		bring up wekeepup first
1664  *	and recursively when component 0 is off and we bring some other
1665  *	component up from 0
1666  * For devices which are not backward compatible, our dependency notion is much
1667  * simpler.  Unless all components are off, then wekeeps must be on.
1668  * We don't treat component 0 differently.
1669  * Canblock tells how to deal with a direct pm'd device.
1670  * Scan arg tells us if we were called from scan, in which case we don't need
1671  * to go back to the root node and walk down to change power.
1672  */
1673 int
1674 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1675     pm_canblock_t canblock, int scan, int *retp)
1676 {
1677 	PMD_FUNC(pmf, "set_power")
1678 	char		*pathbuf;
1679 	pm_bp_child_pwrchg_t bpc;
1680 	pm_sp_misc_t	pspm;
1681 	int		ret = DDI_SUCCESS;
1682 	int		unused = DDI_SUCCESS;
1683 	dev_info_t	*pdip = ddi_get_parent(dip);
1684 
1685 #ifdef DEBUG
1686 	int		diverted = 0;
1687 
1688 	/*
1689 	 * This prevents operations on the console from calling prom_printf and
1690 	 * either deadlocking or bringing up the console because of debug
1691 	 * output
1692 	 */
1693 	if (dip == cfb_dip) {
1694 		diverted++;
1695 		mutex_enter(&pm_debug_lock);
1696 		pm_divertdebug++;
1697 		mutex_exit(&pm_debug_lock);
1698 	}
1699 #endif
1700 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1701 	    direction == PM_LEVEL_EXACT);
1702 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1703 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1704 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1705 	(void) ddi_pathname(dip, pathbuf);
1706 	bpc.bpc_dip = dip;
1707 	bpc.bpc_path = pathbuf;
1708 	bpc.bpc_comp = comp;
1709 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1710 	bpc.bpc_nlevel = level;
1711 	pspm.pspm_direction = direction;
1712 	pspm.pspm_errnop = retp;
1713 	pspm.pspm_canblock = canblock;
1714 	pspm.pspm_scan = scan;
1715 	bpc.bpc_private = &pspm;
1716 
1717 	/*
1718 	 * If a config operation is being done (we've locked the parent) or
1719 	 * we already hold the power lock (we've locked the node)
1720 	 * then we can operate directly on the node because we have already
1721 	 * brought up all the ancestors, otherwise, we have to go back to the
1722 	 * top of the tree.
1723 	 */
1724 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1725 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1726 		    (void *)&bpc, (void *)&unused);
1727 	else
1728 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1729 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1730 #ifdef DEBUG
1731 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1732 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1733 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1734 	}
1735 	if (diverted) {
1736 		mutex_enter(&pm_debug_lock);
1737 		pm_divertdebug--;
1738 		mutex_exit(&pm_debug_lock);
1739 	}
1740 #endif
1741 	kmem_free(pathbuf, MAXPATHLEN);
1742 	return (ret);
1743 }
1744 
1745 /*
1746  * If holddip is set, then if a dip is found we return with the node held.
1747  *
1748  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1749  * (resolve_pathname), but it does not drive attach.
1750  */
1751 dev_info_t *
1752 pm_name_to_dip(char *pathname, int holddip)
1753 {
1754 	struct pathname pn;
1755 	char		*component;
1756 	dev_info_t	*parent, *child;
1757 	int		circ;
1758 
1759 	if ((pathname == NULL) || (*pathname != '/'))
1760 		return (NULL);
1761 
1762 	/* setup pathname and allocate component */
1763 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1764 		return (NULL);
1765 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1766 
1767 	/* start at top, process '/' component */
1768 	parent = child = ddi_root_node();
1769 	ndi_hold_devi(parent);
1770 	pn_skipslash(&pn);
1771 	ASSERT(i_ddi_devi_attached(parent));
1772 
1773 	/* process components of pathname */
1774 	while (pn_pathleft(&pn)) {
1775 		(void) pn_getcomponent(&pn, component);
1776 
1777 		/* enter parent and search for component child */
1778 		ndi_devi_enter(parent, &circ);
1779 		child = ndi_devi_findchild(parent, component);
1780 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1781 			child = NULL;
1782 			ndi_devi_exit(parent, circ);
1783 			ndi_rele_devi(parent);
1784 			goto out;
1785 		}
1786 
1787 		/* attached child found, hold child and release parent */
1788 		ndi_hold_devi(child);
1789 		ndi_devi_exit(parent, circ);
1790 		ndi_rele_devi(parent);
1791 
1792 		/* child becomes parent, and process next component */
1793 		parent = child;
1794 		pn_skipslash(&pn);
1795 
1796 		/* loop with active ndi_devi_hold of child->parent */
1797 	}
1798 
1799 out:
1800 	pn_free(&pn);
1801 	kmem_free(component, MAXNAMELEN);
1802 
1803 	/* if we are not asked to return with hold, drop current hold */
1804 	if (child && !holddip)
1805 		ndi_rele_devi(child);
1806 	return (child);
1807 }
1808 
1809 /*
1810  * Search for a dependency and mark it unsatisfied
1811  */
1812 static void
1813 pm_unsatisfy(char *keeper, char *kept)
1814 {
1815 	PMD_FUNC(pmf, "unsatisfy")
1816 	pm_pdr_t *dp;
1817 
1818 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1819 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1820 		if (!dp->pdr_isprop) {
1821 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1822 			    (dp->pdr_kept_count > 0) &&
1823 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1824 				if (dp->pdr_satisfied) {
1825 					dp->pdr_satisfied = 0;
1826 					pm_unresolved_deps++;
1827 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1828 					    "pm_unresolved_deps now %d\n", pmf,
1829 					    pm_unresolved_deps))
1830 				}
1831 			}
1832 		}
1833 	}
1834 }
1835 
1836 /*
1837  * Device dip is being un power managed, it keeps up count other devices.
1838  * We need to release any hold we have on the kept devices, and also
1839  * mark the dependency no longer satisfied.
1840  */
1841 static void
1842 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1843 {
1844 	PMD_FUNC(pmf, "unkeeps")
1845 	int i, j;
1846 	dev_info_t *kept;
1847 	dev_info_t *dip;
1848 	struct pm_component *cp;
1849 	int keeper_on = 0, circ;
1850 
1851 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1852 	    keeper, (void *)keptpaths))
1853 	/*
1854 	 * Try to grab keeper. Keeper may have gone away by now,
1855 	 * in this case, used the passed in value pwr
1856 	 */
1857 	dip = pm_name_to_dip(keeper, 1);
1858 	for (i = 0; i < count; i++) {
1859 		/* Release power hold */
1860 		kept = pm_name_to_dip(keptpaths[i], 1);
1861 		if (kept) {
1862 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1863 			    PM_DEVICE(kept), i))
1864 			/*
1865 			 * We need to check if we skipped a bringup here
1866 			 * because we could have failed the bringup
1867 			 * (ie DIRECT PM device) and have
1868 			 * not increment the count.
1869 			 */
1870 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1871 				keeper_on = 0;
1872 				PM_LOCK_POWER(dip, &circ);
1873 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1874 					cp = &DEVI(dip)->devi_pm_components[j];
1875 					if (cur_power(cp)) {
1876 						keeper_on++;
1877 						break;
1878 					}
1879 				}
1880 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1881 					pm_rele_power(kept);
1882 					DEVI(kept)->devi_pm_flags
1883 					    &= ~PMC_SKIP_BRINGUP;
1884 				}
1885 				PM_UNLOCK_POWER(dip, circ);
1886 			} else if (pwr) {
1887 				if (PM_SKBU(kept) == 0) {
1888 					pm_rele_power(kept);
1889 					DEVI(kept)->devi_pm_flags
1890 					    &= ~PMC_SKIP_BRINGUP;
1891 				}
1892 			}
1893 			ddi_release_devi(kept);
1894 		}
1895 		/*
1896 		 * mark this dependency not satisfied
1897 		 */
1898 		pm_unsatisfy(keeper, keptpaths[i]);
1899 	}
1900 	if (dip)
1901 		ddi_release_devi(dip);
1902 }
1903 
1904 /*
1905  * Device kept is being un power managed, it is kept up by keeper.
1906  * We need to mark the dependency no longer satisfied.
1907  */
1908 static void
1909 pm_unkepts(char *kept, char *keeper)
1910 {
1911 	PMD_FUNC(pmf, "unkepts")
1912 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1913 	ASSERT(keeper != NULL);
1914 	/*
1915 	 * mark this dependency not satisfied
1916 	 */
1917 	pm_unsatisfy(keeper, kept);
1918 }
1919 
1920 /*
1921  * Removes dependency information and hold on the kepts, if the path is a
1922  * path of a keeper.
1923  */
1924 static void
1925 pm_free_keeper(char *path, int pwr)
1926 {
1927 	pm_pdr_t *dp;
1928 	int i;
1929 	size_t length;
1930 
1931 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1932 		if (strcmp(dp->pdr_keeper, path) != 0)
1933 			continue;
1934 		/*
1935 		 * Remove all our kept holds and the dependency records,
1936 		 * then free up the kept lists.
1937 		 */
1938 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1939 		if (dp->pdr_kept_count)  {
1940 			for (i = 0; i < dp->pdr_kept_count; i++) {
1941 				length = strlen(dp->pdr_kept_paths[i]);
1942 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1943 			}
1944 			kmem_free(dp->pdr_kept_paths,
1945 			    dp->pdr_kept_count * sizeof (char **));
1946 			dp->pdr_kept_paths = NULL;
1947 			dp->pdr_kept_count = 0;
1948 		}
1949 	}
1950 }
1951 
1952 /*
1953  * Removes the device represented by path from the list of kepts, if the
1954  * path is a path of a kept
1955  */
1956 static void
1957 pm_free_kept(char *path)
1958 {
1959 	pm_pdr_t *dp;
1960 	int i;
1961 	int j, count;
1962 	size_t length;
1963 	char **paths;
1964 
1965 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1966 		if (dp->pdr_kept_count == 0)
1967 			continue;
1968 		count = dp->pdr_kept_count;
1969 		/* Remove this device from the kept path lists */
1970 		for (i = 0; i < count; i++) {
1971 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1972 				pm_unkepts(path, dp->pdr_keeper);
1973 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1974 				kmem_free(dp->pdr_kept_paths[i], length);
1975 				dp->pdr_kept_paths[i] = NULL;
1976 				dp->pdr_kept_count--;
1977 			}
1978 		}
1979 		/* Compact the kept paths array */
1980 		if (dp->pdr_kept_count) {
1981 			length = dp->pdr_kept_count * sizeof (char **);
1982 			paths = kmem_zalloc(length, KM_SLEEP);
1983 			j = 0;
1984 			for (i = 0; i < count; i++) {
1985 				if (dp->pdr_kept_paths[i] != NULL) {
1986 					paths[j] = dp->pdr_kept_paths[i];
1987 					j++;
1988 				}
1989 			}
1990 			ASSERT(j == dp->pdr_kept_count);
1991 		}
1992 		/* Now free the old array and point to the new one */
1993 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1994 		if (dp->pdr_kept_count)
1995 			dp->pdr_kept_paths = paths;
1996 		else
1997 			dp->pdr_kept_paths = NULL;
1998 	}
1999 }
2000 
2001 /*
2002  * Free the dependency information for a device.
2003  */
2004 void
2005 pm_free_keeps(char *path, int pwr)
2006 {
2007 	PMD_FUNC(pmf, "free_keeps")
2008 
2009 #ifdef DEBUG
2010 	int doprdeps = 0;
2011 	void prdeps(char *);
2012 
2013 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2014 	if (pm_debug & PMD_KEEPS) {
2015 		doprdeps = 1;
2016 		prdeps("pm_free_keeps before");
2017 	}
2018 #endif
2019 	/*
2020 	 * First assume we are a keeper and remove all our kepts.
2021 	 */
2022 	pm_free_keeper(path, pwr);
2023 	/*
2024 	 * Now assume we a kept device, and remove all our records.
2025 	 */
2026 	pm_free_kept(path);
2027 #ifdef	DEBUG
2028 	if (doprdeps) {
2029 		prdeps("pm_free_keeps after");
2030 	}
2031 #endif
2032 }
2033 
2034 static int
2035 pm_is_kept(char *path)
2036 {
2037 	pm_pdr_t *dp;
2038 	int i;
2039 
2040 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2041 		if (dp->pdr_kept_count == 0)
2042 			continue;
2043 		for (i = 0; i < dp->pdr_kept_count; i++) {
2044 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2045 				return (1);
2046 		}
2047 	}
2048 	return (0);
2049 }
2050 
2051 static void
2052 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2053 {
2054 	PMD_FUNC(pmf, "hold_rele_power")
2055 	int circ;
2056 
2057 	if ((dip == NULL) ||
2058 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2059 		return;
2060 
2061 	PM_LOCK_POWER(dip, &circ);
2062 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2063 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2064 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2065 
2066 	PM_KUC(dip) += cnt;
2067 
2068 	ASSERT(PM_KUC(dip) >= 0);
2069 	PM_UNLOCK_POWER(dip, circ);
2070 
2071 	if (cnt < 0 && PM_KUC(dip) == 0)
2072 		pm_rescan(dip);
2073 }
2074 
/*
 * Upper bound on the number of ppm handlers; the callback table below is
 * sized one larger than this.
 */
#define	MAX_PPM_HANDLERS	4

kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */

/*
 * Table of ppm callbacks.  pm_ppm_notify_all_lowest() walks this table
 * under ppm_lock and stops at the first entry whose ppmc_func is NULL, so
 * the extra (+1) slot presumably serves as the terminator -- confirm
 * against the registration code.
 */
struct	ppm_callbacks {
	int (*ppmc_func)(dev_info_t *);	/* handler; NULL ends the table */
	dev_info_t	*ppmc_dip;	/* dip passed to pm_ctlops() as ppm */
} ppm_callbacks[MAX_PPM_HANDLERS + 1];
2083 
2084 
2085 /*
2086  * This routine calls into all the registered ppms to notify them
2087  * that either all components of power-managed devices are at their
2088  * lowest levels or no longer all are at their lowest levels.
2089  */
2090 static void
2091 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2092 {
2093 	struct ppm_callbacks *ppmcp;
2094 	power_req_t power_req;
2095 	int result = 0;
2096 
2097 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2098 	power_req.req.ppm_all_lowest_req.mode = mode;
2099 	mutex_enter(&ppm_lock);
2100 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2101 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2102 		    DDI_CTLOPS_POWER, &power_req, &result);
2103 	mutex_exit(&ppm_lock);
2104 	if (mode == PM_ALL_LOWEST) {
2105 		if (autoS3_enabled) {
2106 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
2107 			    "autos3\n"))
2108 			mutex_enter(&srn_clone_lock);
2109 			if (srn_signal) {
2110 				srn_inuse++;
2111 				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
2112 				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
2113 				srn_inuse--;
2114 			} else {
2115 				PMD(PMD_SX, ("srn_signal NULL\n"))
2116 			}
2117 			mutex_exit(&srn_clone_lock);
2118 		} else {
2119 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
2120 			    "disabled\n"));
2121 		}
2122 	}
2123 }
2124 
2125 static void
2126 pm_set_pm_info(dev_info_t *dip, void *value)
2127 {
2128 	DEVI(dip)->devi_pm_info = value;
2129 }
2130 
/*
 * Head of the list of blocked requests, linked through pr_next.  It is
 * searched by pm_rsvp_lookup() and pm_proceed(), both of which run with
 * pm_rsvp_lock held.
 */
pm_rsvp_t *pm_blocked_list;
2132 
2133 /*
2134  * Look up an entry in the blocked list by dip and component
2135  */
2136 static pm_rsvp_t *
2137 pm_rsvp_lookup(dev_info_t *dip, int comp)
2138 {
2139 	pm_rsvp_t *p;
2140 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2141 	for (p = pm_blocked_list; p; p = p->pr_next)
2142 		if (p->pr_dip == dip && p->pr_comp == comp) {
2143 			return (p);
2144 		}
2145 	return (NULL);
2146 }
2147 
2148 /*
2149  * Called when a device which is direct power managed (or the parent or
2150  * dependent of such a device) changes power, or when a pm clone is closed
2151  * that was direct power managing a device.  This call results in pm_blocked()
2152  * (below) returning.
2153  */
2154 void
2155 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2156 {
2157 	PMD_FUNC(pmf, "proceed")
2158 	pm_rsvp_t *found = NULL;
2159 	pm_rsvp_t *p;
2160 
2161 	mutex_enter(&pm_rsvp_lock);
2162 	switch (cmd) {
2163 	/*
2164 	 * we're giving up control, let any pending op continue
2165 	 */
2166 	case PMP_RELEASE:
2167 		for (p = pm_blocked_list; p; p = p->pr_next) {
2168 			if (dip == p->pr_dip) {
2169 				p->pr_retval = PMP_RELEASE;
2170 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2171 				    pmf, PM_DEVICE(dip)))
2172 				cv_signal(&p->pr_cv);
2173 			}
2174 		}
2175 		break;
2176 
2177 	/*
2178 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2179 	 * succeed and a non-matching request for the same device fail
2180 	 */
2181 	case PMP_SETPOWER:
2182 		found = pm_rsvp_lookup(dip, comp);
2183 		if (!found)	/* if driver not waiting */
2184 			break;
2185 		/*
2186 		 * This cannot be pm_lower_power, since that can only happen
2187 		 * during detach or probe
2188 		 */
2189 		if (found->pr_newlevel <= newlevel) {
2190 			found->pr_retval = PMP_SUCCEED;
2191 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2192 			    PM_DEVICE(dip)))
2193 		} else {
2194 			found->pr_retval = PMP_FAIL;
2195 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2196 			    PM_DEVICE(dip)))
2197 		}
2198 		cv_signal(&found->pr_cv);
2199 		break;
2200 
2201 	default:
2202 		panic("pm_proceed unknown cmd %d", cmd);
2203 	}
2204 	mutex_exit(&pm_rsvp_lock);
2205 }
2206 
2207 /*
2208  * This routine dispatches new work to the dependency thread. Caller must
2209  * be prepared to block for memory if necessary.
2210  */
2211 void
2212 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2213     int *res, int cached_pwr)
2214 {
2215 	pm_dep_wk_t	*new_work;
2216 
2217 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2218 	new_work->pdw_type = cmd;
2219 	new_work->pdw_wait = wait;
2220 	new_work->pdw_done = 0;
2221 	new_work->pdw_ret = 0;
2222 	new_work->pdw_pwr = cached_pwr;
2223 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2224 	if (keeper != NULL) {
2225 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2226 		    KM_SLEEP);
2227 		(void) strcpy(new_work->pdw_keeper, keeper);
2228 	}
2229 	if (kept != NULL) {
2230 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2231 		(void) strcpy(new_work->pdw_kept, kept);
2232 	}
2233 	mutex_enter(&pm_dep_thread_lock);
2234 	if (pm_dep_thread_workq == NULL) {
2235 		pm_dep_thread_workq = new_work;
2236 		pm_dep_thread_tail = new_work;
2237 		new_work->pdw_next = NULL;
2238 	} else {
2239 		pm_dep_thread_tail->pdw_next = new_work;
2240 		pm_dep_thread_tail = new_work;
2241 		new_work->pdw_next = NULL;
2242 	}
2243 	cv_signal(&pm_dep_thread_cv);
2244 	/* If caller asked for it, wait till it is done. */
2245 	if (wait)  {
2246 		while (!new_work->pdw_done)
2247 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2248 		/*
2249 		 * Pass return status, if any, back.
2250 		 */
2251 		if (res != NULL)
2252 			*res = new_work->pdw_ret;
2253 		/*
2254 		 * If we asked to wait, it is our job to free the request
2255 		 * structure.
2256 		 */
2257 		if (new_work->pdw_keeper)
2258 			kmem_free(new_work->pdw_keeper,
2259 			    strlen(new_work->pdw_keeper) + 1);
2260 		if (new_work->pdw_kept)
2261 			kmem_free(new_work->pdw_kept,
2262 			    strlen(new_work->pdw_kept) + 1);
2263 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2264 	}
2265 	mutex_exit(&pm_dep_thread_lock);
2266 }
2267 
2268 /*
2269  * Release the pm resource for this device.
2270  */
2271 void
2272 pm_rem_info(dev_info_t *dip)
2273 {
2274 	PMD_FUNC(pmf, "rem_info")
2275 	int		i, count = 0;
2276 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2277 	dev_info_t	*pdip = ddi_get_parent(dip);
2278 	char		*pathbuf;
2279 	int		work_type = PM_DEP_WK_DETACH;
2280 
2281 	ASSERT(info);
2282 
2283 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2284 	if (PM_ISDIRECT(dip)) {
2285 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2286 		ASSERT(info->pmi_clone);
2287 		info->pmi_clone = 0;
2288 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2289 	}
2290 	ASSERT(!PM_GET_PM_SCAN(dip));
2291 
2292 	/*
2293 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2294 	 * Others we check all components.  BC node that has already
2295 	 * called pm_destroy_components() has zero component count.
2296 	 * Parents that get notification are not adjusted because their
2297 	 * kidsupcnt is always 0 (or 1 during configuration).
2298 	 */
2299 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2300 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2301 
2302 	/* node is detached, so we can examine power without locking */
2303 	if (PM_ISBC(dip)) {
2304 		count = (PM_CURPOWER(dip, 0) != 0);
2305 	} else {
2306 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2307 			count += (PM_CURPOWER(dip, i) != 0);
2308 	}
2309 
2310 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2311 		e_pm_hold_rele_power(pdip, -count);
2312 
2313 	/* Schedule a request to clean up dependency records */
2314 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2315 	(void) ddi_pathname(dip, pathbuf);
2316 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2317 	    PM_DEP_NOWAIT, NULL, (count > 0));
2318 	kmem_free(pathbuf, MAXPATHLEN);
2319 
2320 	/*
2321 	 * Adjust the pm_comps_notlowest count since this device is
2322 	 * not being power-managed anymore.
2323 	 */
2324 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2325 		if (PM_CURPOWER(dip, i) != 0)
2326 			PM_DECR_NOTLOWEST(dip);
2327 	}
2328 	/*
2329 	 * Once we clear the info pointer, it looks like it is not power
2330 	 * managed to everybody else.
2331 	 */
2332 	pm_set_pm_info(dip, NULL);
2333 	kmem_free(info, sizeof (pm_info_t));
2334 }
2335 
2336 int
2337 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2338 {
2339 	int components = PM_NUMCMPTS(dip);
2340 	int *bufp;
2341 	size_t size;
2342 	int i;
2343 
2344 	if (components <= 0) {
2345 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2346 		    "can't get normal power values\n", PM_DEVICE(dip));
2347 		return (DDI_FAILURE);
2348 	} else {
2349 		size = components * sizeof (int);
2350 		bufp = kmem_alloc(size, KM_SLEEP);
2351 		for (i = 0; i < components; i++) {
2352 			bufp[i] = pm_get_normal_power(dip, i);
2353 		}
2354 	}
2355 	*length = size;
2356 	*valuep = bufp;
2357 	return (DDI_SUCCESS);
2358 }
2359 
2360 static int
2361 pm_reset_timestamps(dev_info_t *dip, void *arg)
2362 {
2363 	_NOTE(ARGUNUSED(arg))
2364 
2365 	int components;
2366 	int	i;
2367 
2368 	if (!PM_GET_PM_INFO(dip))
2369 		return (DDI_WALK_CONTINUE);
2370 	components = PM_NUMCMPTS(dip);
2371 	ASSERT(components > 0);
2372 	PM_LOCK_BUSY(dip);
2373 	for (i = 0; i < components; i++) {
2374 		struct pm_component *cp;
2375 		/*
2376 		 * If the component was not marked as busy,
2377 		 * reset its timestamp to now.
2378 		 */
2379 		cp = PM_CP(dip, i);
2380 		if (cp->pmc_timestamp)
2381 			cp->pmc_timestamp = gethrestime_sec();
2382 	}
2383 	PM_UNLOCK_BUSY(dip);
2384 	return (DDI_WALK_CONTINUE);
2385 }
2386 
2387 /*
2388  * Convert a power level to an index into the levels array (or
2389  * just PM_LEVEL_UNKNOWN in that special case).
2390  */
2391 static int
2392 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2393 {
2394 	PMD_FUNC(pmf, "level_to_index")
2395 	int i;
2396 	int limit = cp->pmc_comp.pmc_numlevels;
2397 	int *ip = cp->pmc_comp.pmc_lvals;
2398 
2399 	if (level == PM_LEVEL_UNKNOWN)
2400 		return (level);
2401 
2402 	for (i = 0; i < limit; i++) {
2403 		if (level == *ip++) {
2404 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2405 			    pmf, PM_DEVICE(dip),
2406 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2407 			return (i);
2408 		}
2409 	}
2410 	panic("pm_level_to_index: level %d not found for device "
2411 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2412 	/*NOTREACHED*/
2413 }
2414 
2415 /*
2416  * Internal function to set current power level
2417  */
2418 static void
2419 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2420 {
2421 	PMD_FUNC(pmf, "set_cur_pwr")
2422 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2423 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2424 
2425 	/*
2426 	 * Nothing to adjust if current & new levels are the same.
2427 	 */
2428 	if (curpwr != PM_LEVEL_UNKNOWN &&
2429 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2430 		return;
2431 
2432 	/*
2433 	 * Keep the count for comps doing transition to/from lowest
2434 	 * level.
2435 	 */
2436 	if (curpwr == 0) {
2437 		PM_INCR_NOTLOWEST(dip);
2438 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2439 		PM_DECR_NOTLOWEST(dip);
2440 	}
2441 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2442 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2443 }
2444 
2445 /*
2446  * This is the default method of setting the power of a device if no ppm
2447  * driver has claimed it.
2448  */
2449 int
2450 pm_power(dev_info_t *dip, int comp, int level)
2451 {
2452 	PMD_FUNC(pmf, "power")
2453 	struct dev_ops	*ops;
2454 	int		(*fn)(dev_info_t *, int, int);
2455 	struct pm_component *cp = PM_CP(dip, comp);
2456 	int retval;
2457 	pm_info_t *info = PM_GET_PM_INFO(dip);
2458 	static int pm_phc_impl(dev_info_t *, int, int, int);
2459 
2460 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2461 	    PM_DEVICE(dip), comp, level))
2462 	if (!(ops = ddi_get_driver(dip))) {
2463 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2464 		    PM_DEVICE(dip)))
2465 		return (DDI_FAILURE);
2466 	}
2467 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2468 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2469 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2470 		    (!fn ? " devo_power NULL" : "")))
2471 		return (DDI_FAILURE);
2472 	}
2473 	cp->pmc_flags |= PM_POWER_OP;
2474 	retval = (*fn)(dip, comp, level);
2475 	cp->pmc_flags &= ~PM_POWER_OP;
2476 	if (retval == DDI_SUCCESS) {
2477 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2478 		return (DDI_SUCCESS);
2479 	}
2480 
2481 	/*
2482 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2483 	 * updated only the power level of the component.  If our attempt to
2484 	 * set the device new to a power level above has failed we sync the
2485 	 * total power state via phc code now.
2486 	 */
2487 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2488 		int phc_lvl =
2489 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2490 
2491 		ASSERT(info);
2492 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2493 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2494 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2495 	}
2496 
2497 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2498 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2499 	    level, power_val_to_string(cp, level)));
2500 	return (DDI_FAILURE);
2501 }
2502 
2503 int
2504 pm_unmanage(dev_info_t *dip)
2505 {
2506 	PMD_FUNC(pmf, "unmanage")
2507 	power_req_t power_req;
2508 	int result, retval = 0;
2509 
2510 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2511 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2512 	    PM_DEVICE(dip)))
2513 	power_req.request_type = PMR_PPM_UNMANAGE;
2514 	power_req.req.ppm_config_req.who = dip;
2515 	if (pm_ppm_claimed(dip))
2516 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2517 		    &power_req, &result);
2518 #ifdef DEBUG
2519 	else
2520 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2521 		    &power_req, &result);
2522 #endif
2523 	ASSERT(retval == DDI_SUCCESS);
2524 	pm_rem_info(dip);
2525 	return (retval);
2526 }
2527 
2528 int
2529 pm_raise_power(dev_info_t *dip, int comp, int level)
2530 {
2531 	if (level < 0)
2532 		return (DDI_FAILURE);
2533 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2534 	    !e_pm_valid_power(dip, comp, level))
2535 		return (DDI_FAILURE);
2536 
2537 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2538 }
2539 
2540 int
2541 pm_lower_power(dev_info_t *dip, int comp, int level)
2542 {
2543 	PMD_FUNC(pmf, "pm_lower_power")
2544 
2545 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2546 	    !e_pm_valid_power(dip, comp, level)) {
2547 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2548 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2549 		return (DDI_FAILURE);
2550 	}
2551 
2552 	if (!DEVI_IS_DETACHING(dip)) {
2553 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2554 		    pmf, PM_DEVICE(dip)))
2555 		return (DDI_FAILURE);
2556 	}
2557 
2558 	/*
2559 	 * If we don't care about saving power, or we're treating this node
2560 	 * specially, then this is a no-op
2561 	 */
2562 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2563 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2564 		    pmf, PM_DEVICE(dip),
2565 		    !autopm_enabled ? "!autopm_enabled " : "",
2566 		    !PM_CPUPM_ENABLED ? "!cpupm_enabled " : "",
2567 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2568 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2569 		return (DDI_SUCCESS);
2570 	}
2571 
2572 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2573 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2574 		    PM_DEVICE(dip)))
2575 		return (DDI_FAILURE);
2576 	}
2577 	return (DDI_SUCCESS);
2578 }
2579 
2580 /*
2581  * Find the entries struct for a given dip in the blocked list, return it locked
2582  */
2583 static psce_t *
2584 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2585 {
2586 	pscc_t *p;
2587 	psce_t *psce;
2588 
2589 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2590 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2591 		if (p->pscc_dip == dip) {
2592 			*psccp = p;
2593 			psce = p->pscc_entries;
2594 			mutex_enter(&psce->psce_lock);
2595 			ASSERT(psce);
2596 			rw_exit(&pm_pscc_direct_rwlock);
2597 			return (psce);
2598 		}
2599 	}
2600 	rw_exit(&pm_pscc_direct_rwlock);
2601 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2602 	/*NOTREACHED*/
2603 }
2604 
2605 /*
2606  * Write an entry indicating a power level change (to be passed to a process
2607  * later) in the given psce.
2608  * If we were called in the path that brings up the console fb in the
2609  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2610  * we create a record that has a size of -1, a physaddr of NULL, and that
2611  * has the overflow flag set.
2612  */
2613 static int
2614 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2615     int old, int which, pm_canblock_t canblock)
2616 {
2617 	char	buf[MAXNAMELEN];
2618 	pm_state_change_t *p;
2619 	size_t	size;
2620 	caddr_t physpath = NULL;
2621 	int	overrun = 0;
2622 
2623 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2624 	(void) ddi_pathname(dip, buf);
2625 	size = strlen(buf) + 1;
2626 	p = psce->psce_in;
2627 	if (canblock == PM_CANBLOCK_BYPASS) {
2628 		physpath = kmem_alloc(size, KM_NOSLEEP);
2629 		if (physpath == NULL) {
2630 			/*
2631 			 * mark current entry as overrun
2632 			 */
2633 			p->flags |= PSC_EVENT_LOST;
2634 			size = (size_t)-1;
2635 		}
2636 	} else
2637 		physpath = kmem_alloc(size, KM_SLEEP);
2638 	if (p->size) {	/* overflow; mark the next entry */
2639 		if (p->size != (size_t)-1)
2640 			kmem_free(p->physpath, p->size);
2641 		ASSERT(psce->psce_out == p);
2642 		if (p == psce->psce_last) {
2643 			psce->psce_first->flags |= PSC_EVENT_LOST;
2644 			psce->psce_out = psce->psce_first;
2645 		} else {
2646 			(p + 1)->flags |= PSC_EVENT_LOST;
2647 			psce->psce_out = (p + 1);
2648 		}
2649 		overrun++;
2650 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2651 		p->flags |= PSC_EVENT_LOST;
2652 		p->size = 0;
2653 		p->physpath = NULL;
2654 	}
2655 	if (which == PSC_INTEREST) {
2656 		mutex_enter(&pm_compcnt_lock);
2657 		if (pm_comps_notlowest == 0)
2658 			p->flags |= PSC_ALL_LOWEST;
2659 		else
2660 			p->flags &= ~PSC_ALL_LOWEST;
2661 		mutex_exit(&pm_compcnt_lock);
2662 	}
2663 	p->event = event;
2664 	p->timestamp = gethrestime_sec();
2665 	p->component = comp;
2666 	p->old_level = old;
2667 	p->new_level = new;
2668 	p->physpath = physpath;
2669 	p->size = size;
2670 	if (physpath != NULL)
2671 		(void) strcpy(p->physpath, buf);
2672 	if (p == psce->psce_last)
2673 		psce->psce_in = psce->psce_first;
2674 	else
2675 		psce->psce_in = ++p;
2676 	mutex_exit(&psce->psce_lock);
2677 	return (overrun);
2678 }
2679 
2680 /*
2681  * Find the next entry on the interest list.  We keep a pointer to the item we
2682  * last returned in the user's cooke.  Returns a locked entries struct.
2683  */
2684 static psce_t *
2685 psc_interest(void **cookie, pscc_t **psccp)
2686 {
2687 	pscc_t *pscc;
2688 	pscc_t **cookiep = (pscc_t **)cookie;
2689 
2690 	if (*cookiep == NULL)
2691 		pscc = pm_pscc_interest;
2692 	else
2693 		pscc = (*cookiep)->pscc_next;
2694 	if (pscc) {
2695 		*cookiep = pscc;
2696 		*psccp = pscc;
2697 		mutex_enter(&pscc->pscc_entries->psce_lock);
2698 		return (pscc->pscc_entries);
2699 	} else {
2700 		return (NULL);
2701 	}
2702 }
2703 
2704 /*
2705  * Create an entry for a process to pick up indicating a power level change.
2706  */
2707 static void
2708 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2709     int newlevel, int oldlevel, pm_canblock_t canblock)
2710 {
2711 	PMD_FUNC(pmf, "enqueue_notify")
2712 	pscc_t	*pscc;
2713 	psce_t	*psce;
2714 	void		*cookie = NULL;
2715 	int	overrun;
2716 
2717 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2718 	switch (cmd) {
2719 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2720 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2721 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2722 		psce = pm_psc_dip_to_direct(dip, &pscc);
2723 		ASSERT(psce);
2724 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2725 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2726 		    pm_poll_cnt[pscc->pscc_clone]))
2727 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2728 		    PSC_DIRECT, canblock);
2729 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2730 		mutex_enter(&pm_clone_lock);
2731 		if (!overrun)
2732 			pm_poll_cnt[pscc->pscc_clone]++;
2733 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2734 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2735 		mutex_exit(&pm_clone_lock);
2736 		break;
2737 	case PSC_HAS_CHANGED:
2738 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2739 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2740 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2741 			psce = pm_psc_dip_to_direct(dip, &pscc);
2742 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2743 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2744 			    pm_poll_cnt[pscc->pscc_clone]))
2745 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2746 			    oldlevel, PSC_DIRECT, canblock);
2747 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2748 			mutex_enter(&pm_clone_lock);
2749 			if (!overrun)
2750 				pm_poll_cnt[pscc->pscc_clone]++;
2751 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2752 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2753 			mutex_exit(&pm_clone_lock);
2754 		}
2755 		mutex_enter(&pm_clone_lock);
2756 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2757 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2758 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2759 			    oldlevel, PSC_INTEREST, canblock);
2760 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2761 		}
2762 		rw_exit(&pm_pscc_interest_rwlock);
2763 		mutex_exit(&pm_clone_lock);
2764 		break;
2765 #ifdef DEBUG
2766 	default:
2767 		ASSERT(0);
2768 #endif
2769 	}
2770 }
2771 
2772 static void
2773 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2774 {
2775 	if (listp) {
2776 		pm_ppm_devlist_t *p, *next = NULL;
2777 
2778 		for (p = *listp; p; p = next) {
2779 			next = p->ppd_next;
2780 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2781 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2782 			    canblock);
2783 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2784 		}
2785 		*listp = NULL;
2786 	}
2787 }
2788 
2789 /*
2790  * Try to get the power locks of the parent node and target (child)
2791  * node.  Return true if successful (with both locks held) or false
2792  * (with no locks held).
2793  */
2794 static int
2795 pm_try_parent_child_locks(dev_info_t *pdip,
2796     dev_info_t *dip, int *pcircp, int *circp)
2797 {
2798 	if (ndi_devi_tryenter(pdip, pcircp))
2799 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2800 			return (1);
2801 		} else {
2802 			ndi_devi_exit(pdip, *pcircp);
2803 		}
2804 	return (0);
2805 }
2806 
2807 /*
2808  * Determine if the power lock owner is blocked by current thread.
2809  * returns :
2810  * 	1 - If the thread owning the effective power lock (the first lock on
2811  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2812  *          a mutex held by the current thread.
2813  *
2814  *	0 - otherwise
2815  *
2816  * Note : This function is called by pm_power_has_changed to determine whether
2817  * it is executing in parallel with pm_set_power.
2818  */
2819 static int
2820 pm_blocked_by_us(dev_info_t *dip)
2821 {
2822 	power_req_t power_req;
2823 	kthread_t *owner;
2824 	int result;
2825 	kmutex_t *mp;
2826 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2827 
2828 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2829 	power_req.req.ppm_power_lock_owner_req.who = dip;
2830 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2831 	    DDI_SUCCESS) {
2832 		/*
2833 		 * It is assumed that if the device is claimed by ppm, ppm
2834 		 * will always implement this request type and it'll always
2835 		 * return success. We panic here, if it fails.
2836 		 */
2837 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2838 		    PM_DEVICE(dip));
2839 		/*NOTREACHED*/
2840 	}
2841 
2842 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2843 	    owner->t_state == TS_SLEEP &&
2844 	    owner->t_sobj_ops &&
2845 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2846 	    (mp = (kmutex_t *)owner->t_wchan) &&
2847 	    mutex_owner(mp) == curthread)
2848 		return (1);
2849 
2850 	return (0);
2851 }
2852 
2853 /*
2854  * Notify parent which wants to hear about a child's power changes.
2855  */
2856 static void
2857 pm_notify_parent(dev_info_t *dip,
2858     dev_info_t *pdip, int comp, int old_level, int level)
2859 {
2860 	pm_bp_has_changed_t bphc;
2861 	pm_sp_misc_t pspm;
2862 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2863 	int result = DDI_SUCCESS;
2864 
2865 	bphc.bphc_dip = dip;
2866 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2867 	bphc.bphc_comp = comp;
2868 	bphc.bphc_olevel = old_level;
2869 	bphc.bphc_nlevel = level;
2870 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2871 	pspm.pspm_scan = 0;
2872 	bphc.bphc_private = &pspm;
2873 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2874 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2875 	kmem_free(pathbuf, MAXPATHLEN);
2876 }
2877 
2878 /*
2879  * Check if we need to resume a BC device, and make the attach call as required.
2880  */
2881 static int
2882 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2883 {
2884 	int ret = DDI_SUCCESS;
2885 
2886 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2887 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2888 		/* ppm is not interested in DDI_PM_RESUME */
2889 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2890 			/* XXX Should we mark it resumed, */
2891 			/* even though it failed? */
2892 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2893 			    PM_NAME(dip), PM_ADDR(dip));
2894 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2895 	}
2896 
2897 	return (ret);
2898 }
2899 
2900 /*
2901  * Tests outside the lock to see if we should bother to enqueue an entry
2902  * for any watching process.  If yes, then caller will take the lock and
2903  * do the full protocol
2904  */
2905 static int
2906 pm_watchers()
2907 {
2908 	if (pm_processes_stopped)
2909 		return (0);
2910 	return (pm_pscc_direct || pm_pscc_interest);
2911 }
2912 
2913 /*
2914  * A driver is reporting that the power of one of its device's components
2915  * has changed.  Update the power state accordingly.
2916  */
2917 int
2918 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2919 {
2920 	PMD_FUNC(pmf, "pm_power_has_changed")
2921 	int ret;
2922 	dev_info_t *pdip = ddi_get_parent(dip);
2923 	struct pm_component *cp;
2924 	int blocked, circ, pcirc, old_level;
2925 	static int pm_phc_impl(dev_info_t *, int, int, int);
2926 
2927 	if (level < 0) {
2928 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2929 		    PM_DEVICE(dip), level))
2930 		return (DDI_FAILURE);
2931 	}
2932 
2933 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2934 	    PM_DEVICE(dip), comp, level))
2935 
2936 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2937 	    !e_pm_valid_power(dip, comp, level))
2938 		return (DDI_FAILURE);
2939 
2940 	/*
2941 	 * A driver thread calling pm_power_has_changed and another thread
2942 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2943 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2944 	 * this specific deadlock.  If we can't get the lock immediately
2945 	 * and we are deadlocked, just update the component's level, do
2946 	 * notifications, and return.  We intend to update the total power
2947 	 * state later (if the other thread fails to set power to the
2948 	 * desired level).  If we were called because of a power change on a
2949 	 * component that isn't involved in a set_power op, update all state
2950 	 * immediately.
2951 	 */
2952 	cp = PM_CP(dip, comp);
2953 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2954 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2955 		    (cp->pmc_flags & PM_POWER_OP)) {
2956 			if (pm_watchers()) {
2957 				mutex_enter(&pm_rsvp_lock);
2958 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2959 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2960 				mutex_exit(&pm_rsvp_lock);
2961 			}
2962 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2963 				pm_notify_parent(dip,
2964 				    pdip, comp, cur_power(cp), level);
2965 			(void) pm_check_and_resume(dip,
2966 			    comp, cur_power(cp), level);
2967 
2968 			/*
2969 			 * Stash the old power index, update curpwr, and flag
2970 			 * that the total power state needs to be synched.
2971 			 */
2972 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2973 			/*
2974 			 * Several pm_power_has_changed calls could arrive
2975 			 * while the set power path remains blocked.  Keep the
2976 			 * oldest old power and the newest new power of any
2977 			 * sequence of phc calls which arrive during deadlock.
2978 			 */
2979 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2980 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2981 			cp->pmc_cur_pwr =
2982 			    pm_level_to_index(dip, cp, level);
2983 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
2984 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2985 			return (DDI_SUCCESS);
2986 		} else
2987 			if (blocked) {	/* blocked, but different cmpt? */
2988 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
2989 					cmn_err(CE_NOTE,
2990 					    "!pm: parent kuc not updated due "
2991 					    "to possible deadlock.\n");
2992 					return (pm_phc_impl(dip,
2993 					    comp, level, 1));
2994 				}
2995 				old_level = cur_power(cp);
2996 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
2997 				    (!PM_ISBC(dip) || comp == 0) &&
2998 				    POWERING_ON(old_level, level))
2999 					pm_hold_power(pdip);
3000 				ret = pm_phc_impl(dip, comp, level, 1);
3001 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3002 					if ((!PM_ISBC(dip) ||
3003 					    comp == 0) && level == 0 &&
3004 					    old_level != PM_LEVEL_UNKNOWN)
3005 						pm_rele_power(pdip);
3006 				}
3007 				ndi_devi_exit(pdip, pcirc);
3008 				/* child lock not held: deadlock */
3009 				return (ret);
3010 			}
3011 		delay(1);
3012 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
3013 	}
3014 
3015 	/* non-deadlock case */
3016 	old_level = cur_power(cp);
3017 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3018 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
3019 		pm_hold_power(pdip);
3020 	ret = pm_phc_impl(dip, comp, level, 1);
3021 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3022 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
3023 		    old_level != PM_LEVEL_UNKNOWN)
3024 			pm_rele_power(pdip);
3025 	}
3026 	PM_UNLOCK_POWER(dip, circ);
3027 	ndi_devi_exit(pdip, pcirc);
3028 	return (ret);
3029 }
3030 
3031 /*
3032  * Account for power changes to a component of the console frame buffer.
3033  * If lowering power from full (or "unknown", which is treated as full)
3034  * we will increment the "components off" count of the fb device.
3035  * Subsequent lowering of the same component doesn't affect the count.  If
3036  * raising a component back to full power, we will decrement the count.
3037  *
3038  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3039  */
3040 static int
3041 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3042 {
3043 	struct pm_component *cp = PM_CP(dip, cmpt);
3044 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3045 	int want_normal = (new == cp->pmc_norm_pwr);
3046 	int incr = 0;
3047 
3048 	if (on && !want_normal)
3049 		incr = 1;
3050 	else if (!on && want_normal)
3051 		incr = -1;
3052 	return (incr);
3053 }
3054 
3055 /*
3056  * Adjust the count of console frame buffer components < full power.
3057  */
3058 static void
3059 update_comps_off(int incr, dev_info_t *dip)
3060 {
3061 		mutex_enter(&pm_cfb_lock);
3062 		pm_cfb_comps_off += incr;
3063 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3064 		mutex_exit(&pm_cfb_lock);
3065 }
3066 
3067 /*
3068  * Update the power state in the framework (via the ppm).  The 'notify'
3069  * argument tells whether to notify watchers.  Power lock is already held.
3070  */
3071 static int
3072 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3073 {
3074 	PMD_FUNC(pmf, "phc_impl")
3075 	power_req_t power_req;
3076 	int i, dodeps = 0;
3077 	dev_info_t *pdip = ddi_get_parent(dip);
3078 	int result;
3079 	int old_level;
3080 	struct pm_component *cp;
3081 	int incr = 0;
3082 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3083 	int work_type = 0;
3084 	char *pathbuf;
3085 
3086 	/* Must use "official" power level for this test. */
3087 	cp = PM_CP(dip, comp);
3088 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3089 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3090 	if (old_level != PM_LEVEL_UNKNOWN)
3091 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3092 
3093 	if (level == old_level) {
3094 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3095 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3096 		return (DDI_SUCCESS);
3097 	}
3098 
3099 	/*
3100 	 * Tell ppm about this.
3101 	 */
3102 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3103 	power_req.req.ppm_notify_level_req.who = dip;
3104 	power_req.req.ppm_notify_level_req.cmpt = comp;
3105 	power_req.req.ppm_notify_level_req.new_level = level;
3106 	power_req.req.ppm_notify_level_req.old_level = old_level;
3107 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3108 	    &result) == DDI_FAILURE) {
3109 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3110 		    pmf, PM_DEVICE(dip), level))
3111 		return (DDI_FAILURE);
3112 	}
3113 
3114 	if (PM_IS_CFB(dip)) {
3115 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3116 
3117 		if (incr) {
3118 			update_comps_off(incr, dip);
3119 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3120 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3121 			    comp, old_level, level, pm_cfb_comps_off))
3122 		}
3123 	}
3124 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3125 	result = DDI_SUCCESS;
3126 
3127 	if (notify) {
3128 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3129 			pm_notify_parent(dip, pdip, comp, old_level, level);
3130 		(void) pm_check_and_resume(dip, comp, old_level, level);
3131 	}
3132 
3133 	/*
3134 	 * Decrement the dependency kidsup count if we turn a device
3135 	 * off.
3136 	 */
3137 	if (POWERING_OFF(old_level, level)) {
3138 		dodeps = 1;
3139 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3140 			cp = PM_CP(dip, i);
3141 			if (cur_power(cp)) {
3142 				dodeps = 0;
3143 				break;
3144 			}
3145 		}
3146 		if (dodeps)
3147 			work_type = PM_DEP_WK_POWER_OFF;
3148 	}
3149 
3150 	/*
3151 	 * Increment if we turn it on. Check to see
3152 	 * if other comps are already on, if so,
3153 	 * dont increment.
3154 	 */
3155 	if (POWERING_ON(old_level, level)) {
3156 		dodeps = 1;
3157 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3158 			cp = PM_CP(dip, i);
3159 			if (comp == i)
3160 				continue;
3161 			/* -1 also treated as 0 in this case */
3162 			if (cur_power(cp) > 0) {
3163 				dodeps = 0;
3164 				break;
3165 			}
3166 		}
3167 		if (dodeps)
3168 			work_type = PM_DEP_WK_POWER_ON;
3169 	}
3170 
3171 	if (dodeps) {
3172 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3173 		(void) ddi_pathname(dip, pathbuf);
3174 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3175 		    PM_DEP_NOWAIT, NULL, 0);
3176 		kmem_free(pathbuf, MAXPATHLEN);
3177 	}
3178 
3179 	if (notify && (level != old_level) && pm_watchers()) {
3180 		mutex_enter(&pm_rsvp_lock);
3181 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3182 		    PM_CANBLOCK_BLOCK);
3183 		mutex_exit(&pm_rsvp_lock);
3184 	}
3185 
3186 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3187 	pm_rescan(dip);
3188 	return (DDI_SUCCESS);
3189 }
3190 
3191 /*
3192  * This function is called at startup time to notify pm of the existence
3193  * of any platform power managers for this platform.  As a result of
3194  * this registration, each function provided will be called each time
3195  * a device node is attached, until one returns true, and it must claim the
3196  * device node (by returning non-zero) if it wants to be involved in the
3197  * node's power management.  If it does claim the node, then it will
3198  * subsequently be notified of attach and detach events.
3199  *
3200  */
3201 
3202 int
3203 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3204 {
3205 	PMD_FUNC(pmf, "register_ppm")
3206 	struct ppm_callbacks *ppmcp;
3207 	pm_component_t *cp;
3208 	int i, pwr, result, circ;
3209 	power_req_t power_req;
3210 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3211 	void pm_ppm_claim(dev_info_t *);
3212 
3213 	mutex_enter(&ppm_lock);
3214 	ppmcp = ppm_callbacks;
3215 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3216 		if (ppmcp->ppmc_func == NULL) {
3217 			ppmcp->ppmc_func = func;
3218 			ppmcp->ppmc_dip = dip;
3219 			break;
3220 		}
3221 	}
3222 	mutex_exit(&ppm_lock);
3223 
3224 	if (i >= MAX_PPM_HANDLERS)
3225 		return (DDI_FAILURE);
3226 	while ((dip = ddi_get_parent(dip)) != NULL) {
3227 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3228 			continue;
3229 		pm_ppm_claim(dip);
3230 		/* don't bother with the not power-manageable nodes */
3231 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3232 			/*
3233 			 * Tell ppm about this.
3234 			 */
3235 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3236 			p->old_level = PM_LEVEL_UNKNOWN;
3237 			p->who = dip;
3238 			PM_LOCK_POWER(dip, &circ);
3239 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3240 				cp = PM_CP(dip, i);
3241 				pwr = cp->pmc_cur_pwr;
3242 				if (pwr != PM_LEVEL_UNKNOWN) {
3243 					p->cmpt = i;
3244 					p->new_level = cur_power(cp);
3245 					p->old_level = PM_LEVEL_UNKNOWN;
3246 					if (pm_ctlops(PPM(dip), dip,
3247 					    DDI_CTLOPS_POWER, &power_req,
3248 					    &result) == DDI_FAILURE) {
3249 						PMD(PMD_FAIL, ("%s: pc "
3250 						    "%s@%s(%s#%d) to %d "
3251 						    "fails\n", pmf,
3252 						    PM_DEVICE(dip), pwr))
3253 					}
3254 				}
3255 			}
3256 			PM_UNLOCK_POWER(dip, circ);
3257 		}
3258 	}
3259 	return (DDI_SUCCESS);
3260 }
3261 
3262 /*
3263  * Call the ppm's that have registered and adjust the devinfo struct as
3264  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3265  * by each ppm are assumed to be disjoint.
3266  */
3267 void
3268 pm_ppm_claim(dev_info_t *dip)
3269 {
3270 	struct ppm_callbacks *ppmcp;
3271 
3272 	if (PPM(dip)) {
3273 		return;
3274 	}
3275 	mutex_enter(&ppm_lock);
3276 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3277 		if ((*ppmcp->ppmc_func)(dip)) {
3278 			DEVI(dip)->devi_pm_ppm =
3279 			    (struct dev_info *)ppmcp->ppmc_dip;
3280 			mutex_exit(&ppm_lock);
3281 			return;
3282 		}
3283 	}
3284 	mutex_exit(&ppm_lock);
3285 }
3286 
3287 /*
3288  * Node is being detached so stop autopm until we see if it succeeds, in which
3289  * case pm_stop will be called.  For backwards compatible devices we bring the
3290  * device up to full power on the assumption the detach will succeed.
3291  */
3292 void
3293 pm_detaching(dev_info_t *dip)
3294 {
3295 	PMD_FUNC(pmf, "detaching")
3296 	pm_info_t *info = PM_GET_PM_INFO(dip);
3297 	int iscons;
3298 
3299 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3300 	    PM_NUMCMPTS(dip)))
3301 	if (info == NULL)
3302 		return;
3303 	ASSERT(DEVI_IS_DETACHING(dip));
3304 	PM_LOCK_DIP(dip);
3305 	info->pmi_dev_pm_state |= PM_DETACHING;
3306 	PM_UNLOCK_DIP(dip);
3307 	if (!PM_ISBC(dip))
3308 		pm_scan_stop(dip);
3309 
3310 	/*
3311 	 * console and old-style devices get brought up when detaching.
3312 	 */
3313 	iscons = PM_IS_CFB(dip);
3314 	if (iscons || PM_ISBC(dip)) {
3315 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3316 		if (iscons) {
3317 			mutex_enter(&pm_cfb_lock);
3318 			while (cfb_inuse) {
3319 				mutex_exit(&pm_cfb_lock);
3320 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3321 				delay(1);
3322 				mutex_enter(&pm_cfb_lock);
3323 			}
3324 			ASSERT(cfb_dip_detaching == NULL);
3325 			ASSERT(cfb_dip);
3326 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3327 			cfb_dip = NULL;
3328 			mutex_exit(&pm_cfb_lock);
3329 		}
3330 	}
3331 }
3332 
3333 /*
3334  * Node failed to detach.  If it used to be autopm'd, make it so again.
3335  */
3336 void
3337 pm_detach_failed(dev_info_t *dip)
3338 {
3339 	PMD_FUNC(pmf, "detach_failed")
3340 	pm_info_t *info = PM_GET_PM_INFO(dip);
3341 	int pm_all_at_normal(dev_info_t *);
3342 
3343 	if (info == NULL)
3344 		return;
3345 	ASSERT(DEVI_IS_DETACHING(dip));
3346 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3347 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3348 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3349 			/* Make sure the operation is still needed */
3350 			if (!pm_all_at_normal(dip)) {
3351 				if (pm_all_to_normal(dip,
3352 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3353 					PMD(PMD_ERROR, ("%s: could not bring "
3354 					    "%s@%s(%s#%d) to normal\n", pmf,
3355 					    PM_DEVICE(dip)))
3356 				}
3357 			}
3358 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3359 		}
3360 	}
3361 	if (!PM_ISBC(dip)) {
3362 		mutex_enter(&pm_scan_lock);
3363 		if (PM_SCANABLE(dip))
3364 			pm_scan_init(dip);
3365 		mutex_exit(&pm_scan_lock);
3366 		pm_rescan(dip);
3367 	}
3368 }
3369 
3370 /* generic Backwards Compatible component */
/*
 * bc_comp is the template that e_pm_default_components() copies (by struct
 * assignment) into every component of a backwards compatible device; the
 * copy then gets its own level and threshold arrays.  bc_names supplies
 * the two strings "off" and "on" referenced by the template.
 * NOTE(review): the initializer is positional and depends on the member
 * order of pm_comp_t, which is declared elsewhere -- confirm before
 * reordering that structure.
 */
3371 static char *bc_names[] = {"off", "on"};
3372 
3373 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3374 
3375 static void
3376 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3377 {
3378 	pm_comp_t *pmc;
3379 	pmc = &cp->pmc_comp;
3380 	pmc->pmc_numlevels = 2;
3381 	pmc->pmc_lvals[0] = 0;
3382 	pmc->pmc_lvals[1] = norm;
3383 	e_pm_set_cur_pwr(dip, cp, norm);
3384 }
3385 
3386 static void
3387 e_pm_default_components(dev_info_t *dip, int cmpts)
3388 {
3389 	int i;
3390 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3391 
3392 	p = DEVI(dip)->devi_pm_components;
3393 	for (i = 0; i < cmpts; i++, p++) {
3394 		p->pmc_comp = bc_comp;	/* struct assignment */
3395 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3396 		    KM_SLEEP);
3397 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3398 		    KM_SLEEP);
3399 		p->pmc_comp.pmc_numlevels = 2;
3400 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3401 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3402 	}
3403 }
3404 
3405 /*
3406  * Called from functions that require components to exist already to allow
3407  * for their creation by parsing the pm-components property.
3408  * Device will not be power managed as a result of this call
3409  * No locking needed because we're single threaded by the ndi_devi_enter
3410  * done while attaching, and the device isn't visible until after it has
3411  * attached
3412  */
3413 int
3414 pm_premanage(dev_info_t *dip, int style)
3415 {
3416 	PMD_FUNC(pmf, "premanage")
3417 	pm_comp_t	*pcp, *compp;
3418 	int		cmpts, i, norm, error;
3419 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3420 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3421 
3422 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3423 	/*
3424 	 * If this dip has already been processed, don't mess with it
3425 	 */
3426 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3427 		return (DDI_SUCCESS);
3428 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3429 		return (DDI_FAILURE);
3430 	}
3431 	/*
3432 	 * Look up pm-components property and create components accordingly
3433 	 * If that fails, fall back to backwards compatibility
3434 	 */
3435 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3436 		/*
3437 		 * If error is set, the property existed but was not well formed
3438 		 */
3439 		if (error || (style == PM_STYLE_NEW)) {
3440 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3441 			return (DDI_FAILURE);
3442 		}
3443 		/*
3444 		 * If they don't have the pm-components property, then we
3445 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3446 		 * behavior driver must have called pm_create_components, and
3447 		 * we need to flesh out dummy components
3448 		 */
3449 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3450 			/*
3451 			 * Not really failure, but we don't want the
3452 			 * caller to treat it as success
3453 			 */
3454 			return (DDI_FAILURE);
3455 		}
3456 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3457 		e_pm_default_components(dip, cmpts);
3458 		for (i = 0; i < cmpts; i++) {
3459 			/*
3460 			 * if normal power not set yet, we don't really know
3461 			 * what *ANY* of the power values are.  If normal
3462 			 * power is set, then we assume for this backwards
3463 			 * compatible case that the values are 0, normal power.
3464 			 */
3465 			norm = pm_get_normal_power(dip, i);
3466 			if (norm == (uint_t)-1) {
3467 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3468 				    PM_DEVICE(dip), i))
3469 				return (DDI_FAILURE);
3470 			}
3471 			/*
3472 			 * Components of BC devices start at their normal power,
3473 			 * so count them to be not at their lowest power.
3474 			 */
3475 			PM_INCR_NOTLOWEST(dip);
3476 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3477 		}
3478 	} else {
3479 		/*
3480 		 * e_pm_create_components was called from pm_autoconfig(), it
3481 		 * creates components with no descriptions (or known levels)
3482 		 */
3483 		cmpts = PM_NUMCMPTS(dip);
3484 		ASSERT(cmpts != 0);
3485 		pcp = compp;
3486 		p = DEVI(dip)->devi_pm_components;
3487 		for (i = 0; i < cmpts; i++, p++) {
3488 			p->pmc_comp = *pcp++;   /* struct assignment */
3489 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3490 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3491 		}
3492 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3493 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3494 			    PMC_CPU_THRESH);
3495 		else
3496 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3497 			    PMC_DEF_THRESH);
3498 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3499 	}
3500 	return (DDI_SUCCESS);
3501 }
3502 
3503 /*
3504  * Called during or after the device's attach to let us know it is ready
3505  * to play autopm.   Look up the pm model and manage the device accordingly.
3506  * Returns system call errno value.
3507  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3508  * a little cleaner
3509  *
3510  * Called with dip lock held, return with dip lock unheld.
3511  */
3512 
3513 int
3514 e_pm_manage(dev_info_t *dip, int style)
3515 {
3516 	PMD_FUNC(pmf, "e_manage")
3517 	pm_info_t	*info;
3518 	dev_info_t	*pdip = ddi_get_parent(dip);
3519 	int	pm_thresh_specd(dev_info_t *);
3520 	int	count;
3521 	char	*pathbuf;
3522 
3523 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3524 		return (DDI_FAILURE);
3525 	}
3526 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3527 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3528 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3529 
3530 	/*
3531 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3532 	 * out at their normal power, so they are "up", others start out
3533 	 * unknown, which is effectively "up".  Parent which want notification
3534 	 * get kidsupcnt of 0 always.
3535 	 */
3536 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3537 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3538 		e_pm_hold_rele_power(pdip, count);
3539 
3540 	pm_set_pm_info(dip, info);
3541 	/*
3542 	 * Apply any recorded thresholds
3543 	 */
3544 	(void) pm_thresh_specd(dip);
3545 
3546 	/*
3547 	 * Do dependency processing.
3548 	 */
3549 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3550 	(void) ddi_pathname(dip, pathbuf);
3551 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3552 	    PM_DEP_NOWAIT, NULL, 0);
3553 	kmem_free(pathbuf, MAXPATHLEN);
3554 
3555 	if (!PM_ISBC(dip)) {
3556 		mutex_enter(&pm_scan_lock);
3557 		if (PM_SCANABLE(dip)) {
3558 			pm_scan_init(dip);
3559 			mutex_exit(&pm_scan_lock);
3560 			pm_rescan(dip);
3561 		} else {
3562 			mutex_exit(&pm_scan_lock);
3563 		}
3564 	}
3565 	return (0);
3566 }
3567 
3568 /*
3569  * This is the obsolete exported interface for a driver to find out its
3570  * "normal" (max) power.
3571  * We only get components destroyed while no power management is
3572  * going on (and the device is detached), so we don't need a mutex here
3573  */
3574 int
3575 pm_get_normal_power(dev_info_t *dip, int comp)
3576 {
3577 
3578 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3579 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3580 	}
3581 	return (DDI_FAILURE);
3582 }
3583 
3584 /*
3585  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3586  */
3587 int
3588 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3589 {
3590 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3591 		*levelp = PM_CURPOWER(dip, comp);
3592 		return (DDI_SUCCESS);
3593 	}
3594 	return (DDI_FAILURE);
3595 }
3596 
3597 /*
3598  * Returns current threshold of indicated component
3599  */
3600 static int
3601 cur_threshold(dev_info_t *dip, int comp)
3602 {
3603 	pm_component_t *cp = PM_CP(dip, comp);
3604 	int pwr;
3605 
3606 	if (PM_ISBC(dip)) {
3607 		/*
3608 		 * backwards compatible nodes only have one threshold
3609 		 */
3610 		return (cp->pmc_comp.pmc_thresh[1]);
3611 	}
3612 	pwr = cp->pmc_cur_pwr;
3613 	if (pwr == PM_LEVEL_UNKNOWN) {
3614 		int thresh;
3615 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3616 			thresh = pm_default_nexus_threshold;
3617 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3618 			thresh = pm_cpu_idle_threshold;
3619 		else
3620 			thresh = pm_system_idle_threshold;
3621 		return (thresh);
3622 	}
3623 	ASSERT(cp->pmc_comp.pmc_thresh);
3624 	return (cp->pmc_comp.pmc_thresh[pwr]);
3625 }
3626 
3627 /*
3628  * Compute next lower component power level given power index.
3629  */
3630 static int
3631 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3632 {
3633 	int nxt_pwr;
3634 
3635 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3636 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3637 	} else {
3638 		pwrndx--;
3639 		ASSERT(pwrndx >= 0);
3640 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3641 	}
3642 	return (nxt_pwr);
3643 }
3644 
3645 /*
3646  * Update the maxpower (normal) power of a component. Note that the
3647  * component's power level is only changed if its current power level
3648  * is higher than the new max power.
3649  */
3650 int
3651 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3652 {
3653 	PMD_FUNC(pmf, "update_maxpower")
3654 	int old;
3655 	int result;
3656 
3657 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3658 	    !e_pm_valid_power(dip, comp, level)) {
3659 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3660 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3661 		return (DDI_FAILURE);
3662 	}
3663 	old = e_pm_get_max_power(dip, comp);
3664 	e_pm_set_max_power(dip, comp, level);
3665 
3666 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3667 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3668 		e_pm_set_max_power(dip, comp, old);
3669 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3670 		    PM_DEVICE(dip)))
3671 		return (DDI_FAILURE);
3672 	}
3673 	return (DDI_SUCCESS);
3674 }
3675 
3676 /*
3677  * Bring all components of device to normal power
3678  */
3679 int
3680 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3681 {
3682 	PMD_FUNC(pmf, "all_to_normal")
3683 	int		*normal;
3684 	int		i, ncomps, result;
3685 	size_t		size;
3686 	int		changefailed = 0;
3687 
3688 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3689 	ASSERT(PM_GET_PM_INFO(dip));
3690 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3691 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3692 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3693 		return (DDI_FAILURE);
3694 	}
3695 	ncomps = PM_NUMCMPTS(dip);
3696 	for (i = 0; i < ncomps; i++) {
3697 		if (pm_set_power(dip, i, normal[i],
3698 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3699 			changefailed++;
3700 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3701 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3702 			    PM_DEVICE(dip), i, normal[i], result))
3703 		}
3704 	}
3705 	kmem_free(normal, size);
3706 	if (changefailed) {
3707 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3708 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3709 		return (DDI_FAILURE);
3710 	}
3711 	return (DDI_SUCCESS);
3712 }
3713 
3714 /*
3715  * Returns true if all components of device are at normal power
3716  */
3717 int
3718 pm_all_at_normal(dev_info_t *dip)
3719 {
3720 	PMD_FUNC(pmf, "all_at_normal")
3721 	int		*normal;
3722 	int		i;
3723 	size_t		size;
3724 
3725 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3726 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3727 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3728 		return (DDI_FAILURE);
3729 	}
3730 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3731 		int current = PM_CURPOWER(dip, i);
3732 		if (normal[i] > current) {
3733 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3734 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3735 			    normal[i], current))
3736 			break;
3737 		}
3738 	}
3739 	kmem_free(normal, size);
3740 	if (i != PM_NUMCMPTS(dip)) {
3741 		return (0);
3742 	}
3743 	return (1);
3744 }
3745 
3746 static void
3747 bring_wekeeps_up(char *keeper)
3748 {
3749 	PMD_FUNC(pmf, "bring_wekeeps_up")
3750 	int i;
3751 	pm_pdr_t *dp;
3752 	pm_info_t *wku_info;
3753 	char *kept_path;
3754 	dev_info_t *kept;
3755 	static void bring_pmdep_up(dev_info_t *, int);
3756 
3757 	if (panicstr) {
3758 		return;
3759 	}
3760 	/*
3761 	 * We process the request even if the keeper detaches because
3762 	 * detach processing expects this to increment kidsupcnt of kept.
3763 	 */
3764 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3765 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3766 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3767 			continue;
3768 		for (i = 0; i < dp->pdr_kept_count; i++) {
3769 			kept_path = dp->pdr_kept_paths[i];
3770 			if (kept_path == NULL)
3771 				continue;
3772 			ASSERT(kept_path[0] != '\0');
3773 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3774 				continue;
3775 			wku_info = PM_GET_PM_INFO(kept);
3776 			if (wku_info == NULL) {
3777 				if (kept)
3778 					ddi_release_devi(kept);
3779 				continue;
3780 			}
3781 			/*
3782 			 * Don't mess with it if it is being detached, it isn't
3783 			 * safe to call its power entry point
3784 			 */
3785 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3786 				if (kept)
3787 					ddi_release_devi(kept);
3788 				continue;
3789 			}
3790 			bring_pmdep_up(kept, 1);
3791 			ddi_release_devi(kept);
3792 		}
3793 	}
3794 }
3795 
3796 /*
3797  * Bring up the 'kept' device passed as argument
3798  */
3799 static void
3800 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3801 {
3802 	PMD_FUNC(pmf, "bring_pmdep_up")
3803 	int is_all_at_normal = 0;
3804 
3805 	/*
3806 	 * If the kept device has been unmanaged, do nothing.
3807 	 */
3808 	if (!PM_GET_PM_INFO(kept_dip))
3809 		return;
3810 
3811 	/* Just ignore DIRECT PM device till they are released. */
3812 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3813 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3814 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3815 		    "controlling process did something else\n", pmf,
3816 		    PM_DEVICE(kept_dip)))
3817 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3818 		return;
3819 	}
3820 	/* if we got here the keeper had a transition from OFF->ON */
3821 	if (hold)
3822 		pm_hold_power(kept_dip);
3823 
3824 	if (!is_all_at_normal)
3825 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3826 }
3827 
3828 /*
3829  * A bunch of stuff that belongs only to the next routine (or two)
3830  */
3831 
/*
 * String constants for the property parsing code that follows:
 * namestr is the "NAME=" prefix and nameln its length without the
 * terminating NUL; pmcompstr is the "pm-components" property name.
 */
3832 static const char namestr[] = "NAME=";
3833 static const int nameln = sizeof (namestr) - 1;
3834 static const char pmcompstr[] = "pm-components";
3835 
/*
 * Singly linked list node carrying a pointer to one pm_comp_t.
 */
3836 struct pm_comp_pkg {
3837 	pm_comp_t		*comp;
3838 	struct pm_comp_pkg	*next;
3839 };
3840 
/*
 * Local character-class macros.  Both evaluate their argument more than
 * once, so the argument must not have side effects.
 */
3841 #define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')
3842 
3843 #define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
3844 			((ch) >= 'A' && (ch) <= 'F'))
3845 
3846 /*
3847  * Rather than duplicate this code ...
3848  * (this code excerpted from the function that follows it)
 *
 * FINISH_COMP is not self-contained: it uses the names compp, size, tp,
 * level, j, lvals, lnames and lszs from the scope in which it is
 * expanded.  It allocates (KM_SLEEP) one name buffer of 'size' bytes plus
 * 'level'-entry arrays for the level names, values and thresholds, sets
 * every threshold to INT_MAX, copies each level value, and packs the
 * level name strings back to back into the buffer, advancing by lszs[j]
 * after each one.
 * The expansion is a bare brace block rather than a do { } while (0)
 * statement, so it must not be used as the body of an if that has an
 * else.
3849  */
3850 #define	FINISH_COMP { \
3851 	ASSERT(compp); \
3852 	compp->pmc_lnames_sz = size; \
3853 	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
3854 	compp->pmc_numlevels = level; \
3855 	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
3856 	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
3857 	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
3858 	/* copy string out of prop array into buffer */ \
3859 	for (j = 0; j < level; j++) { \
3860 		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
3861 		compp->pmc_lvals[j] = lvals[j]; \
3862 		(void) strcpy(tp, lnames[j]); \
3863 		compp->pmc_lnames[j] = tp; \
3864 		tp += lszs[j]; \
3865 	} \
3866 	ASSERT(tp > compp->pmc_lname_buf && tp <= \
3867 	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
3868 	}
3869 
3870 /*
3871  * Create (empty) component data structures.
3872  */
3873 static void
3874 e_pm_create_components(dev_info_t *dip, int num_components)
3875 {
3876 	struct pm_component *compp, *ocompp;
3877 	int i, size = 0;
3878 
3879 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3880 	ASSERT(!DEVI(dip)->devi_pm_components);
3881 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3882 	size = sizeof (struct pm_component) * num_components;
3883 
3884 	compp = kmem_zalloc(size, KM_SLEEP);
3885 	ocompp = compp;
3886 	DEVI(dip)->devi_pm_comp_size = size;
3887 	DEVI(dip)->devi_pm_num_components = num_components;
3888 	PM_LOCK_BUSY(dip);
3889 	for (i = 0; i < num_components;  i++) {
3890 		compp->pmc_timestamp = gethrestime_sec();
3891 		compp->pmc_norm_pwr = (uint_t)-1;
3892 		compp++;
3893 	}
3894 	PM_UNLOCK_BUSY(dip);
3895 	DEVI(dip)->devi_pm_components = ocompp;
3896 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3897 }
3898 
/*
 * Parse hex or decimal value from char string
 *
 *	cp	string to parse; must start with a decimal digit
 *	valp	where the parsed value is stored on success
 *
 * A leading "0x" or "0X" selects hexadecimal, anything else is decimal (a
 * leading '0' does not mean octal).  "0x" with no hex digits after it
 * parses as 0.
 *
 * Returns a pointer to the first character after the number.  Returns NULL,
 * leaving *valp untouched, if cp does not start with a digit or if the
 * number does not fit in an int.
 *
 * The value is accumulated directly from the input, so the length of the
 * digit string is not limited by any intermediate buffer.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	int base = 10;
	int value = 0;
	int ch, digit;

	if (!isdigit(*cp))
		return (NULL);

	if (*cp == '0') {
		cp++;
		if (*cp == 'x' || *cp == 'X') {
			base = 16;
			cp++;
		}
	}

	for (;; cp++) {
		ch = *cp;
		if (isdigit(ch))
			digit = ch - '0';
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			break;

		/* refuse values for which value * base + digit > INT_MAX */
		if (value > (INT_MAX - digit) / base)
			return (NULL);
		value = value * base + digit;
	}

	*valp = value;
	return (cp);
}
3961 
3962 /*
3963  * Set max (previously documented as "normal") power.
3964  */
3965 static void
3966 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3967 {
3968 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3969 }
3970 
3971 /*
3972  * Get max (previously documented as "normal") power.
3973  */
3974 static int
3975 e_pm_get_max_power(dev_info_t *dip, int component_number)
3976 {
3977 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
3978 }
3979 
3980 /*
3981  * Internal routine for destroying components
3982  * It is called even when there might not be any, so it must be forgiving.
3983  */
3984 static void
3985 e_pm_destroy_components(dev_info_t *dip)
3986 {
3987 	int i;
3988 	struct pm_component *cp;
3989 
3990 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3991 	if (PM_NUMCMPTS(dip) == 0)
3992 		return;
3993 	cp = DEVI(dip)->devi_pm_components;
3994 	ASSERT(cp);
3995 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
3996 		int nlevels = cp->pmc_comp.pmc_numlevels;
3997 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
3998 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
3999 		/*
4000 		 * For BC nodes, the rest is static in bc_comp, so skip it
4001 		 */
4002 		if (PM_ISBC(dip))
4003 			continue;
4004 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4005 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4006 		kmem_free(cp->pmc_comp.pmc_lname_buf,
4007 		    cp->pmc_comp.pmc_lnames_sz);
4008 	}
4009 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4010 	DEVI(dip)->devi_pm_components = NULL;
4011 	DEVI(dip)->devi_pm_num_components = 0;
4012 	DEVI(dip)->devi_pm_flags &=
4013 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4014 }
4015 
4016 /*
4017  * Read the pm-components property (if there is one) and use it to set up
4018  * components.  Returns a pointer to an array of component structures if
4019  * pm-components found and successfully parsed, else returns NULL.
4020  * Sets error return *errp to true to indicate a failure (as opposed to no
4021  * property being present).
4022  */
4023 pm_comp_t *
4024 pm_autoconfig(dev_info_t *dip, int *errp)
4025 {
4026 	PMD_FUNC(pmf, "autoconfig")
4027 	uint_t nelems;
4028 	char **pp;
4029 	pm_comp_t *compp = NULL;
4030 	int i, j, level, components = 0;
4031 	size_t size = 0;
4032 	struct pm_comp_pkg *p, *ptail;
4033 	struct pm_comp_pkg *phead = NULL;
4034 	int *lvals = NULL;
4035 	int *lszs = NULL;
4036 	int *np = NULL;
4037 	int npi = 0;
4038 	char **lnames = NULL;
4039 	char *cp, *tp;
4040 	pm_comp_t *ret = NULL;
4041 
4042 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4043 	*errp = 0;	/* assume success */
4044 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4045 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4046 		return (NULL);
4047 	}
4048 
4049 	if (nelems < 3) {	/* need at least one name and two levels */
4050 		goto errout;
4051 	}
4052 
4053 	/*
4054 	 * pm_create_components is no longer allowed
4055 	 */
4056 	if (PM_NUMCMPTS(dip) != 0) {
4057 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4058 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4059 		goto errout;
4060 	}
4061 
4062 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4063 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4064 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4065 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4066 
4067 	level = 0;
4068 	phead = NULL;
4069 	for (i = 0; i < nelems; i++) {
4070 		cp = pp[i];
4071 		if (!isdigit(*cp)) {	/*  must be name */
4072 			if (strncmp(cp, namestr, nameln) != 0) {
4073 				goto errout;
4074 			}
4075 			if (i != 0) {
4076 				if (level == 0) {	/* no level spec'd */
4077 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4078 					    pmf))
4079 					goto errout;
4080 				}
4081 				np[npi++] = lvals[level - 1];
4082 				/* finish up previous component levels */
4083 				FINISH_COMP;
4084 			}
4085 			cp += nameln;
4086 			if (!*cp) {
4087 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4088 				goto errout;
4089 			}
4090 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4091 			if (phead == NULL) {
4092 				phead = ptail = p;
4093 			} else {
4094 				ptail->next = p;
4095 				ptail = p;
4096 			}
4097 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4098 			    KM_SLEEP);
4099 			compp->pmc_name_sz = strlen(cp) + 1;
4100 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4101 			    KM_SLEEP);
4102 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4103 			components++;
4104 			level = 0;
4105 		} else {	/* better be power level <num>=<name> */
4106 #ifdef DEBUG
4107 			tp = cp;
4108 #endif
4109 			if (i == 0 ||
4110 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4111 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4112 				goto errout;
4113 			}
4114 #ifdef DEBUG
4115 			tp = cp;
4116 #endif
4117 			if (*cp++ != '=' || !*cp) {
4118 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4119 				goto errout;
4120 			}
4121 
4122 			lszs[level] = strlen(cp) + 1;
4123 			size += lszs[level];
4124 			lnames[level] = cp;	/* points into prop string */
4125 			level++;
4126 		}
4127 	}
4128 	np[npi++] = lvals[level - 1];
4129 	if (level == 0) {	/* ended with a name */
4130 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4131 		goto errout;
4132 	}
4133 	FINISH_COMP;
4134 
4135 
4136 	/*
4137 	 * Now we have a list of components--we have to return instead an
4138 	 * array of them, but we can just copy the top level and leave
4139 	 * the rest as is
4140 	 */
4141 	(void) e_pm_create_components(dip, components);
4142 	for (i = 0; i < components; i++)
4143 		e_pm_set_max_power(dip, i, np[i]);
4144 
4145 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4146 	for (i = 0, p = phead; i < components; i++) {
4147 		ASSERT(p);
4148 		/*
4149 		 * Now sanity-check values:  levels must be monotonically
4150 		 * increasing
4151 		 */
4152 		if (p->comp->pmc_numlevels < 2) {
4153 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4154 			    "levels\n", pmf,
4155 			    p->comp->pmc_name, PM_DEVICE(dip),
4156 			    p->comp->pmc_numlevels))
4157 			goto errout;
4158 		}
4159 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4160 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4161 			    (p->comp->pmc_lvals[j] <=
4162 			    p->comp->pmc_lvals[j - 1]))) {
4163 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4164 				    "not mono. incr, %d follows %d\n", pmf,
4165 				    p->comp->pmc_name, PM_DEVICE(dip),
4166 				    p->comp->pmc_lvals[j],
4167 				    p->comp->pmc_lvals[j - 1]))
4168 				goto errout;
4169 			}
4170 		}
4171 		ret[i] = *p->comp;	/* struct assignment */
4172 		for (j = 0; j < i; j++) {
4173 			/*
4174 			 * Test for unique component names
4175 			 */
4176 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4177 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4178 				    "unique\n", pmf, ret[j].pmc_name,
4179 				    PM_DEVICE(dip)))
4180 				goto errout;
4181 			}
4182 		}
4183 		ptail = p;
4184 		p = p->next;
4185 		phead = p;	/* errout depends on phead making sense */
4186 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4187 		kmem_free(ptail, sizeof (*ptail));
4188 	}
4189 out:
4190 	ddi_prop_free(pp);
4191 	if (lvals)
4192 		kmem_free(lvals, nelems * sizeof (int));
4193 	if (lszs)
4194 		kmem_free(lszs, nelems * sizeof (int));
4195 	if (lnames)
4196 		kmem_free(lnames, nelems * sizeof (char *));
4197 	if (np)
4198 		kmem_free(np, nelems * sizeof (int));
4199 	return (ret);
4200 
4201 errout:
4202 	e_pm_destroy_components(dip);
4203 	*errp = 1;	/* signal failure */
4204 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4205 	for (i = 0; i < nelems - 1; i++)
4206 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4207 	if (nelems != 0)
4208 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4209 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4210 	for (p = phead; p; ) {
4211 		pm_comp_t *pp;
4212 		int n;
4213 
4214 		ptail = p;
4215 		/*
4216 		 * Free component data structures
4217 		 */
4218 		pp = p->comp;
4219 		n = pp->pmc_numlevels;
4220 		if (pp->pmc_name_sz) {
4221 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4222 		}
4223 		if (pp->pmc_lnames_sz) {
4224 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4225 		}
4226 		if (pp->pmc_lnames) {
4227 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4228 		}
4229 		if (pp->pmc_thresh) {
4230 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4231 		}
4232 		if (pp->pmc_lvals) {
4233 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4234 		}
4235 		p = ptail->next;
4236 		kmem_free(ptail, sizeof (*ptail));
4237 	}
4238 	if (ret != NULL)
4239 		kmem_free(ret, components * sizeof (pm_comp_t));
4240 	ret = NULL;
4241 	goto out;
4242 }
4243 
4244 /*
4245  * Set threshold values for a devices components by dividing the target
4246  * threshold (base) by the number of transitions and assign each transition
4247  * that threshold.  This will get the entire device down in the target time if
4248  * all components are idle and even if there are dependencies among components.
4249  *
4250  * Devices may well get powered all the way down before the target time, but
4251  * at least the EPA will be happy.
4252  */
4253 void
4254 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4255 {
4256 	PMD_FUNC(pmf, "set_device_threshold")
4257 	int target_threshold = (base * 95) / 100;
4258 	int level, comp;		/* loop counters */
4259 	int transitions = 0;
4260 	int ncomp = PM_NUMCMPTS(dip);
4261 	int thresh;
4262 	int remainder;
4263 	pm_comp_t *pmc;
4264 	int i, circ;
4265 
4266 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4267 	PM_LOCK_DIP(dip);
4268 	/*
4269 	 * First we handle the easy one.  If we're setting the default
4270 	 * threshold for a node with children, then we set it to the
4271 	 * default nexus threshold (currently 0) and mark it as default
4272 	 * nexus threshold instead
4273 	 */
4274 	if (PM_IS_NEXUS(dip)) {
4275 		if (flag == PMC_DEF_THRESH) {
4276 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4277 			    PM_DEVICE(dip)))
4278 			thresh = pm_default_nexus_threshold;
4279 			for (comp = 0; comp < ncomp; comp++) {
4280 				pmc = &PM_CP(dip, comp)->pmc_comp;
4281 				for (level = 1; level < pmc->pmc_numlevels;
4282 				    level++) {
4283 					pmc->pmc_thresh[level] = thresh;
4284 				}
4285 			}
4286 			DEVI(dip)->devi_pm_dev_thresh =
4287 			    pm_default_nexus_threshold;
4288 			/*
4289 			 * If the nexus node is being reconfigured back to
4290 			 * the default threshold, adjust the notlowest count.
4291 			 */
4292 			if (DEVI(dip)->devi_pm_flags &
4293 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4294 				PM_LOCK_POWER(dip, &circ);
4295 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4296 					if (PM_CURPOWER(dip, i) == 0)
4297 						continue;
4298 					mutex_enter(&pm_compcnt_lock);
4299 					ASSERT(pm_comps_notlowest);
4300 					pm_comps_notlowest--;
4301 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4302 					    "notlowest to %d\n", pmf,
4303 					    PM_DEVICE(dip), pm_comps_notlowest))
4304 					if (pm_comps_notlowest == 0)
4305 						pm_ppm_notify_all_lowest(dip,
4306 						    PM_ALL_LOWEST);
4307 					mutex_exit(&pm_compcnt_lock);
4308 				}
4309 				PM_UNLOCK_POWER(dip, circ);
4310 			}
4311 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4312 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4313 			PM_UNLOCK_DIP(dip);
4314 			return;
4315 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4316 			/*
4317 			 * If the nexus node is being configured for a
4318 			 * non-default threshold, include that node in
4319 			 * the notlowest accounting.
4320 			 */
4321 			PM_LOCK_POWER(dip, &circ);
4322 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4323 				if (PM_CURPOWER(dip, i) == 0)
4324 					continue;
4325 				mutex_enter(&pm_compcnt_lock);
4326 				if (pm_comps_notlowest == 0)
4327 					pm_ppm_notify_all_lowest(dip,
4328 					    PM_NOT_ALL_LOWEST);
4329 				pm_comps_notlowest++;
4330 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4331 				    "notlowest to %d\n", pmf,
4332 				    PM_DEVICE(dip), pm_comps_notlowest))
4333 				mutex_exit(&pm_compcnt_lock);
4334 			}
4335 			PM_UNLOCK_POWER(dip, circ);
4336 		}
4337 	}
4338 	/*
4339 	 * Compute the total number of transitions for all components
4340 	 * of the device.  Distribute the threshold evenly over them
4341 	 */
4342 	for (comp = 0; comp < ncomp; comp++) {
4343 		pmc = &PM_CP(dip, comp)->pmc_comp;
4344 		ASSERT(pmc->pmc_numlevels > 1);
4345 		transitions += pmc->pmc_numlevels - 1;
4346 	}
4347 	ASSERT(transitions);
4348 	thresh = target_threshold / transitions;
4349 
4350 	for (comp = 0; comp < ncomp; comp++) {
4351 		pmc = &PM_CP(dip, comp)->pmc_comp;
4352 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4353 			pmc->pmc_thresh[level] = thresh;
4354 		}
4355 	}
4356 
4357 #ifdef DEBUG
4358 	for (comp = 0; comp < ncomp; comp++) {
4359 		pmc = &PM_CP(dip, comp)->pmc_comp;
4360 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4361 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4362 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4363 			    comp, level, pmc->pmc_thresh[level]))
4364 		}
4365 	}
4366 #endif
4367 	/*
4368 	 * Distribute any remainder till they are all gone
4369 	 */
4370 	remainder = target_threshold - thresh * transitions;
4371 	level = 1;
4372 #ifdef DEBUG
4373 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4374 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4375 	    transitions))
4376 #endif
4377 	while (remainder > 0) {
4378 		comp = 0;
4379 		while (remainder && (comp < ncomp)) {
4380 			pmc = &PM_CP(dip, comp)->pmc_comp;
4381 			if (level < pmc->pmc_numlevels) {
4382 				pmc->pmc_thresh[level] += 1;
4383 				remainder--;
4384 			}
4385 			comp++;
4386 		}
4387 		level++;
4388 	}
4389 #ifdef DEBUG
4390 	for (comp = 0; comp < ncomp; comp++) {
4391 		pmc = &PM_CP(dip, comp)->pmc_comp;
4392 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4393 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4394 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4395 			    comp, level, pmc->pmc_thresh[level]))
4396 		}
4397 	}
4398 #endif
4399 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4400 	DEVI(dip)->devi_pm_dev_thresh = base;
4401 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4402 	DEVI(dip)->devi_pm_flags |= flag;
4403 	PM_UNLOCK_DIP(dip);
4404 }
4405 
4406 /*
4407  * Called when there is no old-style platform power management driver
4408  */
4409 static int
4410 ddi_no_platform_power(power_req_t *req)
4411 {
4412 	_NOTE(ARGUNUSED(req))
4413 	return (DDI_FAILURE);
4414 }
4415 
/*
 * This function calls the entry point supplied by the platform-specific
 * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
 * Using a global to hold the platform-specific pm driver's entry point is
 * not ideal, but it is simple and efficient.
 * The previous property lookup was being done in the idle loop on swift
 * systems without pmc chips and hurt deskbench performance as well as
 * violating scheduler locking rules.
 *
 * The pointer starts out at ddi_no_platform_power(), which always fails.
 */
int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4426 
4427 /*
4428  * Old obsolete interface for a device to request a power change (but only
4429  * an increase in power)
4430  */
4431 int
4432 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4433 {
4434 	return (pm_raise_power(dip, cmpt, level));
4435 }
4436 
4437 /*
4438  * The old obsolete interface to platform power management.  Only used by
4439  * Gypsy platform and APM on X86.
4440  */
4441 int
4442 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4443 {
4444 	power_req_t	request;
4445 
4446 	request.request_type = PMR_SET_POWER;
4447 	request.req.set_power_req.who = dip;
4448 	request.req.set_power_req.cmpt = pm_cmpt;
4449 	request.req.set_power_req.level = pm_level;
4450 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4451 }
4452 
4453 /*
4454  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4455  * passed.  Returns true if subsequent processing could result in power being
4456  * removed from the device.  The arg is not currently used because it is
4457  * implicit in the operation of cpr/DR.
4458  */
4459 int
4460 ddi_removing_power(dev_info_t *dip)
4461 {
4462 	_NOTE(ARGUNUSED(dip))
4463 	return (pm_powering_down);
4464 }
4465 
4466 /*
4467  * Returns true if a device indicates that its parent handles suspend/resume
4468  * processing for it.
4469  */
4470 int
4471 e_ddi_parental_suspend_resume(dev_info_t *dip)
4472 {
4473 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4474 }
4475 
4476 /*
4477  * Called for devices which indicate that their parent does suspend/resume
4478  * handling for them
4479  */
4480 int
4481 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4482 {
4483 	power_req_t	request;
4484 	request.request_type = PMR_SUSPEND;
4485 	request.req.suspend_req.who = dip;
4486 	request.req.suspend_req.cmd = cmd;
4487 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4488 }
4489 
4490 /*
4491  * Called for devices which indicate that their parent does suspend/resume
4492  * handling for them
4493  */
4494 int
4495 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4496 {
4497 	power_req_t	request;
4498 	request.request_type = PMR_RESUME;
4499 	request.req.resume_req.who = dip;
4500 	request.req.resume_req.cmd = cmd;
4501 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4502 }
4503 
4504 /*
4505  * Old obsolete exported interface for drivers to create components.
4506  * This is now handled by exporting the pm-components property.
4507  */
4508 int
4509 pm_create_components(dev_info_t *dip, int num_components)
4510 {
4511 	PMD_FUNC(pmf, "pm_create_components")
4512 
4513 	if (num_components < 1)
4514 		return (DDI_FAILURE);
4515 
4516 	if (!DEVI_IS_ATTACHING(dip)) {
4517 		return (DDI_FAILURE);
4518 	}
4519 
4520 	/* don't need to lock dip because attach is single threaded */
4521 	if (DEVI(dip)->devi_pm_components) {
4522 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4523 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4524 		return (DDI_FAILURE);
4525 	}
4526 	e_pm_create_components(dip, num_components);
4527 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4528 	e_pm_default_components(dip, num_components);
4529 	return (DDI_SUCCESS);
4530 }
4531 
4532 /*
4533  * Obsolete interface previously called by drivers to destroy their components
4534  * at detach time.  This is now done automatically.  However, we need to keep
4535  * this for the old drivers.
4536  */
4537 void
4538 pm_destroy_components(dev_info_t *dip)
4539 {
4540 	PMD_FUNC(pmf, "pm_destroy_components")
4541 	dev_info_t *pdip = ddi_get_parent(dip);
4542 
4543 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4544 	    PM_DEVICE(dip)))
4545 	ASSERT(DEVI_IS_DETACHING(dip));
4546 #ifdef DEBUG
4547 	if (!PM_ISBC(dip))
4548 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4549 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4550 		    PM_ADDR(dip));
4551 #endif
4552 	/*
4553 	 * We ignore this unless this is an old-style driver, except for
4554 	 * printing the message above
4555 	 */
4556 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4557 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4558 		    PM_DEVICE(dip)))
4559 		return;
4560 	}
4561 	ASSERT(PM_GET_PM_INFO(dip));
4562 
4563 	/*
4564 	 * pm_unmanage will clear info pointer later, after dealing with
4565 	 * dependencies
4566 	 */
4567 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4568 	/*
4569 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4570 	 * Parents that get notification are not adjusted because their
4571 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4572 	 */
4573 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4574 		pm_rele_power(pdip);
4575 #ifdef DEBUG
4576 	else {
4577 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4578 		    pmf, PM_DEVICE(dip)))
4579 	}
4580 #endif
4581 	e_pm_destroy_components(dip);
4582 	/*
4583 	 * Forget we ever knew anything about the components of this  device
4584 	 */
4585 	DEVI(dip)->devi_pm_flags &=
4586 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4587 }
4588 
4589 /*
4590  * Exported interface for a driver to set a component busy.
4591  */
4592 int
4593 pm_busy_component(dev_info_t *dip, int cmpt)
4594 {
4595 	struct pm_component *cp;
4596 
4597 	ASSERT(dip != NULL);
4598 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4599 		return (DDI_FAILURE);
4600 	PM_LOCK_BUSY(dip);
4601 	cp->pmc_busycount++;
4602 	cp->pmc_timestamp = 0;
4603 	PM_UNLOCK_BUSY(dip);
4604 	return (DDI_SUCCESS);
4605 }
4606 
4607 /*
4608  * Exported interface for a driver to set a component idle.
4609  */
4610 int
4611 pm_idle_component(dev_info_t *dip, int cmpt)
4612 {
4613 	PMD_FUNC(pmf, "pm_idle_component")
4614 	struct pm_component *cp;
4615 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4616 
4617 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4618 		return (DDI_FAILURE);
4619 
4620 	PM_LOCK_BUSY(dip);
4621 	if (cp->pmc_busycount) {
4622 		if (--(cp->pmc_busycount) == 0)
4623 			cp->pmc_timestamp = gethrestime_sec();
4624 	} else {
4625 		cp->pmc_timestamp = gethrestime_sec();
4626 	}
4627 
4628 	PM_UNLOCK_BUSY(dip);
4629 
4630 	/*
4631 	 * if device becomes idle during idle down period, try scan it down
4632 	 */
4633 	if (scanp && PM_IS_PID(dip)) {
4634 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4635 		    PM_DEVICE(dip)))
4636 		pm_rescan(dip);
4637 		return (DDI_SUCCESS);
4638 	}
4639 
4640 	/*
4641 	 * handle scan not running with nexus threshold == 0
4642 	 */
4643 
4644 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4645 		pm_rescan(dip);
4646 	}
4647 
4648 	return (DDI_SUCCESS);
4649 }
4650 
4651 /*
4652  * This is the old  obsolete interface called by drivers to set their normal
4653  * power.  Thus we can't fix its behavior or return a value.
4654  * This functionality is replaced by the pm-component property.
4655  * We'll only get components destroyed while no power management is
4656  * going on (and the device is detached), so we don't need a mutex here
4657  */
4658 void
4659 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4660 {
4661 	PMD_FUNC(pmf, "set_normal_power")
4662 #ifdef DEBUG
4663 	if (!PM_ISBC(dip))
4664 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4665 		    "(driver exporting pm-components property) ignored",
4666 		    PM_NAME(dip), PM_ADDR(dip));
4667 #endif
4668 	if (PM_ISBC(dip)) {
4669 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4670 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4671 		e_pm_set_max_power(dip, comp, level);
4672 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4673 	}
4674 }
4675 
4676 /*
4677  * Called on a successfully detached driver to free pm resources
4678  */
4679 static void
4680 pm_stop(dev_info_t *dip)
4681 {
4682 	PMD_FUNC(pmf, "stop")
4683 	dev_info_t *pdip = ddi_get_parent(dip);
4684 
4685 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4686 	/* stopping scan, destroy scan data structure */
4687 	if (!PM_ISBC(dip)) {
4688 		pm_scan_stop(dip);
4689 		pm_scan_fini(dip);
4690 	}
4691 
4692 	if (PM_GET_PM_INFO(dip) != NULL) {
4693 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4694 			/*
4695 			 * Old style driver may have called
4696 			 * pm_destroy_components already, but just in case ...
4697 			 */
4698 			e_pm_destroy_components(dip);
4699 		} else {
4700 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4701 			    pmf, PM_DEVICE(dip)))
4702 		}
4703 	} else {
4704 		if (PM_NUMCMPTS(dip))
4705 			e_pm_destroy_components(dip);
4706 		else {
4707 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4708 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4709 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4710 					pm_rele_power(pdip);
4711 				} else if (pdip &&
4712 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4713 					(void) mdi_power(pdip,
4714 					    MDI_PM_RELE_POWER,
4715 					    (void *)dip, NULL, 0);
4716 				}
4717 			}
4718 		}
4719 	}
4720 }
4721 
4722 /*
4723  * The node is the subject of a reparse pm props ioctl. Throw away the old
4724  * info and start over.
4725  */
4726 int
4727 e_new_pm_props(dev_info_t *dip)
4728 {
4729 	if (PM_GET_PM_INFO(dip) != NULL) {
4730 		pm_stop(dip);
4731 
4732 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4733 			return (DDI_FAILURE);
4734 		}
4735 	}
4736 	e_pm_props(dip);
4737 	return (DDI_SUCCESS);
4738 }
4739 
4740 /*
4741  * Device has been attached, so process its pm properties
4742  */
4743 void
4744 e_pm_props(dev_info_t *dip)
4745 {
4746 	char *pp;
4747 	int len;
4748 	int flags = 0;
4749 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4750 
4751 	/*
4752 	 * It doesn't matter if we do this more than once, we should always
4753 	 * get the same answers, and if not, then the last one in is the
4754 	 * best one.
4755 	 */
4756 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4757 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4758 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4759 			flags = PMC_NEEDS_SR;
4760 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4761 			flags = PMC_NO_SR;
4762 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4763 			flags = PMC_PARENTAL_SR;
4764 		} else {
4765 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4766 			    "%s property value '%s'", PM_NAME(dip),
4767 			    PM_ADDR(dip), "pm-hardware-state", pp);
4768 		}
4769 		kmem_free(pp, len);
4770 	}
4771 	/*
4772 	 * This next segment (PMC_WANTS_NOTIFY) is in
4773 	 * support of nexus drivers which will want to be involved in
4774 	 * (or at least notified of) their child node's power level transitions.
4775 	 * "pm-want-child-notification?" is defined by the parent.
4776 	 */
4777 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4778 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4779 		flags |= PMC_WANTS_NOTIFY;
4780 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4781 	    dip, propflag, "pm-want-child-notification?"));
4782 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4783 	    "no-involuntary-power-cycles"))
4784 		flags |= PMC_NO_INVOL;
4785 	/*
4786 	 * Is the device a CPU device?
4787 	 */
4788 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4789 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4790 		if (strcmp(pp, "CPU") == 0) {
4791 			flags |= PMC_CPU_DEVICE;
4792 		} else {
4793 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4794 			    "%s property value '%s'", PM_NAME(dip),
4795 			    PM_ADDR(dip), "pm-class", pp);
4796 		}
4797 		kmem_free(pp, len);
4798 	}
4799 	/* devfs single threads us */
4800 	DEVI(dip)->devi_pm_flags |= flags;
4801 }
4802 
4803 /*
4804  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4805  * driver which has claimed a node.
4806  * Sets old_power in arg struct.
4807  */
4808 static int
4809 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4810     ddi_ctl_enum_t ctlop, void *arg, void *result)
4811 {
4812 	_NOTE(ARGUNUSED(dip))
4813 	PMD_FUNC(pmf, "ctlops")
4814 	power_req_t *reqp = (power_req_t *)arg;
4815 	int retval;
4816 	dev_info_t *target_dip;
4817 	int new_level, old_level, cmpt;
4818 #ifdef PMDDEBUG
4819 	char *format;
4820 #endif
4821 
4822 	/*
4823 	 * The interface for doing the actual power level changes is now
4824 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4825 	 * different platform-specific power control drivers.
4826 	 *
4827 	 * This driver implements the "default" version of this interface.
4828 	 * If no ppm driver has been installed then this interface is called
4829 	 * instead.
4830 	 */
4831 	ASSERT(dip == NULL);
4832 	switch (ctlop) {
4833 	case DDI_CTLOPS_POWER:
4834 		switch (reqp->request_type) {
4835 		case PMR_PPM_SET_POWER:
4836 		{
4837 			target_dip = reqp->req.ppm_set_power_req.who;
4838 			ASSERT(target_dip == rdip);
4839 			new_level = reqp->req.ppm_set_power_req.new_level;
4840 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4841 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4842 			old_level = PM_CURPOWER(target_dip, cmpt);
4843 			reqp->req.ppm_set_power_req.old_level = old_level;
4844 			retval = pm_power(target_dip, cmpt, new_level);
4845 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4846 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4847 			    old_level, new_level, (retval == DDI_SUCCESS ?
4848 			    "chd" : "no chg")))
4849 			return (retval);
4850 		}
4851 
4852 		case PMR_PPM_PRE_DETACH:
4853 		case PMR_PPM_POST_DETACH:
4854 		case PMR_PPM_PRE_ATTACH:
4855 		case PMR_PPM_POST_ATTACH:
4856 		case PMR_PPM_PRE_PROBE:
4857 		case PMR_PPM_POST_PROBE:
4858 		case PMR_PPM_PRE_RESUME:
4859 		case PMR_PPM_INIT_CHILD:
4860 		case PMR_PPM_UNINIT_CHILD:
4861 #ifdef PMDDEBUG
4862 			switch (reqp->request_type) {
4863 				case PMR_PPM_PRE_DETACH:
4864 					format = "%s: PMR_PPM_PRE_DETACH "
4865 					    "%s@%s(%s#%d)\n";
4866 					break;
4867 				case PMR_PPM_POST_DETACH:
4868 					format = "%s: PMR_PPM_POST_DETACH "
4869 					    "%s@%s(%s#%d) rets %d\n";
4870 					break;
4871 				case PMR_PPM_PRE_ATTACH:
4872 					format = "%s: PMR_PPM_PRE_ATTACH "
4873 					    "%s@%s(%s#%d)\n";
4874 					break;
4875 				case PMR_PPM_POST_ATTACH:
4876 					format = "%s: PMR_PPM_POST_ATTACH "
4877 					    "%s@%s(%s#%d) rets %d\n";
4878 					break;
4879 				case PMR_PPM_PRE_PROBE:
4880 					format = "%s: PMR_PPM_PRE_PROBE "
4881 					    "%s@%s(%s#%d)\n";
4882 					break;
4883 				case PMR_PPM_POST_PROBE:
4884 					format = "%s: PMR_PPM_POST_PROBE "
4885 					    "%s@%s(%s#%d) rets %d\n";
4886 					break;
4887 				case PMR_PPM_PRE_RESUME:
4888 					format = "%s: PMR_PPM_PRE_RESUME "
4889 					    "%s@%s(%s#%d) rets %d\n";
4890 					break;
4891 				case PMR_PPM_INIT_CHILD:
4892 					format = "%s: PMR_PPM_INIT_CHILD "
4893 					    "%s@%s(%s#%d)\n";
4894 					break;
4895 				case PMR_PPM_UNINIT_CHILD:
4896 					format = "%s: PMR_PPM_UNINIT_CHILD "
4897 					    "%s@%s(%s#%d)\n";
4898 					break;
4899 				default:
4900 					break;
4901 			}
4902 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4903 			    reqp->req.ppm_config_req.result))
4904 #endif
4905 			return (DDI_SUCCESS);
4906 
4907 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4908 			/*
4909 			 * Nothing for us to do
4910 			 */
4911 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4912 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4913 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4914 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4915 			    reqp->req.ppm_notify_level_req.cmpt,
4916 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4917 			    reqp->req.ppm_notify_level_req.cmpt),
4918 			    reqp->req.ppm_notify_level_req.new_level))
4919 			return (DDI_SUCCESS);
4920 
4921 		case PMR_PPM_UNMANAGE:
4922 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4923 			    pmf, PM_DEVICE(rdip)))
4924 			return (DDI_SUCCESS);
4925 
4926 		case PMR_PPM_LOCK_POWER:
4927 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4928 			    reqp->req.ppm_lock_power_req.circp);
4929 			return (DDI_SUCCESS);
4930 
4931 		case PMR_PPM_UNLOCK_POWER:
4932 			pm_unlock_power_single(
4933 			    reqp->req.ppm_unlock_power_req.who,
4934 			    reqp->req.ppm_unlock_power_req.circ);
4935 			return (DDI_SUCCESS);
4936 
4937 		case PMR_PPM_TRY_LOCK_POWER:
4938 			*(int *)result = pm_try_locking_power_single(
4939 			    reqp->req.ppm_lock_power_req.who,
4940 			    reqp->req.ppm_lock_power_req.circp);
4941 			return (DDI_SUCCESS);
4942 
4943 		case PMR_PPM_POWER_LOCK_OWNER:
4944 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4945 			ASSERT(target_dip == rdip);
4946 			reqp->req.ppm_power_lock_owner_req.owner =
4947 			    DEVI(rdip)->devi_busy_thread;
4948 			return (DDI_SUCCESS);
4949 		default:
4950 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4951 			return (DDI_FAILURE);
4952 		}
4953 
4954 	default:
4955 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4956 		return (DDI_FAILURE);
4957 	}
4958 }
4959 
4960 /*
4961  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4962  * power_ops struct for this functionality instead?
4963  * However, we only ever do this on a ppm driver.
4964  */
4965 int
4966 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4967 {
4968 	int (*fp)();
4969 
4970 	/* if no ppm handler, call the default routine */
4971 	if (d == NULL) {
4972 		return (pm_default_ctlops(d, r, op, a, v));
4973 	}
4974 	if (!d || !r)
4975 		return (DDI_FAILURE);
4976 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4977 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4978 
4979 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4980 	return ((*fp)(d, r, op, a, v));
4981 }
4982 
4983 /*
4984  * Called on a node when attach completes or the driver makes its first pm
4985  * call (whichever comes first).
4986  * In the attach case, device may not be power manageable at all.
4987  * Don't need to lock the dip because we're single threaded by the devfs code
4988  */
4989 static int
4990 pm_start(dev_info_t *dip)
4991 {
4992 	PMD_FUNC(pmf, "start")
4993 	int ret;
4994 	dev_info_t *pdip = ddi_get_parent(dip);
4995 	int e_pm_manage(dev_info_t *, int);
4996 	void pm_noinvol_specd(dev_info_t *dip);
4997 
4998 	e_pm_props(dip);
4999 	pm_noinvol_specd(dip);
5000 	/*
5001 	 * If this dip has already been processed, don't mess with it
5002 	 * (but decrement the speculative count we did above, as whatever
5003 	 * code put it under pm already will have dealt with it)
5004 	 */
5005 	if (PM_GET_PM_INFO(dip)) {
5006 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5007 		    pmf, PM_DEVICE(dip)))
5008 		return (0);
5009 	}
5010 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5011 
5012 	if (PM_GET_PM_INFO(dip) == NULL) {
5013 		/*
5014 		 * keep the kidsupcount increment as is
5015 		 */
5016 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5017 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5018 			pm_hold_power(pdip);
5019 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5020 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5021 			    (void *)dip, NULL, 0);
5022 		}
5023 
5024 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5025 		    "left up\n", pmf, PM_DEVICE(dip)))
5026 	}
5027 
5028 	return (ret);
5029 }
5030 
5031 /*
5032  * Keep a list of recorded thresholds.  For now we just keep a list and
5033  * search it linearly.  We don't expect too many entries.  Can always hash it
5034  * later if we need to.
5035  */
5036 void
5037 pm_record_thresh(pm_thresh_rec_t *rp)
5038 {
5039 	pm_thresh_rec_t *pptr, *ptr;
5040 
5041 	ASSERT(*rp->ptr_physpath);
5042 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5043 	for (pptr = NULL, ptr = pm_thresh_head;
5044 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5045 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5046 			/* replace this one */
5047 			rp->ptr_next = ptr->ptr_next;
5048 			if (pptr) {
5049 				pptr->ptr_next = rp;
5050 			} else {
5051 				pm_thresh_head = rp;
5052 			}
5053 			rw_exit(&pm_thresh_rwlock);
5054 			kmem_free(ptr, ptr->ptr_size);
5055 			return;
5056 		}
5057 		continue;
5058 	}
5059 	/*
5060 	 * There was not a match in the list, insert this one in front
5061 	 */
5062 	if (pm_thresh_head) {
5063 		rp->ptr_next = pm_thresh_head;
5064 		pm_thresh_head = rp;
5065 	} else {
5066 		rp->ptr_next = NULL;
5067 		pm_thresh_head = rp;
5068 	}
5069 	rw_exit(&pm_thresh_rwlock);
5070 }
5071 
5072 /*
5073  * Create a new dependency record and hang a new dependency entry off of it
5074  */
5075 pm_pdr_t *
5076 newpdr(char *kept, char *keeps, int isprop)
5077 {
5078 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5079 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5080 	p->pdr_size = size;
5081 	p->pdr_isprop = isprop;
5082 	p->pdr_kept_paths = NULL;
5083 	p->pdr_kept_count = 0;
5084 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5085 	(void) strcpy(p->pdr_kept, kept);
5086 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5087 	(void) strcpy(p->pdr_keeper, keeps);
5088 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5089 	    (intptr_t)p + size);
5090 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5091 	    (intptr_t)p + size);
5092 	return (p);
5093 }
5094 
5095 /*
5096  * Keep a list of recorded dependencies.  We only keep the
5097  * keeper -> kept list for simplification. At this point We do not
5098  * care about whether the devices are attached or not yet,
5099  * this would be done in pm_keeper() and pm_kept().
5100  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5101  * and it is up to the user to issue the ioctl again if they want it
5102  * (e.g. pmconfig)
5103  * Returns true if dependency already exists in the list.
5104  */
5105 int
5106 pm_record_keeper(char *kept, char *keeper, int isprop)
5107 {
5108 	PMD_FUNC(pmf, "record_keeper")
5109 	pm_pdr_t *npdr, *ppdr, *pdr;
5110 
5111 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5112 	ASSERT(kept && keeper);
5113 #ifdef DEBUG
5114 	if (pm_debug & PMD_KEEPS)
5115 		prdeps("pm_record_keeper entry");
5116 #endif
5117 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5118 	    ppdr = pdr, pdr = pdr->pdr_next) {
5119 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5120 		    pdr->pdr_keeper))
5121 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5122 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5123 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5124 			return (1);
5125 		}
5126 	}
5127 	/*
5128 	 * We did not find any match, so we have to make an entry
5129 	 */
5130 	npdr = newpdr(kept, keeper, isprop);
5131 	if (ppdr) {
5132 		ASSERT(ppdr->pdr_next == NULL);
5133 		ppdr->pdr_next = npdr;
5134 	} else {
5135 		ASSERT(pm_dep_head == NULL);
5136 		pm_dep_head = npdr;
5137 	}
5138 #ifdef DEBUG
5139 	if (pm_debug & PMD_KEEPS)
5140 		prdeps("pm_record_keeper after new record");
5141 #endif
5142 	if (!isprop)
5143 		pm_unresolved_deps++;
5144 	else
5145 		pm_prop_deps++;
5146 	return (0);
5147 }
5148 
5149 /*
5150  * Look up this device in the set of devices we've seen ioctls for
5151  * to see if we are holding a threshold spec for it.  If so, make it so.
5152  * At ioctl time, we were given the physical path of the device.
5153  */
5154 int
5155 pm_thresh_specd(dev_info_t *dip)
5156 {
5157 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5158 	char *path = 0;
5159 	char pathbuf[MAXNAMELEN];
5160 	pm_thresh_rec_t *rp;
5161 
5162 	path = ddi_pathname(dip, pathbuf);
5163 
5164 	rw_enter(&pm_thresh_rwlock, RW_READER);
5165 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5166 		if (strcmp(rp->ptr_physpath, path) != 0)
5167 			continue;
5168 		pm_apply_recorded_thresh(dip, rp);
5169 		rw_exit(&pm_thresh_rwlock);
5170 		return (1);
5171 	}
5172 	rw_exit(&pm_thresh_rwlock);
5173 	return (0);
5174 }
5175 
5176 static int
5177 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5178 {
5179 	PMD_FUNC(pmf, "set_keeping")
5180 	pm_info_t *kept_info;
5181 	int j, up = 0, circ;
5182 	void prdeps(char *);
5183 
5184 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5185 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5186 #ifdef DEBUG
5187 	if (pm_debug & PMD_KEEPS)
5188 		prdeps("Before PAD\n");
5189 #endif
5190 	ASSERT(keeper != kept);
5191 	if (PM_GET_PM_INFO(keeper) == NULL) {
5192 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5193 		    "%s@%s(%s#%d), but the latter is not power managed",
5194 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5195 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5196 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5197 		return (0);
5198 	}
5199 	kept_info = PM_GET_PM_INFO(kept);
5200 	ASSERT(kept_info);
5201 	PM_LOCK_POWER(keeper, &circ);
5202 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5203 		if (PM_CURPOWER(keeper, j)) {
5204 			up++;
5205 			break;
5206 		}
5207 	}
5208 	if (up) {
5209 		/* Bringup and maintain a hold on the kept */
5210 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5211 		    PM_DEVICE(kept)))
5212 		bring_pmdep_up(kept, 1);
5213 	}
5214 	PM_UNLOCK_POWER(keeper, circ);
5215 #ifdef DEBUG
5216 	if (pm_debug & PMD_KEEPS)
5217 		prdeps("After PAD\n");
5218 #endif
5219 	return (1);
5220 }
5221 
5222 /*
5223  * Should this device keep up another device?
5224  * Look up this device in the set of devices we've seen ioctls for
5225  * to see if we are holding a dependency spec for it.  If so, make it so.
5226  * Because we require the kept device to be attached already in order to
5227  * make the list entry (and hold it), we only need to look for keepers.
5228  * At ioctl time, we were given the physical path of the device.
5229  */
5230 int
5231 pm_keeper(char *keeper)
5232 {
5233 	PMD_FUNC(pmf, "keeper")
5234 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5235 	dev_info_t *dip;
5236 	pm_pdr_t *dp;
5237 	dev_info_t *kept = NULL;
5238 	int ret = 0;
5239 	int i;
5240 
5241 	if (!pm_unresolved_deps && !pm_prop_deps)
5242 		return (0);
5243 	ASSERT(keeper != NULL);
5244 	dip = pm_name_to_dip(keeper, 1);
5245 	if (dip == NULL)
5246 		return (0);
5247 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5248 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5249 		if (!dp->pdr_isprop) {
5250 			if (!pm_unresolved_deps)
5251 				continue;
5252 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5253 			if (dp->pdr_satisfied) {
5254 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5255 				continue;
5256 			}
5257 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5258 				ret += pm_apply_recorded_dep(dip, dp);
5259 			}
5260 		} else {
5261 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5262 				continue;
5263 			for (i = 0; i < dp->pdr_kept_count; i++) {
5264 				if (dp->pdr_kept_paths[i] == NULL)
5265 					continue;
5266 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5267 				if (kept == NULL)
5268 					continue;
5269 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5270 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5271 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5272 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5273 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5274 				    dp->pdr_kept_count))
5275 				if (kept != dip) {
5276 					ret += pm_set_keeping(dip, kept);
5277 				}
5278 				ddi_release_devi(kept);
5279 			}
5280 
5281 		}
5282 	}
5283 	ddi_release_devi(dip);
5284 	return (ret);
5285 }
5286 
5287 /*
5288  * Should this device be kept up by another device?
5289  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5290  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5291  * kept device lists.
5292  */
5293 static int
5294 pm_kept(char *keptp)
5295 {
5296 	PMD_FUNC(pmf, "kept")
5297 	pm_pdr_t *dp;
5298 	int found = 0;
5299 	int ret = 0;
5300 	dev_info_t *keeper;
5301 	dev_info_t *kept;
5302 	size_t length;
5303 	int i;
5304 	char **paths;
5305 	char *path;
5306 
5307 	ASSERT(keptp != NULL);
5308 	kept = pm_name_to_dip(keptp, 1);
5309 	if (kept == NULL)
5310 		return (0);
5311 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5312 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5313 		if (dp->pdr_isprop) {
5314 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5315 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5316 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5317 				/*
5318 				 * Dont allow self dependency.
5319 				 */
5320 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5321 					continue;
5322 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5323 				if (keeper == NULL)
5324 					continue;
5325 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5326 				    "%p\n", pmf, (void *)kept))
5327 #ifdef DEBUG
5328 				if (pm_debug & PMD_DEP)
5329 					prdeps("Before Adding from pm_kept\n");
5330 #endif
5331 				/*
5332 				 * Add ourselves to the dip list.
5333 				 */
5334 				if (dp->pdr_kept_count == 0) {
5335 					length = strlen(keptp) + 1;
5336 					path =
5337 					    kmem_alloc(length, KM_SLEEP);
5338 					paths = kmem_alloc(sizeof (char **),
5339 					    KM_SLEEP);
5340 					(void) strcpy(path, keptp);
5341 					paths[0] = path;
5342 					dp->pdr_kept_paths = paths;
5343 					dp->pdr_kept_count++;
5344 				} else {
5345 					/* Check to see if already on list */
5346 					for (i = 0; i < dp->pdr_kept_count;
5347 					    i++) {
5348 						if (strcmp(keptp,
5349 						    dp->pdr_kept_paths[i])
5350 						    == 0) {
5351 							found++;
5352 							break;
5353 						}
5354 					}
5355 					if (found) {
5356 						ddi_release_devi(keeper);
5357 						continue;
5358 					}
5359 					length = dp->pdr_kept_count *
5360 					    sizeof (char **);
5361 					paths = kmem_alloc(
5362 					    length + sizeof (char **),
5363 					    KM_SLEEP);
5364 					if (dp->pdr_kept_count) {
5365 						bcopy(dp->pdr_kept_paths,
5366 						    paths, length);
5367 						kmem_free(dp->pdr_kept_paths,
5368 						    length);
5369 					}
5370 					dp->pdr_kept_paths = paths;
5371 					length = strlen(keptp) + 1;
5372 					path =
5373 					    kmem_alloc(length, KM_SLEEP);
5374 					(void) strcpy(path, keptp);
5375 					dp->pdr_kept_paths[i] = path;
5376 					dp->pdr_kept_count++;
5377 				}
5378 #ifdef DEBUG
5379 				if (pm_debug & PMD_DEP)
5380 					prdeps("After from pm_kept\n");
5381 #endif
5382 				if (keeper) {
5383 					ret += pm_set_keeping(keeper, kept);
5384 					ddi_release_devi(keeper);
5385 				}
5386 			}
5387 		} else {
5388 			/*
5389 			 * pm_keeper would be called later to do
5390 			 * the actual pm_set_keeping.
5391 			 */
5392 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5393 			    pmf, (void *)kept))
5394 #ifdef DEBUG
5395 			if (pm_debug & PMD_DEP)
5396 				prdeps("Before Adding from pm_kept\n");
5397 #endif
5398 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5399 				if (dp->pdr_kept_paths == NULL) {
5400 					length = strlen(keptp) + 1;
5401 					path =
5402 					    kmem_alloc(length, KM_SLEEP);
5403 					paths = kmem_alloc(sizeof (char **),
5404 					    KM_SLEEP);
5405 					(void) strcpy(path, keptp);
5406 					paths[0] = path;
5407 					dp->pdr_kept_paths = paths;
5408 					dp->pdr_kept_count++;
5409 				}
5410 			}
5411 #ifdef DEBUG
5412 			if (pm_debug & PMD_DEP)
5413 				prdeps("After from pm_kept\n");
5414 #endif
5415 		}
5416 	}
5417 	ddi_release_devi(kept);
5418 	return (ret);
5419 }
5420 
5421 /*
5422  * Apply a recorded dependency.  dp specifies the dependency, and
5423  * keeper is already known to be the device that keeps up the other (kept) one.
5424  * We have to the whole tree for the "kept" device, then apply
5425  * the dependency (which may already be applied).
5426  */
5427 int
5428 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5429 {
5430 	PMD_FUNC(pmf, "apply_recorded_dep")
5431 	dev_info_t *kept = NULL;
5432 	int ret = 0;
5433 	char *keptp = NULL;
5434 
5435 	/*
5436 	 * Device to Device dependency can only be 1 to 1.
5437 	 */
5438 	if (dp->pdr_kept_paths == NULL)
5439 		return (0);
5440 	keptp = dp->pdr_kept_paths[0];
5441 	if (keptp == NULL)
5442 		return (0);
5443 	ASSERT(*keptp != '\0');
5444 	kept = pm_name_to_dip(keptp, 1);
5445 	if (kept == NULL)
5446 		return (0);
5447 	if (kept) {
5448 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5449 		    dp->pdr_keeper, keptp))
5450 		if (pm_set_keeping(keeper, kept)) {
5451 			ASSERT(dp->pdr_satisfied == 0);
5452 			dp->pdr_satisfied = 1;
5453 			ASSERT(pm_unresolved_deps);
5454 			pm_unresolved_deps--;
5455 			ret++;
5456 		}
5457 	}
5458 	ddi_release_devi(kept);
5459 
5460 	return (ret);
5461 }
5462 
5463 /*
5464  * Called from common/io/pm.c
5465  */
5466 int
5467 pm_cur_power(pm_component_t *cp)
5468 {
5469 	return (cur_power(cp));
5470 }
5471 
5472 /*
5473  * External interface to sanity-check a power level.
5474  */
5475 int
5476 pm_valid_power(dev_info_t *dip, int comp, int level)
5477 {
5478 	PMD_FUNC(pmf, "valid_power")
5479 
5480 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5481 		return (e_pm_valid_power(dip, comp, level));
5482 	else {
5483 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5484 		    pmf, comp, PM_NUMCMPTS(dip), level))
5485 		return (0);
5486 	}
5487 }
5488 
5489 /*
5490  * Called when a device that is direct power managed needs to change state.
5491  * This routine arranges to block the request until the process managing
5492  * the device makes the change (or some other incompatible change) or
5493  * the process closes /dev/pm.
5494  */
5495 static int
5496 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5497 {
5498 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5499 	int ret = 0;
5500 	void pm_dequeue_blocked(pm_rsvp_t *);
5501 	void pm_enqueue_blocked(pm_rsvp_t *);
5502 
5503 	ASSERT(!pm_processes_stopped);
5504 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5505 	new->pr_dip = dip;
5506 	new->pr_comp = comp;
5507 	new->pr_newlevel = newpower;
5508 	new->pr_oldlevel = oldpower;
5509 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5510 	mutex_enter(&pm_rsvp_lock);
5511 	pm_enqueue_blocked(new);
5512 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5513 	    PM_CANBLOCK_BLOCK);
5514 	PM_UNLOCK_DIP(dip);
5515 	/*
5516 	 * truss may make the cv_wait_sig return prematurely
5517 	 */
5518 	while (ret == 0) {
5519 		/*
5520 		 * Normally there will be no user context involved, but if
5521 		 * there is (e.g. we are here via an ioctl call to a driver)
5522 		 * then we should allow the process to abort the request,
5523 		 * or we get an unkillable process if the same thread does
5524 		 * PM_DIRECT_PM and pm_raise_power
5525 		 */
5526 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5527 			ret = PMP_FAIL;
5528 		} else {
5529 			ret = new->pr_retval;
5530 		}
5531 	}
5532 	pm_dequeue_blocked(new);
5533 	mutex_exit(&pm_rsvp_lock);
5534 	cv_destroy(&new->pr_cv);
5535 	kmem_free(new, sizeof (*new));
5536 	return (ret);
5537 }
5538 
5539 /*
5540  * Returns true if the process is interested in power level changes (has issued
5541  * PM_GET_STATE_CHANGE ioctl).
5542  */
5543 int
5544 pm_interest_registered(int clone)
5545 {
5546 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5547 	return (pm_interest[clone]);
5548 }
5549 
5550 /*
5551  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5552  * watch all state transitions (dip == NULL).  Set up data
5553  * structs to communicate with process about state changes.
5554  */
5555 void
5556 pm_register_watcher(int clone, dev_info_t *dip)
5557 {
5558 	pscc_t	*p;
5559 	psce_t	*psce;
5560 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5561 
5562 	/*
5563 	 * We definitely need a control struct, then we have to search to see
5564 	 * there is already an entries struct (in the dip != NULL case).
5565 	 */
5566 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5567 	pscc->pscc_clone = clone;
5568 	pscc->pscc_dip = dip;
5569 
5570 	if (dip) {
5571 		int found = 0;
5572 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5573 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5574 			/*
5575 			 * Already an entry for this clone, so just use it
5576 			 * for the new one (for the case where a single
5577 			 * process is watching multiple devices)
5578 			 */
5579 			if (p->pscc_clone == clone) {
5580 				pscc->pscc_entries = p->pscc_entries;
5581 				pscc->pscc_entries->psce_references++;
5582 				found++;
5583 				break;
5584 			}
5585 		}
5586 		if (!found) {		/* create a new one */
5587 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5588 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5589 			psce->psce_first =
5590 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5591 			    KM_SLEEP);
5592 			psce->psce_in = psce->psce_out = psce->psce_first;
5593 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5594 			psce->psce_references = 1;
5595 			pscc->pscc_entries = psce;
5596 		}
5597 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5598 		rw_exit(&pm_pscc_direct_rwlock);
5599 	} else {
5600 		ASSERT(!pm_interest_registered(clone));
5601 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5602 #ifdef DEBUG
5603 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5604 			/*
5605 			 * Should not be an entry for this clone!
5606 			 */
5607 			ASSERT(p->pscc_clone != clone);
5608 		}
5609 #endif
5610 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5611 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5612 		    PSCCOUNT, KM_SLEEP);
5613 		psce->psce_in = psce->psce_out = psce->psce_first;
5614 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5615 		psce->psce_references = 1;
5616 		pscc->pscc_entries = psce;
5617 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5618 		pm_interest[clone] = 1;
5619 		rw_exit(&pm_pscc_interest_rwlock);
5620 	}
5621 }
5622 
5623 /*
5624  * Remove the given entry from the blocked list
5625  */
5626 void
5627 pm_dequeue_blocked(pm_rsvp_t *p)
5628 {
5629 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5630 	if (pm_blocked_list == p) {
5631 		ASSERT(p->pr_prev == NULL);
5632 		if (p->pr_next != NULL)
5633 			p->pr_next->pr_prev = NULL;
5634 		pm_blocked_list = p->pr_next;
5635 	} else {
5636 		ASSERT(p->pr_prev != NULL);
5637 		p->pr_prev->pr_next = p->pr_next;
5638 		if (p->pr_next != NULL)
5639 			p->pr_next->pr_prev = p->pr_prev;
5640 	}
5641 }
5642 
5643 /*
5644  * Remove the given control struct from the given list
5645  */
5646 static void
5647 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5648 {
5649 	if (*list == p) {
5650 		ASSERT(p->pscc_prev == NULL);
5651 		if (p->pscc_next != NULL)
5652 			p->pscc_next->pscc_prev = NULL;
5653 		*list = p->pscc_next;
5654 	} else {
5655 		ASSERT(p->pscc_prev != NULL);
5656 		p->pscc_prev->pscc_next = p->pscc_next;
5657 		if (p->pscc_next != NULL)
5658 			p->pscc_next->pscc_prev = p->pscc_prev;
5659 	}
5660 }
5661 
5662 /*
5663  * Stick the control struct specified on the front of the list
5664  */
5665 static void
5666 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5667 {
5668 	pscc_t *h;	/* entry at head of list */
5669 	if ((h = *list) == NULL) {
5670 		*list = p;
5671 		ASSERT(p->pscc_next == NULL);
5672 		ASSERT(p->pscc_prev == NULL);
5673 	} else {
5674 		p->pscc_next = h;
5675 		ASSERT(h->pscc_prev == NULL);
5676 		h->pscc_prev = p;
5677 		ASSERT(p->pscc_prev == NULL);
5678 		*list = p;
5679 	}
5680 }
5681 
5682 /*
5683  * If dip is NULL, process is closing "clone" clean up all its registrations.
5684  * Otherwise only clean up those for dip because process is just giving up
5685  * control of a direct device.
5686  */
5687 void
5688 pm_deregister_watcher(int clone, dev_info_t *dip)
5689 {
5690 	pscc_t	*p, *pn;
5691 	psce_t	*psce;
5692 	int found = 0;
5693 
5694 	if (dip == NULL) {
5695 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5696 		for (p = pm_pscc_interest; p; p = pn) {
5697 			pn = p->pscc_next;
5698 			if (p->pscc_clone == clone) {
5699 				pm_dequeue_pscc(p, &pm_pscc_interest);
5700 				psce = p->pscc_entries;
5701 				ASSERT(psce->psce_references == 1);
5702 				mutex_destroy(&psce->psce_lock);
5703 				kmem_free(psce->psce_first,
5704 				    sizeof (pm_state_change_t) * PSCCOUNT);
5705 				kmem_free(psce, sizeof (*psce));
5706 				kmem_free(p, sizeof (*p));
5707 			}
5708 		}
5709 		pm_interest[clone] = 0;
5710 		rw_exit(&pm_pscc_interest_rwlock);
5711 	}
5712 	found = 0;
5713 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5714 	for (p = pm_pscc_direct; p; p = pn) {
5715 		pn = p->pscc_next;
5716 		if ((dip && p->pscc_dip == dip) ||
5717 		    (dip == NULL && clone == p->pscc_clone)) {
5718 			ASSERT(clone == p->pscc_clone);
5719 			found++;
5720 			/*
5721 			 * Remove from control list
5722 			 */
5723 			pm_dequeue_pscc(p, &pm_pscc_direct);
5724 			/*
5725 			 * If we're the last reference, free the
5726 			 * entries struct.
5727 			 */
5728 			psce = p->pscc_entries;
5729 			ASSERT(psce);
5730 			if (psce->psce_references == 1) {
5731 				kmem_free(psce->psce_first,
5732 				    PSCCOUNT * sizeof (pm_state_change_t));
5733 				kmem_free(psce, sizeof (*psce));
5734 			} else {
5735 				psce->psce_references--;
5736 			}
5737 			kmem_free(p, sizeof (*p));
5738 		}
5739 	}
5740 	ASSERT(dip == NULL || found);
5741 	rw_exit(&pm_pscc_direct_rwlock);
5742 }
5743 
5744 /*
5745  * Search the indicated list for an entry that matches clone, and return a
5746  * pointer to it.  To be interesting, the entry must have something ready to
5747  * be passed up to the controlling process.
5748  * The returned entry will be locked upon return from this call.
5749  */
5750 static psce_t *
5751 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5752 {
5753 	pscc_t	*p;
5754 	psce_t	*psce;
5755 	rw_enter(lock, RW_READER);
5756 	for (p = *list; p; p = p->pscc_next) {
5757 		if (clone == p->pscc_clone) {
5758 			psce = p->pscc_entries;
5759 			mutex_enter(&psce->psce_lock);
5760 			if (psce->psce_out->size) {
5761 				rw_exit(lock);
5762 				return (psce);
5763 			} else {
5764 				mutex_exit(&psce->psce_lock);
5765 			}
5766 		}
5767 	}
5768 	rw_exit(lock);
5769 	return (NULL);
5770 }
5771 
5772 /*
5773  * Find an entry for a particular clone in the direct list.
5774  */
5775 psce_t *
5776 pm_psc_clone_to_direct(int clone)
5777 {
5778 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5779 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5780 	    &pm_pscc_direct_rwlock));
5781 }
5782 
5783 /*
5784  * Find an entry for a particular clone in the interest list.
5785  */
5786 psce_t *
5787 pm_psc_clone_to_interest(int clone)
5788 {
5789 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5790 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5791 	    &pm_pscc_interest_rwlock));
5792 }
5793 
5794 /*
5795  * Put the given entry at the head of the blocked list
5796  */
5797 void
5798 pm_enqueue_blocked(pm_rsvp_t *p)
5799 {
5800 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5801 	ASSERT(p->pr_next == NULL);
5802 	ASSERT(p->pr_prev == NULL);
5803 	if (pm_blocked_list != NULL) {
5804 		p->pr_next = pm_blocked_list;
5805 		ASSERT(pm_blocked_list->pr_prev == NULL);
5806 		pm_blocked_list->pr_prev = p;
5807 		pm_blocked_list = p;
5808 	} else {
5809 		pm_blocked_list = p;
5810 	}
5811 }
5812 
5813 /*
5814  * Sets every power managed device back to its default threshold
5815  */
5816 void
5817 pm_all_to_default_thresholds(void)
5818 {
5819 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5820 	    (void *) &pm_system_idle_threshold);
5821 }
5822 
5823 static int
5824 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5825 {
5826 	int thr = (int)(*(int *)arg);
5827 
5828 	if (!PM_GET_PM_INFO(dip))
5829 		return (DDI_WALK_CONTINUE);
5830 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5831 	return (DDI_WALK_CONTINUE);
5832 }
5833 
5834 /*
5835  * Returns the current threshold value (in seconds) for the indicated component
5836  */
5837 int
5838 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5839 {
5840 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5841 		return (DDI_FAILURE);
5842 	} else {
5843 		*threshp = cur_threshold(dip, comp);
5844 		return (DDI_SUCCESS);
5845 	}
5846 }
5847 
5848 /*
5849  * To be called when changing the power level of a component of a device.
5850  * On some platforms, changing power on one device may require that power
5851  * be changed on other, related devices in the same transaction.  Thus, we
5852  * always pass this request to the platform power manager so that all the
5853  * affected devices will be locked.
5854  */
5855 void
5856 pm_lock_power(dev_info_t *dip, int *circp)
5857 {
5858 	power_req_t power_req;
5859 	int result;
5860 
5861 	power_req.request_type = PMR_PPM_LOCK_POWER;
5862 	power_req.req.ppm_lock_power_req.who = dip;
5863 	power_req.req.ppm_lock_power_req.circp = circp;
5864 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5865 }
5866 
5867 /*
5868  * Release the lock (or locks) acquired to change the power of a device.
5869  * See comments for pm_lock_power.
5870  */
5871 void
5872 pm_unlock_power(dev_info_t *dip, int circ)
5873 {
5874 	power_req_t power_req;
5875 	int result;
5876 
5877 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5878 	power_req.req.ppm_unlock_power_req.who = dip;
5879 	power_req.req.ppm_unlock_power_req.circ = circ;
5880 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5881 }
5882 
5883 
5884 /*
5885  * Attempt (without blocking) to acquire the lock(s) needed to change the
5886  * power of a component of a device.  See comments for pm_lock_power.
5887  *
5888  * Return: 1 if lock(s) acquired, 0 if not.
5889  */
5890 int
5891 pm_try_locking_power(dev_info_t *dip, int *circp)
5892 {
5893 	power_req_t power_req;
5894 	int result;
5895 
5896 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5897 	power_req.req.ppm_lock_power_req.who = dip;
5898 	power_req.req.ppm_lock_power_req.circp = circp;
5899 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5900 	return (result);
5901 }
5902 
5903 
5904 /*
5905  * Lock power state of a device.
5906  *
5907  * The implementation handles a special case where another thread may have
5908  * acquired the lock and created/launched this thread to do the work.  If
5909  * the lock cannot be acquired immediately, we check to see if this thread
5910  * is registered as a borrower of the lock.  If so, we may proceed without
5911  * the lock.  This assumes that the lending thread blocks on the completion
5912  * of this thread.
5913  *
5914  * Note 1: for use by ppm only.
5915  *
5916  * Note 2: On failing to get the lock immediately, we search lock_loan list
5917  * for curthread (as borrower of the lock).  On a hit, we check that the
5918  * lending thread already owns the lock we want.  It is safe to compare
5919  * devi_busy_thread and thread id of the lender because in the == case (the
5920  * only one we care about) we know that the owner is blocked.  Similarly,
5921  * If we find that curthread isn't registered as a lock borrower, it is safe
5922  * to use the blocking call (ndi_devi_enter) because we know that if we
5923  * weren't already listed as a borrower (upstream on the call stack) we won't
5924  * become one.
5925  */
5926 void
5927 pm_lock_power_single(dev_info_t *dip, int *circp)
5928 {
5929 	lock_loan_t *cur;
5930 
5931 	/* if the lock is available, we are done. */
5932 	if (ndi_devi_tryenter(dip, circp))
5933 		return;
5934 
5935 	mutex_enter(&pm_loan_lock);
5936 	/* see if our thread is registered as a lock borrower. */
5937 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5938 		if (cur->pmlk_borrower == curthread)
5939 			break;
5940 	mutex_exit(&pm_loan_lock);
5941 
5942 	/* if this thread not already registered, it is safe to block */
5943 	if (cur == NULL)
5944 		ndi_devi_enter(dip, circp);
5945 	else {
5946 		/* registered: does lender own the lock we want? */
5947 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5948 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5949 			cur->pmlk_dip = dip;
5950 		} else /* no: just block for it */
5951 			ndi_devi_enter(dip, circp);
5952 
5953 	}
5954 }
5955 
5956 /*
5957  * Drop the lock on the device's power state.  See comment for
5958  * pm_lock_power_single() for special implementation considerations.
5959  *
5960  * Note: for use by ppm only.
5961  */
5962 void
5963 pm_unlock_power_single(dev_info_t *dip, int circ)
5964 {
5965 	lock_loan_t *cur;
5966 
5967 	/* optimization: mutex not needed to check empty list */
5968 	if (lock_loan_head.pmlk_next == NULL) {
5969 		ndi_devi_exit(dip, circ);
5970 		return;
5971 	}
5972 
5973 	mutex_enter(&pm_loan_lock);
5974 	/* see if our thread is registered as a lock borrower. */
5975 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5976 		if (cur->pmlk_borrower == curthread)
5977 			break;
5978 	mutex_exit(&pm_loan_lock);
5979 
5980 	if (cur == NULL || cur->pmlk_dip != dip)
5981 		/* we acquired the lock directly, so return it */
5982 		ndi_devi_exit(dip, circ);
5983 }
5984 
5985 /*
5986  * Try to take the lock for changing the power level of a component.
5987  *
5988  * Note: for use by ppm only.
5989  */
5990 int
5991 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5992 {
5993 	return (ndi_devi_tryenter(dip, circp));
5994 }
5995 
5996 #ifdef	DEBUG
5997 /*
5998  * The following are used only to print out data structures for debugging
5999  */
6000 void
6001 prdeps(char *msg)
6002 {
6003 
6004 	pm_pdr_t *rp;
6005 	int i;
6006 
6007 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6008 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6009 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6010 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6011 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6012 		    (void *)rp->pdr_next);
6013 		if (rp->pdr_kept_count != 0) {
6014 			pm_log("kept list = ");
6015 			i = 0;
6016 			while (i < rp->pdr_kept_count) {
6017 				pm_log("%s ", rp->pdr_kept_paths[i]);
6018 				i++;
6019 			}
6020 			pm_log("\n");
6021 		}
6022 	}
6023 }
6024 
6025 void
6026 pr_noinvol(char *hdr)
6027 {
6028 	pm_noinvol_t *ip;
6029 
6030 	pm_log("%s\n", hdr);
6031 	rw_enter(&pm_noinvol_rwlock, RW_READER);
6032 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6033 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6034 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6035 	rw_exit(&pm_noinvol_rwlock);
6036 }
6037 #endif
6038 
6039 /*
6040  * Attempt to apply the thresholds indicated by rp to the node specified by
6041  * dip.
6042  */
6043 void
6044 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6045 {
6046 	PMD_FUNC(pmf, "apply_recorded_thresh")
6047 	int i, j;
6048 	int comps = PM_NUMCMPTS(dip);
6049 	struct pm_component *cp;
6050 	pm_pte_t *ep;
6051 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
6052 
6053 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
6054 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
6055 	PM_LOCK_DIP(dip);
6056 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
6057 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
6058 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
6059 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
6060 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
6061 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
6062 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
6063 		PM_UNLOCK_DIP(dip);
6064 		return;
6065 	}
6066 
6067 	ep = rp->ptr_entries;
6068 	/*
6069 	 * Here we do the special case of a device threshold
6070 	 */
6071 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6072 		ASSERT(ep && ep->pte_numthresh == 1);
6073 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6074 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6075 		PM_UNLOCK_DIP(dip);
6076 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6077 		if (PM_SCANABLE(dip))
6078 			pm_rescan(dip);
6079 		return;
6080 	}
6081 	for (i = 0; i < comps; i++) {
6082 		cp = PM_CP(dip, i);
6083 		for (j = 0; j < ep->pte_numthresh; j++) {
6084 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6085 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6086 			    i, ep->pte_thresh[j]))
6087 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6088 		}
6089 		ep++;
6090 	}
6091 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6092 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6093 	PM_UNLOCK_DIP(dip);
6094 
6095 	if (PM_SCANABLE(dip))
6096 		pm_rescan(dip);
6097 }
6098 
6099 /*
6100  * Returns true if the threshold specified by rp could be applied to dip
6101  * (that is, the number of components and transitions are the same)
6102  */
6103 int
6104 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6105 {
6106 	PMD_FUNC(pmf, "valid_thresh")
6107 	int comps, i;
6108 	pm_component_t *cp;
6109 	pm_pte_t *ep;
6110 
6111 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6112 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6113 		    rp->ptr_physpath))
6114 		return (0);
6115 	}
6116 	/*
6117 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6118 	 * an entry with numcomps == 0, (since we don't know how many
6119 	 * components there are in advance).  This is always a valid
6120 	 * spec.
6121 	 */
6122 	if (rp->ptr_numcomps == 0) {
6123 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6124 		return (1);
6125 	}
6126 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6127 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6128 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6129 		return (0);
6130 	}
6131 	ep = rp->ptr_entries;
6132 	for (i = 0; i < comps; i++) {
6133 		cp = PM_CP(dip, i);
6134 		if ((ep + i)->pte_numthresh !=
6135 		    cp->pmc_comp.pmc_numlevels - 1) {
6136 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6137 			    pmf, rp->ptr_physpath, i,
6138 			    cp->pmc_comp.pmc_numlevels - 1,
6139 			    (ep + i)->pte_numthresh))
6140 			return (0);
6141 		}
6142 	}
6143 	return (1);
6144 }
6145 
6146 /*
6147  * Remove any recorded threshold for device physpath
6148  * We know there will be at most one.
6149  */
6150 void
6151 pm_unrecord_threshold(char *physpath)
6152 {
6153 	pm_thresh_rec_t *pptr, *ptr;
6154 
6155 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6156 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6157 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6158 			if (pptr) {
6159 				pptr->ptr_next = ptr->ptr_next;
6160 			} else {
6161 				ASSERT(pm_thresh_head == ptr);
6162 				pm_thresh_head = ptr->ptr_next;
6163 			}
6164 			kmem_free(ptr, ptr->ptr_size);
6165 			break;
6166 		}
6167 		pptr = ptr;
6168 	}
6169 	rw_exit(&pm_thresh_rwlock);
6170 }
6171 
6172 /*
6173  * Discard all recorded thresholds.  We are returning to the default pm state.
6174  */
6175 void
6176 pm_discard_thresholds(void)
6177 {
6178 	pm_thresh_rec_t *rp;
6179 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6180 	while (pm_thresh_head) {
6181 		rp = pm_thresh_head;
6182 		pm_thresh_head = rp->ptr_next;
6183 		kmem_free(rp, rp->ptr_size);
6184 	}
6185 	rw_exit(&pm_thresh_rwlock);
6186 }
6187 
6188 /*
6189  * Discard all recorded dependencies.  We are returning to the default pm state.
6190  */
6191 void
6192 pm_discard_dependencies(void)
6193 {
6194 	pm_pdr_t *rp;
6195 	int i;
6196 	size_t length;
6197 
6198 #ifdef DEBUG
6199 	if (pm_debug & PMD_DEP)
6200 		prdeps("Before discard\n");
6201 #endif
6202 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6203 
6204 #ifdef DEBUG
6205 	if (pm_debug & PMD_DEP)
6206 		prdeps("After discard\n");
6207 #endif
6208 	while (pm_dep_head) {
6209 		rp = pm_dep_head;
6210 		if (!rp->pdr_isprop) {
6211 			ASSERT(rp->pdr_satisfied == 0);
6212 			ASSERT(pm_unresolved_deps);
6213 			pm_unresolved_deps--;
6214 		} else {
6215 			ASSERT(pm_prop_deps);
6216 			pm_prop_deps--;
6217 		}
6218 		pm_dep_head = rp->pdr_next;
6219 		if (rp->pdr_kept_count)  {
6220 			for (i = 0; i < rp->pdr_kept_count; i++) {
6221 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6222 				kmem_free(rp->pdr_kept_paths[i], length);
6223 			}
6224 			kmem_free(rp->pdr_kept_paths,
6225 			    rp->pdr_kept_count * sizeof (char **));
6226 		}
6227 		kmem_free(rp, rp->pdr_size);
6228 	}
6229 }
6230 
6231 
6232 static int
6233 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6234 {
6235 	_NOTE(ARGUNUSED(arg))
6236 	char *pathbuf;
6237 
6238 	if (PM_GET_PM_INFO(dip) == NULL)
6239 		return (DDI_WALK_CONTINUE);
6240 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6241 	(void) ddi_pathname(dip, pathbuf);
6242 	pm_free_keeper(pathbuf, 0);
6243 	kmem_free(pathbuf, MAXPATHLEN);
6244 	return (DDI_WALK_CONTINUE);
6245 }
6246 
6247 static int
6248 pm_kept_walk(dev_info_t *dip, void *arg)
6249 {
6250 	_NOTE(ARGUNUSED(arg))
6251 	char *pathbuf;
6252 
6253 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6254 	(void) ddi_pathname(dip, pathbuf);
6255 	(void) pm_kept(pathbuf);
6256 	kmem_free(pathbuf, MAXPATHLEN);
6257 
6258 	return (DDI_WALK_CONTINUE);
6259 }
6260 
6261 static int
6262 pm_keeper_walk(dev_info_t *dip, void *arg)
6263 {
6264 	_NOTE(ARGUNUSED(arg))
6265 	char *pathbuf;
6266 
6267 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6268 	(void) ddi_pathname(dip, pathbuf);
6269 	(void) pm_keeper(pathbuf);
6270 	kmem_free(pathbuf, MAXPATHLEN);
6271 
6272 	return (DDI_WALK_CONTINUE);
6273 }
6274 
6275 static char *
6276 pdw_type_decode(int type)
6277 {
6278 	switch (type) {
6279 	case PM_DEP_WK_POWER_ON:
6280 		return ("power on");
6281 	case PM_DEP_WK_POWER_OFF:
6282 		return ("power off");
6283 	case PM_DEP_WK_DETACH:
6284 		return ("detach");
6285 	case PM_DEP_WK_REMOVE_DEP:
6286 		return ("remove dep");
6287 	case PM_DEP_WK_BRINGUP_SELF:
6288 		return ("bringup self");
6289 	case PM_DEP_WK_RECORD_KEEPER:
6290 		return ("add dependent");
6291 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6292 		return ("add dependent property");
6293 	case PM_DEP_WK_KEPT:
6294 		return ("kept");
6295 	case PM_DEP_WK_KEEPER:
6296 		return ("keeper");
6297 	case PM_DEP_WK_ATTACH:
6298 		return ("attach");
6299 	case PM_DEP_WK_CHECK_KEPT:
6300 		return ("check kept");
6301 	case PM_DEP_WK_CPR_SUSPEND:
6302 		return ("suspend");
6303 	case PM_DEP_WK_CPR_RESUME:
6304 		return ("resume");
6305 	default:
6306 		return ("unknown");
6307 	}
6308 
6309 }
6310 
6311 static void
6312 pm_rele_dep(char *keeper)
6313 {
6314 	PMD_FUNC(pmf, "rele_dep")
6315 	pm_pdr_t *dp;
6316 	char *kept_path = NULL;
6317 	dev_info_t *kept = NULL;
6318 	int count = 0;
6319 
6320 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6321 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6322 			continue;
6323 		for (count = 0; count < dp->pdr_kept_count; count++) {
6324 			kept_path = dp->pdr_kept_paths[count];
6325 			if (kept_path == NULL)
6326 				continue;
6327 			kept = pm_name_to_dip(kept_path, 1);
6328 			if (kept) {
6329 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6330 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6331 				    keeper))
6332 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6333 				pm_rele_power(kept);
6334 				ddi_release_devi(kept);
6335 			}
6336 		}
6337 	}
6338 }
6339 
6340 /*
6341  * Called when we are just released from direct PM.  Bring ourself up
6342  * if our keeper is up since dependency is not honored while a kept
6343  * device is under direct PM.
6344  */
6345 static void
6346 pm_bring_self_up(char *keptpath)
6347 {
6348 	PMD_FUNC(pmf, "bring_self_up")
6349 	dev_info_t *kept;
6350 	dev_info_t *keeper;
6351 	pm_pdr_t *dp;
6352 	int i, j;
6353 	int up = 0, circ;
6354 
6355 	kept = pm_name_to_dip(keptpath, 1);
6356 	if (kept == NULL)
6357 		return;
6358 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6359 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6360 		if (dp->pdr_kept_count == 0)
6361 			continue;
6362 		for (i = 0; i < dp->pdr_kept_count; i++) {
6363 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6364 				continue;
6365 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6366 			if (keeper) {
6367 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6368 				    pmf, PM_DEVICE(keeper)))
6369 				PM_LOCK_POWER(keeper, &circ);
6370 				for (j = 0; j < PM_NUMCMPTS(keeper);
6371 				    j++) {
6372 					if (PM_CURPOWER(keeper, j)) {
6373 						PMD(PMD_KEEPS, ("%s: comp="
6374 						    "%d is up\n", pmf, j))
6375 						up++;
6376 					}
6377 				}
6378 				if (up) {
6379 					if (PM_SKBU(kept))
6380 						DEVI(kept)->devi_pm_flags &=
6381 						    ~PMC_SKIP_BRINGUP;
6382 					bring_pmdep_up(kept, 1);
6383 				}
6384 				PM_UNLOCK_POWER(keeper, circ);
6385 				ddi_release_devi(keeper);
6386 			}
6387 		}
6388 	}
6389 	ddi_release_devi(kept);
6390 }
6391 
6392 static void
6393 pm_process_dep_request(pm_dep_wk_t *work)
6394 {
6395 	PMD_FUNC(pmf, "dep_req")
6396 	int ret;
6397 
6398 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6399 	    pdw_type_decode(work->pdw_type)))
6400 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6401 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6402 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6403 
6404 	switch (work->pdw_type) {
6405 	case PM_DEP_WK_POWER_ON:
6406 		/* Bring up the kept devices and put a hold on them */
6407 		bring_wekeeps_up(work->pdw_keeper);
6408 		break;
6409 	case PM_DEP_WK_POWER_OFF:
6410 		/* Release the kept devices */
6411 		pm_rele_dep(work->pdw_keeper);
6412 		break;
6413 	case PM_DEP_WK_DETACH:
6414 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6415 		break;
6416 	case PM_DEP_WK_REMOVE_DEP:
6417 		pm_discard_dependencies();
6418 		break;
6419 	case PM_DEP_WK_BRINGUP_SELF:
6420 		/*
6421 		 * We deferred satisfying our dependency till now, so satisfy
6422 		 * it again and bring ourselves up.
6423 		 */
6424 		pm_bring_self_up(work->pdw_kept);
6425 		break;
6426 	case PM_DEP_WK_RECORD_KEEPER:
6427 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6428 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6429 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6430 		break;
6431 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6432 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6433 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6434 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6435 		break;
6436 	case PM_DEP_WK_KEPT:
6437 		ret = pm_kept(work->pdw_kept);
6438 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6439 		    ret))
6440 		break;
6441 	case PM_DEP_WK_KEEPER:
6442 		ret = pm_keeper(work->pdw_keeper);
6443 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6444 		    pmf, ret))
6445 		break;
6446 	case PM_DEP_WK_ATTACH:
6447 		ret = pm_keeper(work->pdw_keeper);
6448 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6449 		    pmf, ret))
6450 		ret = pm_kept(work->pdw_kept);
6451 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6452 		    pmf, ret))
6453 		break;
6454 	case PM_DEP_WK_CHECK_KEPT:
6455 		ret = pm_is_kept(work->pdw_kept);
6456 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6457 		    pmf, work->pdw_kept, ret))
6458 		break;
6459 	case PM_DEP_WK_CPR_SUSPEND:
6460 		pm_discard_dependencies();
6461 		break;
6462 	case PM_DEP_WK_CPR_RESUME:
6463 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6464 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6465 		break;
6466 	default:
6467 		ASSERT(0);
6468 		break;
6469 	}
6470 	/*
6471 	 * Free the work structure if the requester is not waiting
6472 	 * Otherwise it is the requester's responsiblity to free it.
6473 	 */
6474 	if (!work->pdw_wait) {
6475 		if (work->pdw_keeper)
6476 			kmem_free(work->pdw_keeper,
6477 			    strlen(work->pdw_keeper) + 1);
6478 		if (work->pdw_kept)
6479 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6480 		kmem_free(work, sizeof (pm_dep_wk_t));
6481 	} else {
6482 		/*
6483 		 * Notify requester if it is waiting for it.
6484 		 */
6485 		work->pdw_ret = ret;
6486 		work->pdw_done = 1;
6487 		cv_signal(&work->pdw_cv);
6488 	}
6489 }
6490 
6491 /*
6492  * Process PM dependency requests.
6493  */
6494 static void
6495 pm_dep_thread(void)
6496 {
6497 	pm_dep_wk_t *work;
6498 	callb_cpr_t cprinfo;
6499 
6500 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6501 	    "pm_dep_thread");
6502 	for (;;) {
6503 		mutex_enter(&pm_dep_thread_lock);
6504 		if (pm_dep_thread_workq == NULL) {
6505 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6506 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6507 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6508 		}
6509 		work = pm_dep_thread_workq;
6510 		pm_dep_thread_workq = work->pdw_next;
6511 		if (pm_dep_thread_tail == work)
6512 			pm_dep_thread_tail = work->pdw_next;
6513 		mutex_exit(&pm_dep_thread_lock);
6514 		pm_process_dep_request(work);
6515 
6516 	}
6517 	/*NOTREACHED*/
6518 }
6519 
6520 /*
6521  * Set the power level of the indicated device to unknown (if it is not a
6522  * backwards compatible device), as it has just been resumed, and it won't
6523  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6524  */
6525 void
6526 pm_forget_power_level(dev_info_t *dip)
6527 {
6528 	dev_info_t *pdip = ddi_get_parent(dip);
6529 	int i, count = 0;
6530 
6531 	if (!PM_ISBC(dip)) {
6532 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6533 			count += (PM_CURPOWER(dip, i) == 0);
6534 
6535 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6536 			e_pm_hold_rele_power(pdip, count);
6537 
6538 		/*
6539 		 * Count this as a power cycle if we care
6540 		 */
6541 		if (DEVI(dip)->devi_pm_volpmd &&
6542 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6543 			DEVI(dip)->devi_pm_volpmd = 0;
6544 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6545 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6546 	}
6547 }
6548 
/*
 * This function advises the caller whether it should make a power-off
 * transition at this time or not.  If the transition is not advised
 * at this time, the time that the next power-off transition can
 * be made from now is returned through "intervalp" pointer.
 *
 * Only the DC_SCSI_FORMAT data format is understood.  *intervalp is written
 * only when 0 is returned.
 *
 * This function returns:
 *
 *  1  power-off advised
 *  0  power-off not advised, intervalp will point to seconds from
 *	  now that a power-off is advised.  If the cycle count already
 *	  exceeds the lifetime maximum, or catching up would take the
 *	  device past the number of years that policy specifies it
 *	  should last, a large number (LONG_MAX / hz) is returned as
 *	  the time interval.
 *  -1  error (NULL datap, unknown format, negative cycle count, non-zero
 *	  flag, unparsable service date, or service date in the future)
 */
int
pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
{
	PMD_FUNC(pmf, "pm_trans_check")
	char dbuf[DC_SCSI_MFR_LEN];
	struct pm_scsi_cycles *scp;
	int service_years, service_weeks, full_years;
	time_t now, service_seconds, tdiff;
	time_t within_year, when_allowed;
	char *ptr;
	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
	int cycles_diff, cycles_over;

	if (datap == NULL) {
		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
		return (-1);
	}

	if (datap->format == DC_SCSI_FORMAT) {
		/*
		 * Power cycles of the scsi drives are distributed
		 * over 5 years with the following percentage ratio:
		 *
		 *	30%, 25%, 20%, 15%, and 10%
		 *
		 * The power cycle quota for each year is distributed
		 * linearly through out the year.  The equation for
		 * determining the expected cycles is:
		 *
		 *	e = a * (n / y)
		 *
		 * e = expected cycles
		 * a = allocated cycles for this year
		 * n = number of seconds since beginning of this year
		 * y = number of seconds in a year
		 *
		 * Note that beginning of the year starts the day that
		 * the drive has been put on service.
		 *
		 * If the drive has passed its expected cycles, we
		 * can determine when it can start to power cycle
		 * again to keep it on track to meet the 5-year
		 * life expectancy.  The equation for determining
		 * when to power cycle is:
		 *
		 *	w = y * (c / a)
		 *
		 * w = when it can power cycle again
		 * y = number of seconds in a year
		 * c = current number of cycles
		 * a = allocated cycles for the year
		 *
		 */
		/*
		 * pcnt[] holds the cumulative form of the ratio above:
		 * entry i is the percentage of lifemax that may have been
		 * used by the end of service year i.
		 */
		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };

		scp = &datap->un.scsi_cycles;
		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
		if (scp->ncycles < 0 || scp->flag != 0) {
			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
			return (-1);
		}

		/* Lifetime budget already exhausted: effectively never. */
		if (scp->ncycles > scp->lifemax) {
			*intervalp = (LONG_MAX / hz);
			return (0);
		}

		/*
		 * convert service date to time_t
		 *
		 * svc_date is parsed as DC_SCSI_YEAR_LEN year characters
		 * followed by DC_SCSI_WEEK_LEN week characters; each field
		 * is copied into dbuf and NUL-terminated before stoi().
		 */
		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
		dbuf[DC_SCSI_YEAR_LEN] = '\0';
		ptr = dbuf;
		service_years = stoi(&ptr) - EPOCH_YEAR;
		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
		    DC_SCSI_WEEK_LEN);
		dbuf[DC_SCSI_WEEK_LEN] = '\0';

		/*
		 * scsi standard does not specify WW data,
		 * could be (00-51) or (01-52)
		 */
		ptr = dbuf;
		service_weeks = stoi(&ptr);
		if (service_years < 0 ||
		    service_weeks < 0 || service_weeks > 52) {
			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
			    pmf, service_years, service_weeks))
			return (-1);
		}

		/*
		 * calculate service date in seconds-since-epoch,
		 * adding one day for each leap-year.
		 *
		 * (years-since-epoch + 2) fixes integer truncation,
		 * example: (8) leap-years during [1972, 2000]
		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
		 */
		service_seconds = (service_years * DC_SPY) +
		    (service_weeks * DC_SPW) +
		    (((service_years + 2) / 4) * DC_SPD);

		now = gethrestime_sec();
		/*
		 * since the granularity of 'svc_date' is day not second,
		 * 'now' should be rounded up to full day.
		 */
		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
		if (service_seconds > now) {
			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
			    "than now (%ld)!\n", pmf, service_seconds, now))
			return (-1);
		}

		/* Age of the drive, in seconds, since it entered service. */
		tdiff = now - service_seconds;
		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))

		/*
		 * NOTE - Leap years are not considered in the calculations
		 * below.
		 */
		full_years = (tdiff / DC_SPY);
		/*
		 * Past the policy lifetime and still within budget (the
		 * over-budget case already returned above): no restriction.
		 */
		if ((full_years >= DC_SCSI_NPY) &&
		    (scp->ncycles <= scp->lifemax))
			return (1);

		/*
		 * Determine what is the normal cycle usage for the
		 * device at the beginning and the end of this year.
		 */
		lower_bound_cycles = (!full_years) ? 0 :
		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;

		/* Not yet into this year's allocation at all. */
		if (scp->ncycles <= lower_bound_cycles)
			return (1);

		/*
		 * The linear slope that determines how many cycles
		 * are allowed this year is number of seconds
		 * passed this year over total number of seconds in a year.
		 */
		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
		within_year = (tdiff % DC_SPY);
		/* 64-bit intermediate so the product is computed wide. */
		cycles_allowed = lower_bound_cycles +
		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
		    full_years, within_year))
		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
		    cycles_allowed))

		if (scp->ncycles <= cycles_allowed)
			return (1);

		/*
		 * The transition is not advised now but we can
		 * determine when the next transition can be made.
		 *
		 * Depending on how many cycles the device has been
		 * over-used, we may need to skip years with
		 * different percentage quota in order to determine
		 * when the next transition can be made.
		 */
		cycles_over = (scp->ncycles - lower_bound_cycles);
		/*
		 * cycles_over is positive here (ncycles > lower bound) and
		 * stays positive through the loop, so on exit
		 * 0 < cycles_over <= cycles_diff and the division below
		 * cannot be by zero.
		 */
		while (cycles_over > cycles_diff) {
			full_years++;
			if (full_years >= DC_SCSI_NPY) {
				*intervalp = (LONG_MAX / hz);
				return (0);
			}
			cycles_over -= cycles_diff;
			lower_bound_cycles = upper_bound_cycles;
			upper_bound_cycles =
			    (scp->lifemax * pcnt[full_years]) / 100;
			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
		}

		/*
		 * The linear slope that determines when the next transition
		 * can be made is the relative position of used cycles within a
		 * year over total number of cycles within that year.
		 */
		when_allowed = service_seconds + (full_years * DC_SPY) +
		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
		*intervalp = (when_allowed - now);
		/* Clamp to the same "large number" used above. */
		if (*intervalp > (LONG_MAX / hz))
			*intervalp = (LONG_MAX / hz);
		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
		    *intervalp))
		return (0);
	}

	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
	return (-1);
}
6761 
6762 /*
6763  * Nexus drivers call into pm framework to indicate which child driver is about
6764  * to be installed.  In some platforms, ppm may need to configure the hardware
6765  * for successful installation of a driver.
6766  */
6767 int
6768 pm_init_child(dev_info_t *dip)
6769 {
6770 	power_req_t power_req;
6771 
6772 	ASSERT(ddi_binding_name(dip));
6773 	ASSERT(ddi_get_name_addr(dip));
6774 	pm_ppm_claim(dip);
6775 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6776 		power_req.request_type = PMR_PPM_INIT_CHILD;
6777 		power_req.req.ppm_config_req.who = dip;
6778 		ASSERT(PPM(dip) != NULL);
6779 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6780 		    NULL));
6781 	} else {
6782 #ifdef DEBUG
6783 		/* pass it to the default handler so we can debug things */
6784 		power_req.request_type = PMR_PPM_INIT_CHILD;
6785 		power_req.req.ppm_config_req.who = dip;
6786 		(void) pm_ctlops(NULL, dip,
6787 		    DDI_CTLOPS_POWER, &power_req, NULL);
6788 #endif
6789 	}
6790 	return (DDI_SUCCESS);
6791 }
6792 
6793 /*
6794  * Bring parent of a node that is about to be probed up to full power, and
6795  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6796  * it is time to let it go down again
6797  */
6798 void
6799 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6800 {
6801 	int result;
6802 	power_req_t power_req;
6803 
6804 	bzero(cp, sizeof (*cp));
6805 	cp->ppc_dip = dip;
6806 
6807 	pm_ppm_claim(dip);
6808 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6809 		power_req.request_type = PMR_PPM_PRE_PROBE;
6810 		power_req.req.ppm_config_req.who = dip;
6811 		ASSERT(PPM(dip) != NULL);
6812 		(void) pm_ctlops(PPM(dip), dip,
6813 		    DDI_CTLOPS_POWER, &power_req, &result);
6814 		cp->ppc_ppm = PPM(dip);
6815 	} else {
6816 #ifdef DEBUG
6817 		/* pass it to the default handler so we can debug things */
6818 		power_req.request_type = PMR_PPM_PRE_PROBE;
6819 		power_req.req.ppm_config_req.who = dip;
6820 		(void) pm_ctlops(NULL, dip,
6821 		    DDI_CTLOPS_POWER, &power_req, &result);
6822 #endif
6823 		cp->ppc_ppm = NULL;
6824 	}
6825 }
6826 
6827 int
6828 pm_pre_config(dev_info_t *dip, char *devnm)
6829 {
6830 	PMD_FUNC(pmf, "pre_config")
6831 	int ret;
6832 
6833 	if (MDI_VHCI(dip)) {
6834 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6835 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6836 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6837 	} else if (!PM_GET_PM_INFO(dip))
6838 		return (DDI_SUCCESS);
6839 
6840 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6841 	pm_hold_power(dip);
6842 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6843 	if (ret != DDI_SUCCESS)
6844 		pm_rele_power(dip);
6845 	return (ret);
6846 }
6847 
6848 /*
6849  * This routine is called by devfs during its walk to unconfigue a node.
6850  * If the call is due to auto mod_unloads and the dip is not at its
6851  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6852  * return DDI_SUCCESS.
6853  */
6854 int
6855 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6856 {
6857 	PMD_FUNC(pmf, "pre_unconfig")
6858 	int ret;
6859 
6860 	if (MDI_VHCI(dip)) {
6861 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6862 		    PM_DEVICE(dip), flags))
6863 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6864 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6865 	} else if (!PM_GET_PM_INFO(dip))
6866 		return (DDI_SUCCESS);
6867 
6868 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6869 	    flags))
6870 	*held = 0;
6871 
6872 	/*
6873 	 * If the dip is a leaf node, don't power it up.
6874 	 */
6875 	if (!ddi_get_child(dip))
6876 		return (DDI_SUCCESS);
6877 
6878 	/*
6879 	 * Do not power up the node if it is called due to auto-modunload.
6880 	 */
6881 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6882 		return (DDI_FAILURE);
6883 
6884 	pm_hold_power(dip);
6885 	*held = 1;
6886 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6887 	if (ret != DDI_SUCCESS) {
6888 		pm_rele_power(dip);
6889 		*held = 0;
6890 	}
6891 	return (ret);
6892 }
6893 
6894 /*
6895  * Notify ppm of attach action.  Parent is already held at full power by
6896  * probe action.
6897  */
6898 void
6899 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6900 {
6901 	static char *me = "pm_pre_attach";
6902 	power_req_t power_req;
6903 	int result;
6904 
6905 	/*
6906 	 * Initialize and fill in the PPM cookie
6907 	 */
6908 	bzero(cp, sizeof (*cp));
6909 	cp->ppc_cmd = (int)cmd;
6910 	cp->ppc_ppm = PPM(dip);
6911 	cp->ppc_dip = dip;
6912 
6913 	/*
6914 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6915 	 * Power Management stuff. DDI_RESUME also has to purge it's
6916 	 * powerlevel information.
6917 	 */
6918 	switch (cmd) {
6919 	case DDI_ATTACH:
6920 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6921 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6922 			power_req.req.ppm_config_req.who = dip;
6923 			ASSERT(PPM(dip));
6924 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6925 			    &power_req, &result);
6926 		}
6927 #ifdef DEBUG
6928 		else {
6929 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6930 			power_req.req.ppm_config_req.who = dip;
6931 			(void) pm_ctlops(NULL, dip,
6932 			    DDI_CTLOPS_POWER, &power_req, &result);
6933 		}
6934 #endif
6935 		break;
6936 	case DDI_RESUME:
6937 		pm_forget_power_level(dip);
6938 
6939 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6940 			power_req.request_type = PMR_PPM_PRE_RESUME;
6941 			power_req.req.resume_req.who = cp->ppc_dip;
6942 			power_req.req.resume_req.cmd =
6943 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6944 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6945 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6946 			    DDI_CTLOPS_POWER, &power_req, &result);
6947 		}
6948 #ifdef DEBUG
6949 		else {
6950 			power_req.request_type = PMR_PPM_PRE_RESUME;
6951 			power_req.req.resume_req.who = cp->ppc_dip;
6952 			power_req.req.resume_req.cmd =
6953 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6954 			(void) pm_ctlops(NULL, cp->ppc_dip,
6955 			    DDI_CTLOPS_POWER, &power_req, &result);
6956 		}
6957 #endif
6958 		break;
6959 
6960 	case DDI_PM_RESUME:
6961 		break;
6962 
6963 	default:
6964 		panic(me);
6965 	}
6966 }
6967 
6968 /*
6969  * Nexus drivers call into pm framework to indicate which child driver is
6970  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6971  * hardware since the device driver is no longer installed.
6972  */
6973 int
6974 pm_uninit_child(dev_info_t *dip)
6975 {
6976 	power_req_t power_req;
6977 
6978 	ASSERT(ddi_binding_name(dip));
6979 	ASSERT(ddi_get_name_addr(dip));
6980 	pm_ppm_claim(dip);
6981 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6982 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6983 		power_req.req.ppm_config_req.who = dip;
6984 		ASSERT(PPM(dip));
6985 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6986 		    NULL));
6987 	} else {
6988 #ifdef DEBUG
6989 		/* pass it to the default handler so we can debug things */
6990 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6991 		power_req.req.ppm_config_req.who = dip;
6992 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6993 #endif
6994 	}
6995 	return (DDI_SUCCESS);
6996 }
6997 /*
6998  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
6999  * Also notify ppm of result of probe if there is a ppm that cares
7000  */
7001 void
7002 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
7003 {
7004 	_NOTE(ARGUNUSED(probe_failed))
7005 	int result;
7006 	power_req_t power_req;
7007 
7008 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7009 		power_req.request_type = PMR_PPM_POST_PROBE;
7010 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7011 		power_req.req.ppm_config_req.result = ret;
7012 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7013 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
7014 		    &power_req, &result);
7015 	}
7016 #ifdef DEBUG
7017 	else {
7018 		power_req.request_type = PMR_PPM_POST_PROBE;
7019 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7020 		power_req.req.ppm_config_req.result = ret;
7021 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
7022 		    &power_req, &result);
7023 	}
7024 #endif
7025 }
7026 
7027 void
7028 pm_post_config(dev_info_t *dip, char *devnm)
7029 {
7030 	PMD_FUNC(pmf, "post_config")
7031 
7032 	if (MDI_VHCI(dip)) {
7033 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7034 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
7035 		return;
7036 	} else if (!PM_GET_PM_INFO(dip))
7037 		return;
7038 
7039 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7040 	pm_rele_power(dip);
7041 }
7042 
7043 void
7044 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
7045 {
7046 	PMD_FUNC(pmf, "post_unconfig")
7047 
7048 	if (MDI_VHCI(dip)) {
7049 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
7050 		    PM_DEVICE(dip), held))
7051 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
7052 		return;
7053 	} else if (!PM_GET_PM_INFO(dip))
7054 		return;
7055 
7056 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
7057 	    held))
7058 	if (!held)
7059 		return;
7060 	/*
7061 	 * We have held power in pre_unconfig, release it here.
7062 	 */
7063 	pm_rele_power(dip);
7064 }
7065 
7066 /*
7067  * Notify ppm of result of attach if there is a ppm that cares
7068  */
7069 void
7070 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7071 {
7072 	int result;
7073 	power_req_t power_req;
7074 	dev_info_t	*dip;
7075 
7076 	if (cp->ppc_cmd != DDI_ATTACH)
7077 		return;
7078 
7079 	dip = cp->ppc_dip;
7080 
7081 	if (ret == DDI_SUCCESS) {
7082 		/*
7083 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7084 		 */
7085 		if (PM_GET_PM_INFO(dip) == NULL)
7086 			(void) pm_start(dip);
7087 	} else {
7088 		/*
7089 		 * Attach may have got pm started before failing
7090 		 */
7091 		pm_stop(dip);
7092 	}
7093 
7094 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7095 		power_req.request_type = PMR_PPM_POST_ATTACH;
7096 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7097 		power_req.req.ppm_config_req.result = ret;
7098 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7099 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7100 		    DDI_CTLOPS_POWER, &power_req, &result);
7101 	}
7102 #ifdef DEBUG
7103 	else {
7104 		power_req.request_type = PMR_PPM_POST_ATTACH;
7105 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7106 		power_req.req.ppm_config_req.result = ret;
7107 		(void) pm_ctlops(NULL, cp->ppc_dip,
7108 		    DDI_CTLOPS_POWER, &power_req, &result);
7109 	}
7110 #endif
7111 }
7112 
7113 /*
7114  * Notify ppm of attach action.  Parent is already held at full power by
7115  * probe action.
7116  */
7117 void
7118 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7119 {
7120 	int result;
7121 	power_req_t power_req;
7122 
7123 	bzero(cp, sizeof (*cp));
7124 	cp->ppc_dip = dip;
7125 	cp->ppc_cmd = (int)cmd;
7126 
7127 	switch (cmd) {
7128 	case DDI_DETACH:
7129 		pm_detaching(dip);		/* suspend pm while detaching */
7130 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7131 			power_req.request_type = PMR_PPM_PRE_DETACH;
7132 			power_req.req.ppm_config_req.who = dip;
7133 			ASSERT(PPM(dip));
7134 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7135 			    &power_req, &result);
7136 			cp->ppc_ppm = PPM(dip);
7137 		} else {
7138 #ifdef DEBUG
7139 			/* pass to the default handler so we can debug things */
7140 			power_req.request_type = PMR_PPM_PRE_DETACH;
7141 			power_req.req.ppm_config_req.who = dip;
7142 			(void) pm_ctlops(NULL, dip,
7143 			    DDI_CTLOPS_POWER, &power_req, &result);
7144 #endif
7145 			cp->ppc_ppm = NULL;
7146 		}
7147 		break;
7148 
7149 	default:
7150 		break;
7151 	}
7152 }
7153 
7154 /*
7155  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7156  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7157  * make an entry to record the details, which includes certain flag settings.
7158  */
7159 static void
7160 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7161     int wasvolpmd, major_t major)
7162 {
7163 	PMD_FUNC(pmf, "record_invol_path")
7164 	major_t pm_path_to_major(char *);
7165 	size_t plen;
7166 	pm_noinvol_t *ip, *np, *pp;
7167 	pp = NULL;
7168 
7169 	plen = strlen(path) + 1;
7170 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7171 	np->ni_size = plen;
7172 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7173 	np->ni_noinvolpm = noinvolpm;
7174 	np->ni_volpmd = volpmd;
7175 	np->ni_wasvolpmd = wasvolpmd;
7176 	np->ni_flags = flags;
7177 	(void) strcpy(np->ni_path, path);
7178 	/*
7179 	 * If we haven't actually seen the node attached, it is hard to figure
7180 	 * out its major.  If we could hold the node by path, we would be much
7181 	 * happier here.
7182 	 */
7183 	if (major == (major_t)-1) {
7184 		np->ni_major = pm_path_to_major(path);
7185 	} else {
7186 		np->ni_major = major;
7187 	}
7188 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7189 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7190 		int comp = strcmp(path, ip->ni_path);
7191 		if (comp < 0) {
7192 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7193 			    pmf, path, ip->ni_path))
7194 			/* insert before current entry */
7195 			np->ni_next = ip;
7196 			if (pp) {
7197 				pp->ni_next = np;
7198 			} else {
7199 				pm_noinvol_head = np;
7200 			}
7201 			rw_exit(&pm_noinvol_rwlock);
7202 #ifdef DEBUG
7203 			if (pm_debug & PMD_NOINVOL)
7204 				pr_noinvol("record_invol_path exit0");
7205 #endif
7206 			return;
7207 		} else if (comp == 0) {
7208 			panic("%s already in pm_noinvol list", path);
7209 		}
7210 	}
7211 	/*
7212 	 * If we did not find an entry in the list that this should go before,
7213 	 * then it must go at the end
7214 	 */
7215 	if (pp) {
7216 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7217 		    pp->ni_path))
7218 		ASSERT(pp->ni_next == 0);
7219 		pp->ni_next = np;
7220 	} else {
7221 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7222 		ASSERT(!pm_noinvol_head);
7223 		pm_noinvol_head = np;
7224 	}
7225 	rw_exit(&pm_noinvol_rwlock);
7226 #ifdef DEBUG
7227 	if (pm_debug & PMD_NOINVOL)
7228 		pr_noinvol("record_invol_path exit");
7229 #endif
7230 }
7231 
7232 void
7233 pm_record_invol(dev_info_t *dip)
7234 {
7235 	char *pathbuf;
7236 	int pm_all_components_off(dev_info_t *);
7237 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7238 
7239 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7240 	(void) ddi_pathname(dip, pathbuf);
7241 
7242 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7243 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7244 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7245 
7246 	/*
7247 	 * If this child's detach will be holding up its ancestors, then we
7248 	 * allow for an exception to that if all children of this type have
7249 	 * gone down voluntarily.
7250 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7251 	 */
7252 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7253 	    dip);
7254 	kmem_free(pathbuf, MAXPATHLEN);
7255 }
7256 
7257 void
7258 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7259 {
7260 	dev_info_t *dip = cp->ppc_dip;
7261 	int result;
7262 	power_req_t power_req;
7263 
7264 	switch (cp->ppc_cmd) {
7265 	case DDI_DETACH:
7266 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7267 			power_req.request_type = PMR_PPM_POST_DETACH;
7268 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7269 			power_req.req.ppm_config_req.result = ret;
7270 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7271 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7272 			    DDI_CTLOPS_POWER, &power_req, &result);
7273 		}
7274 #ifdef DEBUG
7275 		else {
7276 			power_req.request_type = PMR_PPM_POST_DETACH;
7277 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7278 			power_req.req.ppm_config_req.result = ret;
7279 			(void) pm_ctlops(NULL, cp->ppc_dip,
7280 			    DDI_CTLOPS_POWER, &power_req, &result);
7281 		}
7282 #endif
7283 		if (ret == DDI_SUCCESS) {
7284 			/*
7285 			 * For hotplug detach we assume it is *really* gone
7286 			 */
7287 			if (cp->ppc_cmd == DDI_DETACH &&
7288 			    ((DEVI(dip)->devi_pm_flags &
7289 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7290 			    DEVI(dip)->devi_pm_noinvolpm))
7291 				pm_record_invol(dip);
7292 			DEVI(dip)->devi_pm_flags &=
7293 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7294 
7295 			/*
7296 			 * If console fb is detaching, then we don't need to
7297 			 * worry any more about it going off (pm_detaching has
7298 			 * brought up all components)
7299 			 */
7300 			if (PM_IS_CFB(dip)) {
7301 				mutex_enter(&pm_cfb_lock);
7302 				ASSERT(cfb_dip_detaching);
7303 				ASSERT(cfb_dip == NULL);
7304 				ASSERT(pm_cfb_comps_off == 0);
7305 				cfb_dip_detaching = NULL;
7306 				mutex_exit(&pm_cfb_lock);
7307 			}
7308 			pm_stop(dip);	/* make it permanent */
7309 		} else {
7310 			if (PM_IS_CFB(dip)) {
7311 				mutex_enter(&pm_cfb_lock);
7312 				ASSERT(cfb_dip_detaching);
7313 				ASSERT(cfb_dip == NULL);
7314 				ASSERT(pm_cfb_comps_off == 0);
7315 				cfb_dip = cfb_dip_detaching;
7316 				cfb_dip_detaching = NULL;
7317 				mutex_exit(&pm_cfb_lock);
7318 			}
7319 			pm_detach_failed(dip);	/* resume power management */
7320 		}
7321 		break;
7322 	case DDI_PM_SUSPEND:
7323 		break;
7324 	case DDI_SUSPEND:
7325 		break;				/* legal, but nothing to do */
7326 	default:
7327 #ifdef DEBUG
7328 		panic("pm_post_detach: unrecognized cmd %d for detach",
7329 		    cp->ppc_cmd);
7330 		/*NOTREACHED*/
7331 #else
7332 		break;
7333 #endif
7334 	}
7335 }
7336 
7337 /*
7338  * Called after vfs_mountroot has got the clock started to fix up timestamps
7339  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7340  * devices look busy but have a 0 busycnt
7341  */
7342 int
7343 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7344 {
7345 	_NOTE(ARGUNUSED(arg))
7346 
7347 	pm_info_t *info = PM_GET_PM_INFO(dip);
7348 	struct pm_component *cp;
7349 	int i;
7350 
7351 	if (!info)
7352 		return (DDI_WALK_CONTINUE);
7353 	PM_LOCK_BUSY(dip);
7354 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7355 		cp = PM_CP(dip, i);
7356 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7357 			cp->pmc_timestamp = gethrestime_sec();
7358 	}
7359 	PM_UNLOCK_BUSY(dip);
7360 	return (DDI_WALK_CONTINUE);
7361 }
7362 
7363 /*
7364  * Called at attach time to see if the device being attached has a record in
7365  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7366  * parents and set a flag in the dip
7367  */
7368 void
7369 pm_noinvol_specd(dev_info_t *dip)
7370 {
7371 	PMD_FUNC(pmf, "noinvol_specd")
7372 	char *pathbuf;
7373 	pm_noinvol_t *ip, *pp = NULL;
7374 	int wasvolpmd;
7375 	int found = 0;
7376 
7377 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7378 		return;
7379 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7380 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7381 	(void) ddi_pathname(dip, pathbuf);
7382 
7383 	PM_LOCK_DIP(dip);
7384 	DEVI(dip)->devi_pm_volpmd = 0;
7385 	DEVI(dip)->devi_pm_noinvolpm = 0;
7386 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7387 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7388 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7389 		    pmf, pathbuf, ip->ni_path))
7390 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7391 			found++;
7392 			break;
7393 		}
7394 	}
7395 	rw_exit(&pm_noinvol_rwlock);
7396 	if (!found) {
7397 		PM_UNLOCK_DIP(dip);
7398 		kmem_free(pathbuf, MAXPATHLEN);
7399 		return;
7400 	}
7401 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7402 	pp = NULL;
7403 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7404 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7405 		    pmf, pathbuf, ip->ni_path))
7406 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7407 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7408 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7409 			/*
7410 			 * Handle special case of console fb
7411 			 */
7412 			if (PM_IS_CFB(dip)) {
7413 				mutex_enter(&pm_cfb_lock);
7414 				cfb_dip = dip;
7415 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7416 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7417 				mutex_exit(&pm_cfb_lock);
7418 			}
7419 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7420 			ASSERT((DEVI(dip)->devi_pm_flags &
7421 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7422 			    DEVI(dip)->devi_pm_noinvolpm);
7423 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7424 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7425 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7426 			    ip->ni_noinvolpm, ip->ni_volpmd,
7427 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7428 			/*
7429 			 * free the entry in hopes the list will now be empty
7430 			 * and we won't have to search it any more until the
7431 			 * device detaches
7432 			 */
7433 			if (pp) {
7434 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7435 				    pmf, ip->ni_path, pp->ni_path))
7436 				pp->ni_next = ip->ni_next;
7437 			} else {
7438 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7439 				    pmf, ip->ni_path))
7440 				ASSERT(pm_noinvol_head == ip);
7441 				pm_noinvol_head = ip->ni_next;
7442 			}
7443 			PM_UNLOCK_DIP(dip);
7444 			wasvolpmd = ip->ni_wasvolpmd;
7445 			rw_exit(&pm_noinvol_rwlock);
7446 			kmem_free(ip->ni_path, ip->ni_size);
7447 			kmem_free(ip, sizeof (*ip));
7448 			/*
7449 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7450 			 * (and volpmd if appropriate)
7451 			 */
7452 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7453 			    wasvolpmd, pathbuf, dip);
7454 #ifdef DEBUG
7455 			if (pm_debug & PMD_NOINVOL)
7456 				pr_noinvol("noinvol_specd exit");
7457 #endif
7458 			kmem_free(pathbuf, MAXPATHLEN);
7459 			return;
7460 		}
7461 	}
7462 	kmem_free(pathbuf, MAXPATHLEN);
7463 	rw_exit(&pm_noinvol_rwlock);
7464 	PM_UNLOCK_DIP(dip);
7465 }
7466 
7467 int
7468 pm_all_components_off(dev_info_t *dip)
7469 {
7470 	int i;
7471 	pm_component_t *cp;
7472 
7473 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7474 		cp = PM_CP(dip, i);
7475 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7476 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7477 			return (0);
7478 	}
7479 	return (1);	/* all off */
7480 }
7481 
7482 /*
7483  * Make sure that all "no involuntary power cycles" devices are attached.
7484  * Called before doing a cpr suspend to make sure the driver has a say about
7485  * the power cycle
7486  */
7487 int
7488 pm_reattach_noinvol(void)
7489 {
7490 	PMD_FUNC(pmf, "reattach_noinvol")
7491 	pm_noinvol_t *ip;
7492 	char *path;
7493 	dev_info_t *dip;
7494 
7495 	/*
7496 	 * Prevent the modunload thread from unloading any modules until we
7497 	 * have completely stopped all kernel threads.
7498 	 */
7499 	modunload_disable();
7500 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7501 		/*
7502 		 * Forget we'v ever seen any entry
7503 		 */
7504 		ip->ni_persistent = 0;
7505 	}
7506 restart:
7507 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7508 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7509 #ifdef PMDDEBUG
7510 		major_t maj;
7511 		maj = ip->ni_major;
7512 #endif
7513 		path = ip->ni_path;
7514 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7515 			if (ip->ni_persistent) {
7516 				/*
7517 				 * If we weren't able to make this entry
7518 				 * go away, then we give up, as
7519 				 * holding/attaching the driver ought to have
7520 				 * resulted in this entry being deleted
7521 				 */
7522 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7523 				    "(%s|%d)\n", pmf, ip->ni_path,
7524 				    ddi_major_to_name(maj), (int)maj))
7525 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7526 				    ip->ni_path);
7527 				modunload_enable();
7528 				rw_exit(&pm_noinvol_rwlock);
7529 				return (0);
7530 			}
7531 			ip->ni_persistent++;
7532 			rw_exit(&pm_noinvol_rwlock);
7533 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7534 			dip = e_ddi_hold_devi_by_path(path, 0);
7535 			if (dip == NULL) {
7536 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7537 				    pmf, path, (int)maj))
7538 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7539 				    "driver", path);
7540 				modunload_enable();
7541 				return (0);
7542 			} else {
7543 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7544 				/*
7545 				 * Since the modunload thread is stopped, we
7546 				 * don't have to keep the driver held, which
7547 				 * saves a ton of bookkeeping
7548 				 */
7549 				ddi_release_devi(dip);
7550 				goto restart;
7551 			}
7552 		} else {
7553 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7554 			    pmf, ip->ni_path))
7555 			continue;
7556 		}
7557 	}
7558 	rw_exit(&pm_noinvol_rwlock);
7559 	return (1);
7560 }
7561 
/*
 * Re-enable module unloading, undoing the modunload_disable() done by
 * pm_reattach_noinvol().
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7567 
7568 /*
7569  * Display pm support code
7570  */
7571 
7572 
7573 /*
7574  * console frame-buffer power-mgmt gets enabled when debugging
7575  * services are not present or console fbpm override is set
7576  */
7577 void
7578 pm_cfb_setup(const char *stdout_path)
7579 {
7580 	PMD_FUNC(pmf, "cfb_setup")
7581 	extern int obpdebug;
7582 	char *devname;
7583 	dev_info_t *dip;
7584 	int devname_len;
7585 	extern dev_info_t *fbdip;
7586 
7587 	/*
7588 	 * By virtue of this function being called (from consconfig),
7589 	 * we know stdout is a framebuffer.
7590 	 */
7591 	stdout_is_framebuffer = 1;
7592 
7593 	if (obpdebug || (boothowto & RB_DEBUG)) {
7594 		if (pm_cfb_override == 0) {
7595 			/*
7596 			 * Console is frame buffer, but we want to suppress
7597 			 * pm on it because of debugging setup
7598 			 */
7599 			pm_cfb_enabled = 0;
7600 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7601 			    "console power management.");
7602 			/*
7603 			 * however, we still need to know which is the console
7604 			 * fb in order to suppress pm on it
7605 			 */
7606 		} else {
7607 			cmn_err(CE_WARN, "Kernel debugger present: see "
7608 			    "kmdb(1M) for interaction with power management.");
7609 		}
7610 	}
7611 #ifdef DEBUG
7612 	/*
7613 	 * IF console is fb and is power managed, don't do prom_printfs from
7614 	 * pm debug macro
7615 	 */
7616 	if (pm_cfb_enabled && !pm_debug_to_console) {
7617 		if (pm_debug)
7618 			prom_printf("pm debug output will be to log only\n");
7619 		pm_divertdebug++;
7620 	}
7621 #endif
7622 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7623 	devname_len = strlen(devname) + 1;
7624 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7625 	/* if the driver is attached */
7626 	if ((dip = fbdip) != NULL) {
7627 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7628 		    PM_DEVICE(dip)))
7629 		/*
7630 		 * We set up here as if the driver were power manageable in case
7631 		 * we get a later attach of a pm'able driver (which would result
7632 		 * in a panic later)
7633 		 */
7634 		cfb_dip = dip;
7635 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7636 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7637 		    PM_DEVICE(dip)))
7638 #ifdef DEBUG
7639 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7640 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7641 			    pmf, PM_DEVICE(dip)))
7642 		}
7643 #endif
7644 	} else {
7645 		char *ep;
7646 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7647 		pm_record_invol_path(devname,
7648 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7649 		    (major_t)-1);
7650 		for (ep = strrchr(devname, '/'); ep != devname;
7651 		    ep = strrchr(devname, '/')) {
7652 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7653 			*ep = '\0';
7654 			dip = pm_name_to_dip(devname, 0);
7655 			if (dip != NULL) {
7656 				/*
7657 				 * Walk up the tree incrementing
7658 				 * devi_pm_noinvolpm
7659 				 */
7660 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7661 				    0, 0, devname, dip);
7662 				break;
7663 			} else {
7664 				pm_record_invol_path(devname,
7665 				    PMC_NO_INVOL, 1, 0, 0, (major_t)-1);
7666 			}
7667 		}
7668 	}
7669 	kmem_free(devname, devname_len);
7670 }
7671 
7672 void
7673 pm_cfb_rele(void)
7674 {
7675 	mutex_enter(&pm_cfb_lock);
7676 	/*
7677 	 * this call isn't using the console any  more, it is ok to take it
7678 	 * down if the count goes to 0
7679 	 */
7680 	cfb_inuse--;
7681 	mutex_exit(&pm_cfb_lock);
7682 }
7683 
7684 /*
7685  * software interrupt handler for fbpm; this function exists because we can't
7686  * bring up the frame buffer power from above lock level.  So if we need to,
7687  * we instead schedule a softint that runs this routine and takes us into
7688  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7689  */
7690 static uint_t
7691 pm_cfb_softint(caddr_t int_handler_arg)
7692 {
7693 	_NOTE(ARGUNUSED(int_handler_arg))
7694 	int rval = DDI_INTR_UNCLAIMED;
7695 
7696 	mutex_enter(&pm_cfb_lock);
7697 	if (pm_soft_pending) {
7698 		mutex_exit(&pm_cfb_lock);
7699 		debug_enter((char *)NULL);
7700 		/* acquired in debug_enter before calling pm_cfb_trigger */
7701 		pm_cfb_rele();
7702 		mutex_enter(&pm_cfb_lock);
7703 		pm_soft_pending = 0;
7704 		mutex_exit(&pm_cfb_lock);
7705 		rval = DDI_INTR_CLAIMED;
7706 	} else
7707 		mutex_exit(&pm_cfb_lock);
7708 
7709 	return (rval);
7710 }
7711 
7712 void
7713 pm_cfb_setup_intr(void)
7714 {
7715 	PMD_FUNC(pmf, "cfb_setup_intr")
7716 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7717 	void pm_cfb_check_and_powerup(void);
7718 
7719 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7720 #ifdef PMDDEBUG
7721 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7722 #endif
7723 
7724 	if (!stdout_is_framebuffer) {
7725 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7726 		return;
7727 	}
7728 
7729 	/*
7730 	 * setup software interrupt handler
7731 	 */
7732 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7733 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7734 		panic("pm: unable to register soft intr.");
7735 
7736 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7737 }
7738 
7739 /*
7740  * Checks to see if it is safe to write to the console wrt power management
7741  * (i.e. if the console is a framebuffer, then it must be at full power)
7742  * returns 1 when power is off (power-up is needed)
7743  * returns 0 when power is on (power-up not needed)
7744  */
7745 int
7746 pm_cfb_check_and_hold(void)
7747 {
7748 	/*
7749 	 * cfb_dip is set iff console is a power manageable frame buffer
7750 	 * device
7751 	 */
7752 	extern int modrootloaded;
7753 
7754 	mutex_enter(&pm_cfb_lock);
7755 	cfb_inuse++;
7756 	ASSERT(cfb_inuse);	/* wrap? */
7757 	if (modrootloaded && cfb_dip) {
7758 		/*
7759 		 * don't power down the frame buffer, the prom is using it
7760 		 */
7761 		if (pm_cfb_comps_off) {
7762 			mutex_exit(&pm_cfb_lock);
7763 			return (1);
7764 		}
7765 	}
7766 	mutex_exit(&pm_cfb_lock);
7767 	return (0);
7768 }
7769 
7770 /*
7771  * turn on cfb power (which is known to be off).
7772  * Must be called below lock level!
7773  */
7774 void
7775 pm_cfb_powerup(void)
7776 {
7777 	pm_info_t *info;
7778 	int norm;
7779 	int ccount, ci;
7780 	int unused;
7781 #ifdef DEBUG
7782 	/*
7783 	 * Can't reenter prom_prekern, so suppress pm debug messages
7784 	 * (still go to circular buffer).
7785 	 */
7786 	mutex_enter(&pm_debug_lock);
7787 	pm_divertdebug++;
7788 	mutex_exit(&pm_debug_lock);
7789 #endif
7790 	info = PM_GET_PM_INFO(cfb_dip);
7791 	ASSERT(info);
7792 
7793 	ccount = PM_NUMCMPTS(cfb_dip);
7794 	for (ci = 0; ci < ccount; ci++) {
7795 		norm = pm_get_normal_power(cfb_dip, ci);
7796 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7797 		    PM_CANBLOCK_BYPASS, 0, &unused);
7798 	}
7799 #ifdef DEBUG
7800 	mutex_enter(&pm_debug_lock);
7801 	pm_divertdebug--;
7802 	mutex_exit(&pm_debug_lock);
7803 #endif
7804 }
7805 
/*
 * Check if the console framebuffer is powered up.  If not power it up.
 * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
 * must be released by calling pm_cfb_rele when the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	if (!pm_cfb_check_and_hold())
		return;

	pm_cfb_powerup();
}
7818 
7819 /*
7820  * Trigger a low level interrupt to power up console frame buffer.
7821  */
7822 void
7823 pm_cfb_trigger(void)
7824 {
7825 	if (cfb_dip == NULL)
7826 		return;
7827 
7828 	mutex_enter(&pm_cfb_lock);
7829 	/*
7830 	 * If machine appears to be hung, pulling the keyboard connector of
7831 	 * the console will cause a high level interrupt and go to debug_enter.
7832 	 * But, if the fb is powered down, this routine will be called to bring
7833 	 * it up (by generating a softint to do the work).  If soft interrupts
7834 	 * are not running, and the keyboard connector is pulled again, the
7835 	 * following code detects this condition and calls panic which allows
7836 	 * the fb to be brought up from high level.
7837 	 *
7838 	 * If two nearly simultaneous calls to debug_enter occur (both from
7839 	 * high level) the code described above will cause a panic.
7840 	 */
7841 	if (lbolt <= pm_soft_pending) {
7842 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7843 		panic(panicstr);	/* does a power up at any intr level */
7844 		/* NOTREACHED */
7845 	}
7846 	pm_soft_pending = lbolt;
7847 	mutex_exit(&pm_cfb_lock);
7848 	ddi_trigger_softintr(pm_soft_id);
7849 }
7850 
7851 major_t
7852 pm_path_to_major(char *path)
7853 {
7854 	PMD_FUNC(pmf, "path_to_major")
7855 	char *np, *ap, *bp;
7856 	major_t ret;
7857 	size_t len;
7858 	static major_t i_path_to_major(char *, char *);
7859 
7860 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7861 
7862 	np = strrchr(path, '/');
7863 	if (np != NULL)
7864 		np++;
7865 	else
7866 		np = path;
7867 	len = strlen(np) + 1;
7868 	bp = kmem_alloc(len, KM_SLEEP);
7869 	(void) strcpy(bp, np);
7870 	if ((ap = strchr(bp, '@')) != NULL) {
7871 		*ap = '\0';
7872 	}
7873 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7874 	ret = i_path_to_major(path, np);
7875 	kmem_free(bp, len);
7876 	return (ret);
7877 }
7878 
#ifdef DEBUG
#ifndef sparc
clock_t pt_sleep = 1;
#endif

/*
 * State for the in-memory pm debug log written by pm_log().
 */
char	*pm_msgp;		/* where the next message will be written */
char	*pm_bufend;		/* last byte of the log buffer */
char	*pm_msgbuf = NULL;	/* log buffer; allocated by first pm_log() */
int	pm_logpages = 0x100;	/* size of the log buffer, in pages */
#include <sys/sunldi.h>
#include <sys/uio.h>
clock_t	pm_log_sleep = 1000;	/* drv_usecwait() argument after each msg */
int	pm_extra_cr = 1;	/* x86: follow each tty message with "\r" */
volatile int pm_tty = 1;	/* x86: also send messages to pm_printf() */

#define	PMLOGPGS	pm_logpages

#if defined(__x86)
void pm_printf(char *s);
#endif

/*
 * Format a message into the pm debug log buffer and echo it.
 *
 * Messages are stored back to back, each with its terminating NUL.  When a
 * message does not fit in the space left before pm_bufend, the remainder of
 * the buffer is zeroed and the message is written at the start of the
 * buffer instead.  The stored message is then sent to pm_printf() (x86, if
 * pm_tty is set) and to prom_printf() (unless pm_divertdebug is set).
 * All of this happens under pm_debug_lock; after dropping the lock the
 * caller is delayed by drv_usecwait(pm_log_sleep).
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}

	/* First pass only measures the message (plus its NUL) */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		/*
		 * Move the write position itself back to the start, so that
		 * the echo below prints the new message rather than the
		 * just-zeroed tail of the buffer.
		 */
		pm_msgp = pm_msgbuf;
	}

	va_start(adx, fmt);
	(void) vsnprintf(pm_msgp, size, fmt, adx);
	va_end(adx);
#if defined(__x86)
	if (pm_tty) {
		pm_printf(pm_msgp);
		if (pm_extra_cr)
			pm_printf("\r");
	}
#endif
	if (!pm_divertdebug)
		prom_printf("%s", pm_msgp);
	pm_msgp += size;

	mutex_exit(&pm_debug_lock);
	drv_usecwait((clock_t)pm_log_sleep);
}
#endif	/* DEBUG */
7948 
7949 /*
7950  * We want to save the state of any directly pm'd devices over the suspend/
7951  * resume process so that we can put them back the way the controlling
7952  * process left them.
7953  */
7954 void
7955 pm_save_direct_levels(void)
7956 {
7957 	pm_processes_stopped = 1;
7958 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7959 }
7960 
7961 static int
7962 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7963 {
7964 	_NOTE(ARGUNUSED(arg))
7965 	int i;
7966 	int *ip;
7967 	pm_info_t *info = PM_GET_PM_INFO(dip);
7968 
7969 	if (!info)
7970 		return (DDI_WALK_CONTINUE);
7971 
7972 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7973 		if (PM_NUMCMPTS(dip) > 2) {
7974 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7975 			    sizeof (int), KM_SLEEP);
7976 			ip = info->pmi_lp;
7977 		} else {
7978 			ip = info->pmi_levels;
7979 		}
7980 		/* autopm and processes are stopped, ok not to lock power */
7981 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7982 			*ip++ = PM_CURPOWER(dip, i);
7983 		/*
7984 		 * There is a small window between stopping the
7985 		 * processes and setting pm_processes_stopped where
7986 		 * a driver could get hung up in a pm_raise_power()
7987 		 * call.  Free any such driver now.
7988 		 */
7989 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7990 	}
7991 
7992 	return (DDI_WALK_CONTINUE);
7993 }
7994 
7995 void
7996 pm_restore_direct_levels(void)
7997 {
7998 	/*
7999 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
8000 	 * threads failed) then we don't want to try to restore them
8001 	 */
8002 	if (!pm_processes_stopped)
8003 		return;
8004 
8005 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
8006 	pm_processes_stopped = 0;
8007 }
8008 
8009 static int
8010 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
8011 {
8012 	_NOTE(ARGUNUSED(arg))
8013 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
8014 	int i, nc, result;
8015 	int *ip;
8016 
8017 	pm_info_t *info = PM_GET_PM_INFO(dip);
8018 	if (!info)
8019 		return (DDI_WALK_CONTINUE);
8020 
8021 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8022 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
8023 			ip = &info->pmi_lp[nc - 1];
8024 		} else {
8025 			ip = &info->pmi_levels[nc - 1];
8026 		}
8027 		/*
8028 		 * Because fb drivers fail attempts to turn off the
8029 		 * fb when the monitor is on, but treat a request to
8030 		 * turn on the monitor as a request to turn on the
8031 		 * fb too, we process components in descending order
8032 		 * Because autopm is disabled and processes aren't
8033 		 * running, it is ok to examine current power outside
8034 		 * of the power lock
8035 		 */
8036 		for (i = nc - 1; i >= 0; i--, ip--) {
8037 			if (PM_CURPOWER(dip, i) == *ip)
8038 				continue;
8039 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
8040 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
8041 				cmn_err(CE_WARN, "cpr: unable "
8042 				    "to restore power level of "
8043 				    "component %d of directly "
8044 				    "power manged device %s@%s"
8045 				    " to %d",
8046 				    i, PM_NAME(dip),
8047 				    PM_ADDR(dip), *ip);
8048 				PMD(PMD_FAIL, ("%s: failed to restore "
8049 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
8050 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
8051 				    PM_CURPOWER(dip, i), *ip, result))
8052 			}
8053 		}
8054 		if (nc > 2) {
8055 			kmem_free(info->pmi_lp, nc * sizeof (int));
8056 			info->pmi_lp = NULL;
8057 		}
8058 	}
8059 	return (DDI_WALK_CONTINUE);
8060 }
8061 
8062 /*
8063  * Stolen from the bootdev module
8064  * attempt to convert a path to a major number
8065  */
8066 static major_t
8067 i_path_to_major(char *path, char *leaf_name)
8068 {
8069 	extern major_t path_to_major(char *pathname);
8070 	major_t maj;
8071 
8072 	if ((maj = path_to_major(path)) == (major_t)-1) {
8073 		maj = ddi_name_to_major(leaf_name);
8074 	}
8075 
8076 	return (maj);
8077 }
8078 
8079 /*
8080  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
8081  * An entry in the list means that the device is detached, so we need to
8082  * adjust its ancestors as if they had just seen this attach, and any detached
8083  * ancestors need to have their list entries adjusted.
8084  */
8085 void
8086 pm_driver_removed(major_t major)
8087 {
8088 	static void i_pm_driver_removed(major_t major);
8089 
8090 	/*
8091 	 * Serialize removal of drivers. This is to keep ancestors of
8092 	 * a node that is being deleted from getting deleted and added back
8093 	 * with different counters.
8094 	 */
8095 	mutex_enter(&pm_remdrv_lock);
8096 	i_pm_driver_removed(major);
8097 	mutex_exit(&pm_remdrv_lock);
8098 }
8099 
8100 /*
8101  * This routine is called recursively by pm_noinvol_process_ancestors()
8102  */
8103 static void
8104 i_pm_driver_removed(major_t major)
8105 {
8106 	PMD_FUNC(pmf, "driver_removed")
8107 	static void adjust_ancestors(char *, int);
8108 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8109 	static void pm_noinvol_process_ancestors(char *);
8110 	pm_noinvol_t *ip, *pp = NULL;
8111 	int wasvolpmd;
8112 	ASSERT(major != (major_t)-1);
8113 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8114 again:
8115 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8116 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8117 		if (major != ip->ni_major)
8118 			continue;
8119 		/*
8120 		 * If it is an ancestor of no-invol node, which is
8121 		 * not removed, skip it. This is to cover the case of
8122 		 * ancestor removed without removing its descendants.
8123 		 */
8124 		if (pm_is_noinvol_ancestor(ip)) {
8125 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8126 			continue;
8127 		}
8128 		wasvolpmd = ip->ni_wasvolpmd;
8129 		/*
8130 		 * remove the entry from the list
8131 		 */
8132 		if (pp) {
8133 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8134 			    pmf, ip->ni_path, pp->ni_path))
8135 			pp->ni_next = ip->ni_next;
8136 		} else {
8137 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8138 			    ip->ni_path))
8139 			ASSERT(pm_noinvol_head == ip);
8140 			pm_noinvol_head = ip->ni_next;
8141 		}
8142 		rw_exit(&pm_noinvol_rwlock);
8143 		adjust_ancestors(ip->ni_path, wasvolpmd);
8144 		/*
8145 		 * Had an ancestor been removed before this node, it would have
8146 		 * been skipped. Adjust the no-invol counters for such skipped
8147 		 * ancestors.
8148 		 */
8149 		pm_noinvol_process_ancestors(ip->ni_path);
8150 		kmem_free(ip->ni_path, ip->ni_size);
8151 		kmem_free(ip, sizeof (*ip));
8152 		goto again;
8153 	}
8154 	rw_exit(&pm_noinvol_rwlock);
8155 }
8156 
8157 /*
8158  * returns 1, if *aip is a ancestor of a no-invol node
8159  *	   0, otherwise
8160  */
8161 static int
8162 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8163 {
8164 	pm_noinvol_t *ip;
8165 
8166 	ASSERT(strlen(aip->ni_path) != 0);
8167 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8168 		if (ip == aip)
8169 			continue;
8170 		/*
8171 		 * To be an ancestor, the path must be an initial substring of
8172 		 * the descendent, and end just before a '/' in the
8173 		 * descendent's path.
8174 		 */
8175 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8176 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8177 			return (1);
8178 	}
8179 	return (0);
8180 }
8181 
8182 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
8183 /*
8184  * scan through the pm_noinvolpm list adjusting ancestors of the current
8185  * node;  Modifies string *path.
8186  */
8187 static void
8188 adjust_ancestors(char *path, int wasvolpmd)
8189 {
8190 	PMD_FUNC(pmf, "adjust_ancestors")
8191 	char *cp;
8192 	pm_noinvol_t *lp;
8193 	pm_noinvol_t *pp = NULL;
8194 	major_t locked = (major_t)UINT_MAX;
8195 	dev_info_t *dip;
8196 	char	*pathbuf;
8197 	size_t pathbuflen = strlen(path) + 1;
8198 
8199 	/*
8200 	 * First we look up the ancestor's dip.  If we find it, then we
8201 	 * adjust counts up the tree
8202 	 */
8203 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8204 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8205 	(void) strcpy(pathbuf, path);
8206 	cp = strrchr(pathbuf, '/');
8207 	if (cp == NULL)	{
8208 		/* if no ancestors, then nothing to do */
8209 		kmem_free(pathbuf, pathbuflen);
8210 		return;
8211 	}
8212 	*cp = '\0';
8213 	dip = pm_name_to_dip(pathbuf, 1);
8214 	if (dip != NULL) {
8215 		locked = PM_MAJOR(dip);
8216 
8217 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8218 		    path, dip);
8219 
8220 		if (locked != (major_t)UINT_MAX)
8221 			ddi_release_devi(dip);
8222 	} else {
8223 		char *apath;
8224 		size_t len = strlen(pathbuf) + 1;
8225 		int  lock_held = 1;
8226 
8227 		/*
8228 		 * Now check for ancestors that exist only in the list
8229 		 */
8230 		apath = kmem_alloc(len, KM_SLEEP);
8231 		(void) strcpy(apath, pathbuf);
8232 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8233 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8234 			/*
8235 			 * This can only happen once.  Since we have to drop
8236 			 * the lock, we need to extract the relevant info.
8237 			 */
8238 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8239 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8240 				    lp->ni_path, lp->ni_noinvolpm,
8241 				    lp->ni_noinvolpm - 1))
8242 				lp->ni_noinvolpm--;
8243 				if (wasvolpmd && lp->ni_volpmd) {
8244 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8245 					    "%d\n", pmf, lp->ni_path,
8246 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8247 					lp->ni_volpmd--;
8248 				}
8249 				/*
8250 				 * remove the entry from the list, if there
8251 				 * are no more no-invol descendants and node
8252 				 * itself is not a no-invol node.
8253 				 */
8254 				if (!(lp->ni_noinvolpm ||
8255 				    (lp->ni_flags & PMC_NO_INVOL))) {
8256 					ASSERT(lp->ni_volpmd == 0);
8257 					if (pp) {
8258 						PMD(PMD_NOINVOL, ("%s: freeing "
8259 						    "%s, prev is %s\n", pmf,
8260 						    lp->ni_path, pp->ni_path))
8261 						pp->ni_next = lp->ni_next;
8262 					} else {
8263 						PMD(PMD_NOINVOL, ("%s: free %s "
8264 						    "head\n", pmf, lp->ni_path))
8265 						ASSERT(pm_noinvol_head == lp);
8266 						pm_noinvol_head = lp->ni_next;
8267 					}
8268 					lock_held = 0;
8269 					rw_exit(&pm_noinvol_rwlock);
8270 					adjust_ancestors(apath, wasvolpmd);
8271 					/* restore apath */
8272 					(void) strcpy(apath, pathbuf);
8273 					kmem_free(lp->ni_path, lp->ni_size);
8274 					kmem_free(lp, sizeof (*lp));
8275 				}
8276 				break;
8277 			}
8278 		}
8279 		if (lock_held)
8280 			rw_exit(&pm_noinvol_rwlock);
8281 		adjust_ancestors(apath, wasvolpmd);
8282 		kmem_free(apath, len);
8283 	}
8284 	kmem_free(pathbuf, pathbuflen);
8285 }
8286 
8287 /*
8288  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8289  * which were skipped even though their drivers were removed.
8290  */
8291 static void
8292 pm_noinvol_process_ancestors(char *path)
8293 {
8294 	pm_noinvol_t *lp;
8295 
8296 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8297 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8298 		if (strstr(path, lp->ni_path) &&
8299 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8300 			rw_exit(&pm_noinvol_rwlock);
8301 			i_pm_driver_removed(lp->ni_major);
8302 			return;
8303 		}
8304 	}
8305 	rw_exit(&pm_noinvol_rwlock);
8306 }
8307 
8308 /*
8309  * Returns true if (detached) device needs to be kept up because it exported the
8310  * "no-involuntary-power-cycles" property or we're pretending it did (console
8311  * fb case) or it is an ancestor of such a device and has used up the "one
8312  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8313  * upon detach.  In any event, we need an exact hit on the path or we return
8314  * false.
8315  */
8316 int
8317 pm_noinvol_detached(char *path)
8318 {
8319 	PMD_FUNC(pmf, "noinvol_detached")
8320 	pm_noinvol_t *ip;
8321 	int ret = 0;
8322 
8323 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8324 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8325 		if (strcmp(path, ip->ni_path) == 0) {
8326 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8327 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8328 				    "%s\n", pmf, path))
8329 				ret = 1;
8330 				break;
8331 			}
8332 #ifdef	DEBUG
8333 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8334 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8335 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8336 				    path))
8337 #endif
8338 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8339 			break;
8340 		}
8341 	}
8342 	rw_exit(&pm_noinvol_rwlock);
8343 	return (ret);
8344 }
8345 
8346 int
8347 pm_is_cfb(dev_info_t *dip)
8348 {
8349 	return (dip == cfb_dip);
8350 }
8351 
#ifdef	DEBUG
/*
 * Returns true when no console frame buffer component is counted as being
 * off (pm_cfb_comps_off is zero), i.e. all of them are at "normal", fully on,
 * power.  This is also the answer when the console is not a framebuffer.
 */
int
pm_cfb_is_up(void)
{
	return (pm_cfb_comps_off ? 0 : 1);
}
#endif
8364 
8365 /*
8366  * Preventing scan from powering down the node by incrementing the
8367  * kidsupcnt.
8368  */
8369 void
8370 pm_hold_power(dev_info_t *dip)
8371 {
8372 	e_pm_hold_rele_power(dip, 1);
8373 }
8374 
8375 /*
8376  * Releasing the hold by decrementing the kidsupcnt allowing scan
8377  * to power down the node if all conditions are met.
8378  */
8379 void
8380 pm_rele_power(dev_info_t *dip)
8381 {
8382 	e_pm_hold_rele_power(dip, -1);
8383 }
8384 
8385 /*
8386  * A wrapper of pm_all_to_normal() to power up a dip
8387  * to its normal level
8388  */
8389 int
8390 pm_powerup(dev_info_t *dip)
8391 {
8392 	PMD_FUNC(pmf, "pm_powerup")
8393 
8394 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8395 	ASSERT(!(servicing_interrupt()));
8396 
8397 	/*
8398 	 * in case this node is not already participating pm
8399 	 */
8400 	if (!PM_GET_PM_INFO(dip)) {
8401 		if (!DEVI_IS_ATTACHING(dip))
8402 			return (DDI_SUCCESS);
8403 		if (pm_start(dip) != DDI_SUCCESS)
8404 			return (DDI_FAILURE);
8405 		if (!PM_GET_PM_INFO(dip))
8406 			return (DDI_SUCCESS);
8407 	}
8408 
8409 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8410 }
8411 
8412 int
8413 pm_rescan_walk(dev_info_t *dip, void *arg)
8414 {
8415 	_NOTE(ARGUNUSED(arg))
8416 
8417 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8418 		return (DDI_WALK_CONTINUE);
8419 
8420 	/*
8421 	 * Currently pm_cpr_callb/resume code is the only caller
8422 	 * and it needs to make sure that stopped scan get
8423 	 * reactivated. Otherwise, rescan walk needn't reactive
8424 	 * stopped scan.
8425 	 */
8426 	pm_scan_init(dip);
8427 
8428 	(void) pm_rescan(dip);
8429 	return (DDI_WALK_CONTINUE);
8430 }
8431 
8432 static dev_info_t *
8433 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8434 {
8435 	dev_info_t *wdip, *pdip;
8436 
8437 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8438 		pdip = ddi_get_parent(wdip);
8439 		if (pdip == dip)
8440 			return (wdip);
8441 	}
8442 	return (NULL);
8443 }
8444 
8445 int
8446 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8447     void *arg, void *result)
8448 {
8449 	PMD_FUNC(pmf, "bp_bus_power")
8450 	dev_info_t	*cdip;
8451 	pm_info_t	*cinfo;
8452 	pm_bp_child_pwrchg_t	*bpc;
8453 	pm_sp_misc_t		*pspm;
8454 	pm_bp_nexus_pwrup_t *bpn;
8455 	pm_bp_child_pwrchg_t new_bpc;
8456 	pm_bp_noinvol_t *bpi;
8457 	dev_info_t *tdip;
8458 	char *pathbuf;
8459 	int		ret = DDI_SUCCESS;
8460 	int		errno = 0;
8461 	pm_component_t *cp;
8462 
8463 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8464 	    pm_decode_op(op)))
8465 	switch (op) {
8466 	case BUS_POWER_CHILD_PWRCHG:
8467 		bpc = (pm_bp_child_pwrchg_t *)arg;
8468 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8469 		tdip = bpc->bpc_dip;
8470 		cdip = pm_get_next_descendent(dip, tdip);
8471 		cinfo = PM_GET_PM_INFO(cdip);
8472 		if (cdip != tdip) {
8473 			/*
8474 			 * If the node is an involved parent, it needs to
8475 			 * power up the node as it is needed.  There is nothing
8476 			 * else the framework can do here.
8477 			 */
8478 			if (PM_WANTS_NOTIFICATION(cdip)) {
8479 				PMD(PMD_SET, ("%s: call bus_power for "
8480 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8481 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8482 				    impl_arg, op, arg, result));
8483 			}
8484 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8485 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8486 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8487 			/*
8488 			 * we presume that the parent needs to be up in
8489 			 * order for the child to change state (either
8490 			 * because it must already be on if the child is on
8491 			 * (and the pm_all_to_normal_nexus() will be a nop)
8492 			 * or because it will need to be on for the child
8493 			 * to come on; so we make the call regardless
8494 			 */
8495 			pm_hold_power(cdip);
8496 			if (cinfo) {
8497 				pm_canblock_t canblock = pspm->pspm_canblock;
8498 				ret = pm_all_to_normal_nexus(cdip, canblock);
8499 				if (ret != DDI_SUCCESS) {
8500 					pm_rele_power(cdip);
8501 					return (ret);
8502 				}
8503 			}
8504 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8505 			    PM_DEVICE(cdip)))
8506 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8507 			    result);
8508 			pm_rele_power(cdip);
8509 		} else {
8510 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8511 			    result);
8512 		}
8513 		return (ret);
8514 
8515 	case BUS_POWER_NEXUS_PWRUP:
8516 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8517 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8518 
8519 		if (!e_pm_valid_info(dip, NULL) ||
8520 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8521 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8522 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8523 			    pmf, PM_DEVICE(dip)))
8524 			*pspm->pspm_errnop = EIO;
8525 			*(int *)result = DDI_FAILURE;
8526 			return (DDI_FAILURE);
8527 		}
8528 
8529 		ASSERT(bpn->bpn_dip == dip);
8530 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8531 		    PM_DEVICE(dip)))
8532 		new_bpc.bpc_dip = dip;
8533 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8534 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8535 		new_bpc.bpc_comp = bpn->bpn_comp;
8536 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8537 		new_bpc.bpc_nlevel = bpn->bpn_level;
8538 		new_bpc.bpc_private = bpn->bpn_private;
8539 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8540 		    PM_LEVEL_UPONLY;
8541 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8542 		    &errno;
8543 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8544 		    (void *)&new_bpc, result);
8545 		kmem_free(pathbuf, MAXPATHLEN);
8546 		return (ret);
8547 
8548 	case BUS_POWER_NOINVOL:
8549 		bpi = (pm_bp_noinvol_t *)arg;
8550 		tdip = bpi->bpni_dip;
8551 		cdip = pm_get_next_descendent(dip, tdip);
8552 
8553 		/* In case of rem_drv, the leaf node has been removed */
8554 		if (cdip == NULL)
8555 			return (DDI_SUCCESS);
8556 
8557 		cinfo = PM_GET_PM_INFO(cdip);
8558 		if (cdip != tdip) {
8559 			if (PM_WANTS_NOTIFICATION(cdip)) {
8560 				PMD(PMD_NOINVOL,
8561 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8562 				    pmf, PM_DEVICE(cdip)))
8563 				ret = (*PM_BUS_POWER_FUNC(cdip))
8564 				    (cdip, NULL, op, arg, result);
8565 				if ((cinfo) && (ret == DDI_SUCCESS))
8566 					(void) pm_noinvol_update_node(cdip,
8567 					    bpi);
8568 				return (ret);
8569 			} else {
8570 				PMD(PMD_NOINVOL,
8571 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8572 				    PM_DEVICE(cdip)))
8573 				ret = pm_busop_bus_power(cdip, NULL, op,
8574 				    arg, result);
8575 				/*
8576 				 * Update the current node.
8577 				 */
8578 				if ((cinfo) && (ret == DDI_SUCCESS))
8579 					(void) pm_noinvol_update_node(cdip,
8580 					    bpi);
8581 				return (ret);
8582 			}
8583 		} else {
8584 			/*
8585 			 * For attach, detach, power up:
8586 			 * Do nothing for leaf node since its
8587 			 * counts are already updated.
8588 			 * For CFB and driver removal, since the
8589 			 * path and the target dip passed in is up to and incl.
8590 			 * the immediate ancestor, need to do the update.
8591 			 */
8592 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8593 			    "reached\n", pmf, PM_DEVICE(cdip)))
8594 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8595 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8596 				(void) pm_noinvol_update_node(cdip, bpi);
8597 			return (DDI_SUCCESS);
8598 		}
8599 
8600 	default:
8601 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8602 		return (DDI_FAILURE);
8603 	}
8604 }
8605 
8606 static int
8607 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8608     void *arg, void *resultp)
8609 {
8610 	_NOTE(ARGUNUSED(impl_arg))
8611 	PMD_FUNC(pmf, "bp_set_power")
8612 	pm_ppm_devlist_t *devl = NULL;
8613 	int clevel, circ;
8614 #ifdef	DEBUG
8615 	int circ_db, ccirc_db;
8616 #endif
8617 	int ret = DDI_SUCCESS;
8618 	dev_info_t *cdip;
8619 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8620 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8621 	pm_canblock_t canblock = pspm->pspm_canblock;
8622 	int scan = pspm->pspm_scan;
8623 	int comp = bpc->bpc_comp;
8624 	int olevel = bpc->bpc_olevel;
8625 	int nlevel = bpc->bpc_nlevel;
8626 	int comps_off_incr = 0;
8627 	dev_info_t *pdip = ddi_get_parent(dip);
8628 	int dodeps;
8629 	int direction = pspm->pspm_direction;
8630 	int *errnop = pspm->pspm_errnop;
8631 #ifdef PMDDEBUG
8632 	char *dir = pm_decode_direction(direction);
8633 #endif
8634 	int *iresp = (int *)resultp;
8635 	time_t	idletime, thresh;
8636 	pm_component_t *cp = PM_CP(dip, comp);
8637 	int work_type;
8638 
8639 	*iresp = DDI_SUCCESS;
8640 	*errnop = 0;
8641 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8642 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8643 	    pm_decode_op(op)))
8644 
8645 	/*
8646 	 * The following set of conditions indicate we are here to handle a
8647 	 * driver's pm_[raise|lower]_power request, but the device is being
8648 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8649 	 * we want to pm_block and pass a status back to the caller based
8650 	 * on whether the controlling process's next activity on the device
8651 	 * matches the current request or not.  This distinction tells
8652 	 * downstream functions to avoid calling into a driver or changing
8653 	 * the framework's power state.  To actually block, we need:
8654 	 *
8655 	 * PM_ISDIRECT(dip)
8656 	 *	no reason to block unless a process is directly controlling dev
8657 	 * direction != PM_LEVEL_EXACT
8658 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8659 	 * !pm_processes_stopped
8660 	 *	don't block if controlling proc already be stopped for cpr
8661 	 * canblock != PM_CANBLOCK_BYPASS
8662 	 *	our caller must not have explicitly prevented blocking
8663 	 */
8664 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8665 		PM_LOCK_DIP(dip);
8666 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8667 			/* releases dip lock */
8668 			ret = pm_busop_match_request(dip, bpc);
8669 			if (ret == EAGAIN) {
8670 				PM_LOCK_DIP(dip);
8671 				continue;
8672 			}
8673 			return (*iresp = ret);
8674 		}
8675 		PM_UNLOCK_DIP(dip);
8676 	}
8677 	/* BC device is never scanned, so power will stick until we are done */
8678 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8679 	    direction != PM_LEVEL_DOWNONLY) {
8680 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8681 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8682 		    canblock, 0, resultp) != DDI_SUCCESS) {
8683 			/* *resultp set by pm_set_power */
8684 			return (DDI_FAILURE);
8685 		}
8686 	}
8687 	if (PM_WANTS_NOTIFICATION(pdip)) {
8688 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8689 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8690 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8691 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8692 		if (ret != DDI_SUCCESS) {
8693 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8694 			    pmf, PM_DEVICE(pdip)))
8695 			return (DDI_FAILURE);
8696 		}
8697 	} else {
8698 		/*
8699 		 * Since we don't know what the actual power level is,
8700 		 * we place a power hold on the parent no matter what
8701 		 * component and level is changing.
8702 		 */
8703 		pm_hold_power(pdip);
8704 	}
8705 	PM_LOCK_POWER(dip, &circ);
8706 	clevel = PM_CURPOWER(dip, comp);
8707 	/*
8708 	 * It's possible that a call was made to pm_update_maxpower()
8709 	 * on another thread before we took the lock above. So, we need to
8710 	 * make sure that this request isn't processed after the
8711 	 * change of power executed on behalf of pm_update_maxpower().
8712 	 */
8713 	if (nlevel > pm_get_normal_power(dip, comp)) {
8714 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8715 		    pmf))
8716 		ret = DDI_FAILURE;
8717 		*iresp = DDI_FAILURE;
8718 		goto post_notify;
8719 	}
8720 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8721 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8722 	    clevel, dir))
8723 	switch (direction) {
8724 	case PM_LEVEL_UPONLY:
8725 		/* Powering up */
8726 		if (clevel >= nlevel) {
8727 			PMD(PMD_SET, ("%s: current level is already "
8728 			    "at or above the requested level.\n", pmf))
8729 			*iresp = DDI_SUCCESS;
8730 			ret = DDI_SUCCESS;
8731 			goto post_notify;
8732 		}
8733 		break;
8734 	case PM_LEVEL_EXACT:
8735 		/* specific level request */
8736 		if (clevel == nlevel && !PM_ISBC(dip)) {
8737 			PMD(PMD_SET, ("%s: current level is already "
8738 			    "at the requested level.\n", pmf))
8739 			*iresp = DDI_SUCCESS;
8740 			ret = DDI_SUCCESS;
8741 			goto post_notify;
8742 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8743 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8744 			if (!pm_cfb_enabled) {
8745 				PMD(PMD_ERROR | PMD_CFB,
8746 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8747 				*errnop = EINVAL;
8748 				*iresp = DDI_FAILURE;
8749 				ret = DDI_FAILURE;
8750 				goto post_notify;
8751 			}
8752 			mutex_enter(&pm_cfb_lock);
8753 			while (cfb_inuse) {
8754 				mutex_exit(&pm_cfb_lock);
8755 				if (delay_sig(1) == EINTR) {
8756 					ret = DDI_FAILURE;
8757 					*iresp = DDI_FAILURE;
8758 					*errnop = EINTR;
8759 					goto post_notify;
8760 				}
8761 				mutex_enter(&pm_cfb_lock);
8762 			}
8763 			mutex_exit(&pm_cfb_lock);
8764 		}
8765 		break;
8766 	case PM_LEVEL_DOWNONLY:
8767 		/* Powering down */
8768 		thresh = cur_threshold(dip, comp);
8769 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8770 		if (scan && ((PM_KUC(dip) != 0) ||
8771 		    (cp->pmc_busycount > 0) ||
8772 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8773 #ifdef	DEBUG
8774 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8775 				PMD(PMD_SET, ("%s: scan failed: "
8776 				    "kidsupcnt != 0\n", pmf))
8777 			if (cp->pmc_busycount > 0)
8778 				PMD(PMD_SET, ("%s: scan failed: "
8779 				    "device become busy\n", pmf))
8780 			if (idletime < thresh)
8781 				PMD(PMD_SET, ("%s: scan failed: device "
8782 				    "hasn't been idle long enough\n", pmf))
8783 #endif
8784 			*iresp = DDI_FAILURE;
8785 			*errnop = EBUSY;
8786 			ret = DDI_FAILURE;
8787 			goto post_notify;
8788 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8789 			PMD(PMD_SET, ("%s: current level is already at "
8790 			    "or below the requested level.\n", pmf))
8791 			*iresp = DDI_SUCCESS;
8792 			ret = DDI_SUCCESS;
8793 			goto post_notify;
8794 		}
8795 		break;
8796 	}
8797 
8798 	if (PM_IS_CFB(dip) && (comps_off_incr =
8799 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8800 		/*
8801 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8802 		 * component from full power.  Remember that we tried to
8803 		 * lower power in case it fails and we need to back out
8804 		 * the adjustment.
8805 		 */
8806 		update_comps_off(comps_off_incr, dip);
8807 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8808 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8809 		    pm_cfb_comps_off))
8810 	}
8811 
8812 	if ((*iresp = power_dev(dip,
8813 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8814 #ifdef DEBUG
8815 		/*
8816 		 * All descendents of this node should already be powered off.
8817 		 */
8818 		if (PM_CURPOWER(dip, comp) == 0) {
8819 			pm_desc_pwrchk_t pdpchk;
8820 			pdpchk.pdpc_dip = dip;
8821 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8822 			ndi_devi_enter(dip, &circ_db);
8823 			for (cdip = ddi_get_child(dip); cdip != NULL;
8824 			    cdip = ddi_get_next_sibling(cdip)) {
8825 				ndi_devi_enter(cdip, &ccirc_db);
8826 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8827 				    (void *)&pdpchk);
8828 				ndi_devi_exit(cdip, ccirc_db);
8829 			}
8830 			ndi_devi_exit(dip, circ_db);
8831 		}
8832 #endif
8833 		/*
8834 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8835 		 * back up to full power.
8836 		 */
8837 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8838 			update_comps_off(comps_off_incr, dip);
8839 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8840 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8841 			    comp, clevel, nlevel, pm_cfb_comps_off))
8842 		}
8843 		dodeps = 0;
8844 		if (POWERING_OFF(clevel, nlevel)) {
8845 			if (PM_ISBC(dip)) {
8846 				dodeps = (comp == 0);
8847 			} else {
8848 				int i;
8849 				dodeps = 1;
8850 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8851 					/* if some component still on */
8852 					if (PM_CURPOWER(dip, i)) {
8853 						dodeps = 0;
8854 						break;
8855 					}
8856 				}
8857 			}
8858 			if (dodeps)
8859 				work_type = PM_DEP_WK_POWER_OFF;
8860 		} else if (POWERING_ON(clevel, nlevel)) {
8861 			if (PM_ISBC(dip)) {
8862 				dodeps = (comp == 0);
8863 			} else {
8864 				int i;
8865 				dodeps = 1;
8866 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8867 					if (i == comp)
8868 						continue;
8869 					if (PM_CURPOWER(dip, i) > 0) {
8870 						dodeps = 0;
8871 						break;
8872 					}
8873 				}
8874 			}
8875 			if (dodeps)
8876 				work_type = PM_DEP_WK_POWER_ON;
8877 		}
8878 
8879 		if (dodeps) {
8880 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8881 
8882 			(void) ddi_pathname(dip, pathbuf);
8883 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8884 			    PM_DEP_NOWAIT, NULL, 0);
8885 			kmem_free(pathbuf, MAXPATHLEN);
8886 		}
8887 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8888 			int old;
8889 
8890 			/* If old power cached during deadlock, use it. */
8891 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8892 			    cp->pmc_phc_pwr : olevel);
8893 			mutex_enter(&pm_rsvp_lock);
8894 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8895 			    old, canblock);
8896 			pm_enqueue_notify_others(&devl, canblock);
8897 			mutex_exit(&pm_rsvp_lock);
8898 		} else {
8899 			pm_ppm_devlist_t *p;
8900 			pm_ppm_devlist_t *next;
8901 			for (p = devl; p != NULL; p = next) {
8902 				next = p->ppd_next;
8903 				kmem_free(p, sizeof (pm_ppm_devlist_t));
8904 			}
8905 			devl = NULL;
8906 		}
8907 
8908 		/*
8909 		 * If we are coming from a scan, don't do it again,
8910 		 * else we can have infinite loops.
8911 		 */
8912 		if (!scan)
8913 			pm_rescan(dip);
8914 	} else {
8915 		/* if we incremented pm_comps_off_count, but failed */
8916 		if (comps_off_incr > 0) {
8917 			update_comps_off(-comps_off_incr, dip);
8918 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8919 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8920 			    comp, clevel, nlevel, pm_cfb_comps_off))
8921 		}
8922 		*errnop = EIO;
8923 	}
8924 
8925 post_notify:
8926 	/*
8927 	 * This thread may have been in deadlock with pm_power_has_changed.
8928 	 * Before releasing power lock, clear the flag which marks this
8929 	 * condition.
8930 	 */
8931 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8932 
8933 	/*
8934 	 * Update the old power level in the bus power structure with the
8935 	 * actual power level before the transition was made to the new level.
8936 	 * Some involved parents depend on this information to keep track of
8937 	 * their children's power transition.
8938 	 */
8939 	if (*iresp != DDI_FAILURE)
8940 		bpc->bpc_olevel = clevel;
8941 
8942 	if (PM_WANTS_NOTIFICATION(pdip)) {
8943 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8944 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8945 		PM_UNLOCK_POWER(dip, circ);
8946 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8947 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8948 		    PM_DEVICE(dip), ret))
8949 	} else {
8950 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8951 		PM_UNLOCK_POWER(dip, circ);
8952 		/*
8953 		 * Now that we know what power transition has occurred
8954 		 * (if any), release the power hold.  Leave the hold
8955 		 * in effect in the case of OFF->ON transition.
8956 		 */
8957 		if (!(clevel == 0 && nlevel > 0 &&
8958 		    (!PM_ISBC(dip) || comp == 0)))
8959 			pm_rele_power(pdip);
8960 		/*
8961 		 * If the power transition was an ON->OFF transition,
8962 		 * remove the power hold from the parent.
8963 		 */
8964 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8965 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8966 			pm_rele_power(pdip);
8967 	}
8968 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8969 		return (DDI_FAILURE);
8970 	else
8971 		return (DDI_SUCCESS);
8972 }
8973 
8974 /*
8975  * If an app (SunVTS or Xsun) has taken control, then block until it
8976  * gives it up or makes the requested power level change, unless
8977  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8978  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8979  */
8980 static int
8981 pm_busop_match_request(dev_info_t *dip, void *arg)
8982 {
8983 	PMD_FUNC(pmf, "bp_match_request")
8984 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8985 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8986 	int comp = bpc->bpc_comp;
8987 	int nlevel = bpc->bpc_nlevel;
8988 	pm_canblock_t canblock = pspm->pspm_canblock;
8989 	int direction = pspm->pspm_direction;
8990 	int clevel, circ;
8991 
8992 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8993 	PM_LOCK_POWER(dip, &circ);
8994 	clevel = PM_CURPOWER(dip, comp);
8995 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
8996 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
8997 	if (direction == PM_LEVEL_UPONLY) {
8998 		if (clevel >= nlevel) {
8999 			PM_UNLOCK_POWER(dip, circ);
9000 			PM_UNLOCK_DIP(dip);
9001 			return (DDI_SUCCESS);
9002 		}
9003 	} else if (clevel == nlevel) {
9004 		PM_UNLOCK_POWER(dip, circ);
9005 		PM_UNLOCK_DIP(dip);
9006 		return (DDI_SUCCESS);
9007 	}
9008 	if (canblock == PM_CANBLOCK_FAIL) {
9009 		PM_UNLOCK_POWER(dip, circ);
9010 		PM_UNLOCK_DIP(dip);
9011 		return (DDI_FAILURE);
9012 	}
9013 	if (canblock == PM_CANBLOCK_BLOCK) {
9014 		/*
9015 		 * To avoid a deadlock, we must not hold the
9016 		 * power lock when we pm_block.
9017 		 */
9018 		PM_UNLOCK_POWER(dip, circ);
9019 		PMD(PMD_SET, ("%s: blocking\n", pmf))
9020 		/* pm_block releases dip lock */
9021 		switch (pm_block(dip, comp, nlevel, clevel)) {
9022 		case PMP_RELEASE:
9023 			return (EAGAIN);
9024 		case PMP_SUCCEED:
9025 			return (DDI_SUCCESS);
9026 		case PMP_FAIL:
9027 			return (DDI_FAILURE);
9028 		}
9029 	} else {
9030 		ASSERT(0);
9031 	}
9032 	_NOTE(NOTREACHED);
9033 	return (DDI_FAILURE);	/* keep gcc happy */
9034 }
9035 
9036 static int
9037 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
9038 {
9039 	PMD_FUNC(pmf, "all_to_normal_nexus")
9040 	int		*normal;
9041 	int		i, ncomps;
9042 	size_t		size;
9043 	int		changefailed = 0;
9044 	int		ret, result = DDI_SUCCESS;
9045 	pm_bp_nexus_pwrup_t	bpn;
9046 	pm_sp_misc_t	pspm;
9047 
9048 	ASSERT(PM_GET_PM_INFO(dip));
9049 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9050 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
9051 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
9052 		return (DDI_FAILURE);
9053 	}
9054 	ncomps = PM_NUMCMPTS(dip);
9055 	for (i = 0; i < ncomps; i++) {
9056 		bpn.bpn_dip = dip;
9057 		bpn.bpn_comp = i;
9058 		bpn.bpn_level = normal[i];
9059 		pspm.pspm_canblock = canblock;
9060 		pspm.pspm_scan = 0;
9061 		bpn.bpn_private = &pspm;
9062 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
9063 		    (void *)&bpn, (void *)&result);
9064 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
9065 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
9066 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
9067 			    i, normal[i], result))
9068 			changefailed++;
9069 		}
9070 	}
9071 	kmem_free(normal, size);
9072 	if (changefailed) {
9073 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
9074 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
9075 		return (DDI_FAILURE);
9076 	}
9077 	return (DDI_SUCCESS);
9078 }
9079 
9080 int
9081 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
9082     dev_info_t *tdip)
9083 {
9084 	PMD_FUNC(pmf, "noinvol_update")
9085 	pm_bp_noinvol_t args;
9086 	int ret;
9087 	int result = DDI_SUCCESS;
9088 
9089 	args.bpni_path = path;
9090 	args.bpni_dip = tdip;
9091 	args.bpni_cmd = subcmd;
9092 	args.bpni_wasvolpmd = wasvolpmd;
9093 	args.bpni_volpmd = volpmd;
9094 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
9095 	    "volpmd %d wasvolpmd %d\n", pmf,
9096 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
9097 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
9098 	    &args, &result);
9099 	return (ret);
9100 }
9101 
9102 void
9103 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9104 {
9105 	PMD_FUNC(pmf, "noinvol_update_node")
9106 
9107 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9108 	switch (req->bpni_cmd) {
9109 	case PM_BP_NOINVOL_ATTACH:
9110 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
9111 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9112 		    DEVI(dip)->devi_pm_noinvolpm,
9113 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9114 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9115 		PM_LOCK_DIP(dip);
9116 		DEVI(dip)->devi_pm_noinvolpm--;
9117 		if (req->bpni_wasvolpmd) {
9118 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9119 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9120 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9121 			    DEVI(dip)->devi_pm_volpmd - 1))
9122 			if (DEVI(dip)->devi_pm_volpmd)
9123 				DEVI(dip)->devi_pm_volpmd--;
9124 		}
9125 		PM_UNLOCK_DIP(dip);
9126 		break;
9127 
9128 	case PM_BP_NOINVOL_DETACH:
9129 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9130 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9131 		    DEVI(dip)->devi_pm_noinvolpm,
9132 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9133 		PM_LOCK_DIP(dip);
9134 		DEVI(dip)->devi_pm_noinvolpm++;
9135 		if (req->bpni_wasvolpmd) {
9136 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9137 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9138 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9139 			    DEVI(dip)->devi_pm_volpmd + 1))
9140 			DEVI(dip)->devi_pm_volpmd++;
9141 		}
9142 		PM_UNLOCK_DIP(dip);
9143 		break;
9144 
9145 	case PM_BP_NOINVOL_REMDRV:
9146 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9147 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9148 		    DEVI(dip)->devi_pm_noinvolpm,
9149 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9150 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9151 		PM_LOCK_DIP(dip);
9152 		DEVI(dip)->devi_pm_noinvolpm--;
9153 		if (req->bpni_wasvolpmd) {
9154 			PMD(PMD_NOINVOL,
9155 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9156 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9157 			    DEVI(dip)->devi_pm_volpmd,
9158 			    DEVI(dip)->devi_pm_volpmd - 1))
9159 			/*
9160 			 * A power up could come in between and
9161 			 * clear the volpmd, if that's the case,
9162 			 * volpmd would be clear.
9163 			 */
9164 			if (DEVI(dip)->devi_pm_volpmd)
9165 				DEVI(dip)->devi_pm_volpmd--;
9166 		}
9167 		PM_UNLOCK_DIP(dip);
9168 		break;
9169 
9170 	case PM_BP_NOINVOL_CFB:
9171 		PMD(PMD_NOINVOL,
9172 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9173 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9174 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9175 		PM_LOCK_DIP(dip);
9176 		DEVI(dip)->devi_pm_noinvolpm++;
9177 		PM_UNLOCK_DIP(dip);
9178 		break;
9179 
9180 	case PM_BP_NOINVOL_POWER:
9181 		PMD(PMD_NOINVOL,
9182 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9183 		    pmf, PM_DEVICE(dip),
9184 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9185 		    req->bpni_volpmd))
9186 		PM_LOCK_DIP(dip);
9187 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9188 		PM_UNLOCK_DIP(dip);
9189 		break;
9190 
9191 	default:
9192 		break;
9193 	}
9194 
9195 }
9196 
#ifdef DEBUG
/*
 * Debug-only device tree walk callback that reports descendants which
 * are still powered while an ancestor is powering off.
 *
 * dip	node being visited.
 * arg	pm_desc_pwrchk_t; pdpc_dip is the ancestor named in the messages
 *	and pdpc_par_involved selects the severity: CE_PANIC when it is
 *	zero, CE_WARN otherwise.
 *
 * Nodes without pm info are skipped.  One message is issued for each
 * component of dip whose current power level is non-zero.  Always returns
 * DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *pdpchk = (pm_desc_pwrchk_t *)arg;
	int comp;
	/* LINTED */
	int curpwr, ce_level;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		/* LINTED */
		curpwr = PM_CURPOWER(dip, comp);
		if (curpwr != 0) {
			/* E_FUNC_SET_NOT_USED */
			if (pdpchk->pdpc_par_involved == 0)
				ce_level = CE_PANIC;
			else
				ce_level = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off "
			    "while desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(pdpchk->pdpc_dip), PM_DEVICE(dip),
			    comp, curpwr))
			cmn_err(ce_level, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(pdpchk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9229 
9230 /*
9231  * Record the fact that one thread is borrowing the lock on a device node.
9232  * Use is restricted to the case where the lending thread will block until
9233  * the borrowing thread (always curthread) completes.
9234  */
9235 void
9236 pm_borrow_lock(kthread_t *lender)
9237 {
9238 	lock_loan_t *prev = &lock_loan_head;
9239 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9240 
9241 	cur->pmlk_borrower = curthread;
9242 	cur->pmlk_lender = lender;
9243 	mutex_enter(&pm_loan_lock);
9244 	cur->pmlk_next = prev->pmlk_next;
9245 	prev->pmlk_next = cur;
9246 	mutex_exit(&pm_loan_lock);
9247 }
9248 
9249 /*
9250  * Return the borrowed lock.  A thread can borrow only one.
9251  */
9252 void
9253 pm_return_lock(void)
9254 {
9255 	lock_loan_t *cur;
9256 	lock_loan_t *prev = &lock_loan_head;
9257 
9258 	mutex_enter(&pm_loan_lock);
9259 	ASSERT(prev->pmlk_next != NULL);
9260 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9261 		if (cur->pmlk_borrower == curthread)
9262 			break;
9263 
9264 	ASSERT(cur != NULL);
9265 	prev->pmlk_next = cur->pmlk_next;
9266 	mutex_exit(&pm_loan_lock);
9267 	kmem_free(cur, sizeof (*cur));
9268 }
9269 
9270 #if defined(__x86)
9271 
/*
 * I/O ports and status bits used by the polled character routines.
 * NOTE(review): the addresses look like the legacy COM1 16550 UART at
 * base 0x3f8 -- confirm against the platform documentation.
 */
#define	CPR_RXR	0x1		/* CPR_LSTAT bit tested before reading */
#define	CPR_TXR	0x20		/* CPR_LSTAT bit tested before writing */
#define	CPR_DATAREG	0x3f8	/* port a character is read from/written to */
#define	CPR_LSTAT	0x3fd	/* port polled for the status bits above */
#define	CPR_INTRCTL	0x3f9	/* not referenced by the routines here */

/*
 * Poll CPR_LSTAT, waiting 10 microseconds between samples, until the
 * CPR_RXR bit is set, then read and return one byte from CPR_DATAREG.
 * There is no timeout.
 */
char
pm_getchar(void)
{
	for (;;) {
		if ((inb(CPR_LSTAT) & CPR_RXR) == CPR_RXR)
			break;
		drv_usecwait(10);
	}

	return (inb(CPR_DATAREG));
}
9287 
9288 void
9289 pm_putchar(char c)
9290 {
9291 	while ((inb(CPR_LSTAT) & CPR_TXR) == 0)
9292 		drv_usecwait(10);
9293 
9294 	outb(CPR_DATAREG, c);
9295 }
9296 
/*
 * Write the NUL-terminated string s one character at a time with
 * pm_putchar().  Despite the name, no format processing is done.
 */
void
pm_printf(char *s)
{
	char *cp;

	for (cp = s; *cp != '\0'; cp++)
		pm_putchar(*cp);
}
9304 
9305 #endif
9306 
9307 int
9308 pm_ppm_searchlist(pm_searchargs_t *sp)
9309 {
9310 	power_req_t power_req;
9311 	int result = 0;
9312 	/* LINTED */
9313 	int ret;
9314 
9315 	power_req.request_type = PMR_PPM_SEARCH_LIST;
9316 	power_req.req.ppm_search_list_req.searchlist = sp;
9317 	ASSERT(DEVI(ddi_root_node())->devi_pm_ppm);
9318 	ret = pm_ctlops((dev_info_t *)DEVI(ddi_root_node())->devi_pm_ppm,
9319 	    ddi_root_node(), DDI_CTLOPS_POWER, &power_req, &result);
9320 	PMD(PMD_SX, ("pm_ppm_searchlist returns %d, result %d\n",
9321 	    ret, result))
9322 	return (result);
9323 }
9324