xref: /titanic_41/usr/src/uts/common/os/sunpm.c (revision 66ea84940ca8687745ad2a165ef9bf49ec13996f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * sunpm.c builds sunpm.o	"power management framework"
28  *	kernel-resident power management code.  Implements power management
29  *	policy
30  *	Assumes: all backwards compat. device components wake up on &
31  *		 the pm_info pointer in dev_info is initially NULL
32  *
33  * PM - (device) Power Management
34  *
35  * Each device may have 0 or more components.  If a device has no components,
36  * then it can't be power managed.  Each component has 2 or more
37  * power states.
38  *
39  * "Backwards Compatible" (bc) devices:
40  * There are two different types of devices from the point of view of this
41  * code.  The original type, left over from the original PM implementation on
42  * the voyager platform are known in this code as "backwards compatible"
43  * devices (PM_ISBC(dip) returns true).
44  * They are recognized by the pm code by the lack of a pm-components property
45  * and a call made by the driver to pm_create_components(9F).
46  * For these devices, component 0 is special, and represents the power state
47  * of the device.  If component 0 is to be set to power level 0 (off), then
48  * the framework must first call into the driver's detach(9E) routine with
49  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
50  * After setting component 0 from 0 to a non-zero power level, a call must be
51  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
52  *
53  * Currently, the only way to get a bc device power managed is via a set of
54  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
55  *
56  * For non-bc devices, the driver describes the components by exporting a
57  * pm-components(9P) property that tells how many components there are,
58  * tells what each component's power state values are, and provides human
59  * readable strings (currently unused) for each component name and power state.
60  * Devices which export pm-components(9P) are automatically power managed
61  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
62  * after parsing power.conf(4)). The exception to this rule is that power
63  * manageable CPU devices may be automatically managed independently of autopm
64  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
65  * ioctls) cpupm. If the CPU devices are not managed independently, then they
66  * are managed by autopm. In either case, for automatically power managed
67  * devices, all components are considered independent of each other, and it is
68  * up to the driver to decide when a transition requires saving or restoring
69  * hardware state.
70  *
71  * Each device component also has a threshold time associated with each power
72  * transition (see power.conf(4)), and a busy/idle state maintained by the
73  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
74  * Components are created idle.
75  *
76  * The PM framework provides several functions:
77  * -implement PM policy as described in power.conf(4)
78  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
79  *  Policies consist of:
80  *    -set threshold values (defaults if none provided by pmconfig)
81  *    -set dependencies among devices
82  *    -enable/disable autopm
83  *    -enable/disable cpupm
84  *    -turn down idle components based on thresholds (if autopm or cpupm is
85  *     enabled) (aka scanning)
86  *    -maintain power states based on dependencies among devices
87  *    -upon request, or when the frame buffer powers off, attempt to turn off
88  *     all components that are idle or become idle over the next (10 sec)
89  *     period in an attempt to get down to an EnergyStar compliant state
90  *    -prevent powering off of a device which exported the
91  *     pm-no-involuntary-power-cycles property without active involvement of
92  *     the device's driver (so no removing power when the device driver is
93  *     not attached)
94  * -provide a mechanism for a device driver to request that a device's component
95  *  be brought back to the power level necessary for the use of the device
96  * -allow a process to directly control the power levels of device components
97  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
98  * -ensure that the console frame buffer is powered up before being referenced
99  *  via prom_printf() or other prom calls that might generate console output
100  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
101  * -provide "backwards compatible" behavior for devices without pm-components
102  *  property
103  *
104  * Scanning:
105  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
106  * component of each managed device to its lowest power based on the threshold
107  * of idleness associated with each transition and the busy/idle state of the
108  * component.
109  *
110  * The actual work of this is done by pm_scan_dev(), which cycles through each
111  * component of a device, checking its idleness against its current threshold,
112  * and calling pm_set_power() as appropriate to change the power level.
113  * This function also indicates when it would next be profitable to scan the
114  * device again, and a new scan is scheduled after that time.
115  *
116  * Dependencies:
117  * It is possible to establish a dependency between the power states of two
118  * otherwise unrelated devices.  This is currently done to ensure that the
119  * cdrom is always up whenever the console framebuffer is up, so that the user
120  * can insert a cdrom and see a popup as a result.
121  *
122  * The dependency terminology used in power.conf(4) is not easy to understand,
123  * so we've adopted a different terminology in the implementation.  We write
124  * of a "keeps up" and a "kept up" device.  A relationship can be established
125  * where one device keeps up another.  That means that if the keepsup device
126  * has any component that is at a non-zero power level, all components of the
127  * "kept up" device must be brought to full power.  This relationship is
128  * asynchronous.  When the keeping device is powered up, a request is queued
129  * to a worker thread to bring up the kept device.  The caller does not wait.
130  * Scan will not turn down a kept up device.
131  *
132  * Direct PM:
133  * A device may be directly power managed by a process.  If a device is
134  * directly pm'd, then it will not be scanned, and dependencies will not be
135  * enforced.  If a directly pm'd device's driver requests a power change (via
136  * pm_raise_power(9F)), then the request is blocked and notification is sent
137  * to the controlling process, which must issue the requested power change for
138  * the driver to proceed.
139  *
140  */
141 
142 #include <sys/types.h>
143 #include <sys/errno.h>
144 #include <sys/callb.h>		/* callback registration during CPR */
145 #include <sys/conf.h>		/* driver flags and functions */
146 #include <sys/open.h>		/* OTYP_CHR definition */
147 #include <sys/stat.h>		/* S_IFCHR definition */
148 #include <sys/pathname.h>	/* name -> dev_info xlation */
149 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
150 #include <sys/kmem.h>		/* memory alloc stuff */
151 #include <sys/debug.h>
152 #include <sys/archsystm.h>
153 #include <sys/pm.h>
154 #include <sys/ddi.h>
155 #include <sys/sunddi.h>
156 #include <sys/sunndi.h>
157 #include <sys/sunpm.h>
158 #include <sys/epm.h>
159 #include <sys/vfs.h>
160 #include <sys/mode.h>
161 #include <sys/mkdev.h>
162 #include <sys/promif.h>
163 #include <sys/consdev.h>
164 #include <sys/esunddi.h>
165 #include <sys/modctl.h>
166 #include <sys/fs/ufs_fs.h>
167 #include <sys/note.h>
168 #include <sys/taskq.h>
169 #include <sys/bootconf.h>
170 #include <sys/reboot.h>
171 #include <sys/spl.h>
172 #include <sys/disp.h>
173 #include <sys/sobject.h>
174 #include <sys/sunmdi.h>
175 #include <sys/systm.h>
176 #include <sys/cpuvar.h>
177 #include <sys/cyclic.h>
178 #include <sys/uadmin.h>
179 #include <sys/srn.h>
180 
181 
182 /*
183  * PM LOCKING
184  *	The list of locks:
185  * Global pm mutex locks.
186  *
187  * pm_scan_lock:
188  *		It protects the timeout id of the scan thread, and the value
189  *		of autopm_enabled and cpupm.  This lock is not held
190  *		concurrently with any other PM locks.
191  *
192  * pm_clone_lock:	Protects the clone list and count of poll events
193  *		pending for the pm driver.
194  *		Lock ordering:
195  *			pm_clone_lock -> pm_pscc_interest_rwlock,
196  *			pm_clone_lock -> pm_pscc_direct_rwlock.
197  *
198  * pm_rsvp_lock:
199  *		Used to synchronize the data structures used for processes
200  *		to rendezvous with state change information when doing
201  *		direct PM.
202  *		Lock ordering:
203  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
204  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
205  *			pm_rsvp_lock -> pm_clone_lock.
206  *
207  * ppm_lock:	protects the list of registered ppm drivers
208  *		Lock ordering:
209  *			ppm_lock -> ppm driver unit_lock
210  *
211  * pm_compcnt_lock:
212  *		Protects count of components that are not at their lowest
213  *		power level.
214  *		Lock ordering:
215  *			pm_compcnt_lock -> ppm_lock.
216  *
217  * pm_dep_thread_lock:
218  *		Protects work list for pm_dep_thread.  Not taken concurrently
219  *		with any other pm lock.
220  *
221  * pm_remdrv_lock:
222  *		Serializes the operation of removing noinvol data structure
223  *		entries for a branch of the tree when a driver has been
224  *		removed from the system (modctl_rem_major).
225  *		Lock ordering:
226  *			pm_remdrv_lock -> pm_noinvol_rwlock.
227  *
228  * pm_cfb_lock: (High level spin lock)
229  *		Protects the count of how many components of the console
230  *		frame buffer are off (so we know if we have to bring up the
231  *		console as a result of a prom_printf, etc.).
232  *		No other locks are taken while holding this lock.
233  *
234  * pm_loan_lock:
235  *		Protects the lock_loan list.  List is used to record that one
236  *		thread has acquired a power lock but has launched another thread
237  *		to complete its processing.  An entry in the list indicates that
238  *		the worker thread can borrow the lock held by the other thread,
239  *		which must block on the completion of the worker.  Use is
240  *		specific to module loading.
241  *		No other locks are taken while holding this lock.
242  *
243  * Global PM rwlocks
244  *
245  * pm_thresh_rwlock:
246  *		Protects the list of thresholds recorded for future use (when
247  *		devices attach).
248  *		Lock ordering:
249  *			pm_thresh_rwlock -> devi_pm_lock
250  *
251  * pm_noinvol_rwlock:
252  *		Protects list of detached nodes that had noinvol registered.
253  *		No other PM locks are taken while holding pm_noinvol_rwlock.
254  *
255  * pm_pscc_direct_rwlock:
256  *		Protects the list that maps devices being directly power
257  *		managed to the processes that manage them.
258  *		Lock ordering:
259  *			pm_pscc_direct_rwlock -> psce_lock
260  *
261  * pm_pscc_interest_rwlock:
262  *		Protects the list that maps state change events to processes
263  *		that want to know about them.
264  *		Lock ordering:
265  *			pm_pscc_interest_rwlock -> psce_lock
266  *
267  * per-dip locks:
268  *
269  * Each node has these per-dip locks, which are only used if the device is
270  * a candidate for power management (e.g. has pm components)
271  *
272  * devi_pm_lock:
273  *		Protects all power management state of the node except for
274  *		power level, which is protected by ndi_devi_enter().
275  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
276  *		Lock ordering:
277  *			devi_pm_lock -> pm_rsvp_lock,
278  *			devi_pm_lock -> pm_dep_thread_lock,
279  *			devi_pm_lock -> pm_noinvol_rwlock,
280  *			devi_pm_lock -> power lock
281  *
282  * power lock (ndi_devi_enter()):
283  *		Since changing power level is possibly a slow operation (30
284  *		seconds to spin up a disk drive), this is locked separately.
285  *		Since a call into the driver to change the power level of one
286  *		component may result in a call back into the framework to change
287  *		the power level of another, this lock allows re-entrancy by
288  *		the same thread (ndi_devi_enter is used for this because
289  *		the USB framework uses ndi_devi_enter in its power entry point,
290  *		and use of any other lock would produce a deadlock).
291  *
292  * devi_pm_busy_lock:
293  *		This lock protects the integrity of the busy count.  It is
294  *		only taken by pm_busy_component() and pm_idle_component and
295  *		some code that adjust the busy time after the timer gets set
296  *		up or after a CPR operation.  It is per-dip to keep from
297  *		single-threading all the disk drivers on a system.
298  *		It could be per component instead, but most devices have
299  *		only one component.
300  *		No other PM locks are taken while holding this lock.
301  *
302  */
303 
304 static int stdout_is_framebuffer;
305 static kmutex_t	e_pm_power_lock;
306 static kmutex_t pm_loan_lock;
307 kmutex_t	pm_scan_lock;
308 callb_id_t	pm_cpr_cb_id;
309 callb_id_t	pm_panic_cb_id;
310 callb_id_t	pm_halt_cb_id;
311 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
312 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
313 
314 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
315 clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
316 clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
317 
/*
 * Minimum scan interval for a device: pm_cpu_min_scan when PM_ISCPU(dip)
 * is true, pm_default_min_scan otherwise.  Both are tunable globals
 * defined just above.
 */
#define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
				    pm_default_min_scan)
320 
321 static int pm_busop_set_power(dev_info_t *,
322     void *, pm_bus_power_op_t, void *, void *);
323 static int pm_busop_match_request(dev_info_t *, void *);
324 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
325 static void e_pm_set_max_power(dev_info_t *, int, int);
326 static int e_pm_get_max_power(dev_info_t *, int);
327 
328 /*
329  * Dependency Processing is done thru a separate thread.
330  */
331 kmutex_t	pm_dep_thread_lock;
332 kcondvar_t	pm_dep_thread_cv;
333 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
334 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
335 
336 /*
337  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
338  * power managing things in single user mode that have been suppressed via
339  * power.conf entries.  Protected by pm_scan_lock.
340  */
341 int		autopm_enabled;
342 
343 /*
344  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
345  * to define the power management behavior of CPU devices separate from
346  * autopm. Protected by pm_scan_lock.
347  */
348 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
349 
350 /*
351  * AutoS3 depends on autopm being enabled, and must be enabled by
352  * PM_START_AUTOS3 command.
353  */
354 int		autoS3_enabled;
355 
356 #if !defined(__sparc)
357 /*
358  * on sparc these live in fillsysinfo.c
359  *
360  * If this variable is non-zero, cpr should return "not supported" when
361  * it is queried even though it would normally be supported on this platform.
362  */
363 int cpr_supported_override;
364 
365 /*
366  * Some platforms may need to support CPR even in the absence of
367  * having the correct platform id information.  If this
368  * variable is non-zero, cpr should proceed even in the absence
369  * of otherwise being qualified.
370  */
371 int cpr_platform_enable = 0;
372 
373 #endif
374 
375 /*
376  * pm_S3_enabled indicates that we believe the platform can support S3,
377  * which we get from pmconfig(1M)
378  */
379 int		pm_S3_enabled;
380 
381 /*
382  * This flag is true while processes are stopped for a checkpoint/resume.
383  * Controlling processes of direct pm'd devices are not available to
384  * participate in power level changes, so we bypass them when this is set.
385  */
386 static int	pm_processes_stopped;
387 
388 #ifdef	DEBUG
389 
390 /*
391  * see common/sys/epm.h for PMD_* values
392  */
393 
394 uint_t		pm_debug = 0;
395 
396 /*
397  * If pm_divertdebug is set, then no prom_printf calls will be made by
398  * PMD(), which will prevent debug output from bringing up the console
399  * frame buffer.  Clearing this variable before setting pm_debug will result
400  * in PMD output going to the console.
401  *
402  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
403  * deadlocks and decremented at the end of pm_set_power()
404  */
405 uint_t		pm_divertdebug = 1;
406 volatile uint_t pm_debug_to_console = 0;
407 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
408 
409 void prdeps(char *);
410 #endif
411 
412 /* Globals */
413 
414 /*
415  * List of recorded thresholds and dependencies
416  */
417 pm_thresh_rec_t *pm_thresh_head;
418 krwlock_t pm_thresh_rwlock;
419 
420 pm_pdr_t *pm_dep_head;
421 static int pm_unresolved_deps = 0;
422 static int pm_prop_deps = 0;
423 
424 /*
425  * List of devices that exported no-involuntary-power-cycles property
426  */
427 pm_noinvol_t *pm_noinvol_head;
428 
429 /*
430  * Locks used in noinvol processing
431  */
432 krwlock_t pm_noinvol_rwlock;
433 kmutex_t pm_remdrv_lock;
434 
435 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
436 int pm_system_idle_threshold;
437 int pm_cpu_idle_threshold;
438 
439 /*
440  * By default nexus has 0 threshold, and depends on its children to keep it up
441  */
442 int pm_default_nexus_threshold = 0;
443 
444 /*
445  * Data structures shared with common/io/pm.c
446  */
447 kmutex_t	pm_clone_lock;
448 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
449 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
450 unsigned char	pm_interest[PM_MAX_CLONE];
451 struct pollhead	pm_pollhead;
452 
453 /*
454  * Data structures shared with common/io/srn.c
455  */
456 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
457 void (*srn_signal)(int type, int event);
458 int srn_inuse;				/* stop srn detach */
459 
460 extern int	hz;
461 extern char	*platform_module_list[];
462 
463 /*
464  * Wrappers for use in ddi_walk_devs
465  */
466 
467 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
468 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
469 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
470 static int		pm_discard_dep_walk(dev_info_t *, void *);
471 #ifdef DEBUG
472 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
473 #endif
474 
475 /*
476  * Routines for managing noinvol devices
477  */
478 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
479 void			pm_noinvol_update_node(dev_info_t *,
480 			    pm_bp_noinvol_t *req);
481 
482 kmutex_t pm_rsvp_lock;
483 kmutex_t pm_compcnt_lock;
484 krwlock_t pm_pscc_direct_rwlock;
485 krwlock_t pm_pscc_interest_rwlock;
486 
487 #define	PSC_INTEREST	0	/* belongs to interest psc list */
488 #define	PSC_DIRECT	1	/* belongs to direct psc list */
489 
490 pscc_t *pm_pscc_interest;
491 pscc_t *pm_pscc_direct;
492 
/* Major number of the driver for dip, as reported by ddi_driver_major(). */
#define	PM_MAJOR(dip) ddi_driver_major(dip)
/*
 * Evaluates to 0 when dip has no major number (DDI_MAJOR_T_NONE); otherwise
 * to NEXUS_DRV() of the driver's devopsp[] entry.  Note that dip is
 * evaluated more than once.
 */
#define	PM_IS_NEXUS(dip) ((PM_MAJOR(dip) == DDI_MAJOR_T_NONE) ? 0 : \
	NEXUS_DRV(devopsp[PM_MAJOR(dip)]))
/* True when a level change goes from zero to a non-zero level. */
#define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
/* True when a level change goes from a non-zero level to zero. */
#define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
498 
/*
 * Bump pm_comps_notlowest (count of components not at their lowest power)
 * while holding pm_compcnt_lock.  The count is only touched when dip is not
 * a nexus, or when it has PMC_DEV_THRESH or PMC_COMP_THRESH set in
 * devi_pm_flags.  When the count is about to leave zero,
 * pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST) is called first.
 *
 * The macro expands to a braced block, and its PMD() call references `pmf',
 * which must be supplied by the enclosing function (see PMD_FUNC).
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
/*
 * Counterpart of PM_INCR_NOTLOWEST: decrement pm_comps_notlowest while
 * holding pm_compcnt_lock, under the same nexus/threshold-flag condition.
 * Asserts that the count is non-zero before decrementing, and calls
 * pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST) when the count reaches zero.
 *
 * The macro expands to a braced block, and its PMD() call references `pmf',
 * which must be supplied by the enclosing function (see PMD_FUNC).
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
524 
525 /*
526  * console frame-buffer power-management is not enabled when
527  * debugging services are present.  to override, set pm_cfb_override
528  * to non-zero.
529  */
530 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
531 kmutex_t pm_cfb_lock;
532 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
533 #ifdef DEBUG
534 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
535 #else
536 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
537 #endif
538 
539 static dev_info_t *cfb_dip = 0;
540 static dev_info_t *cfb_dip_detaching = 0;
541 uint_t cfb_inuse = 0;
542 static ddi_softintr_t pm_soft_id;
543 static clock_t pm_soft_pending;
544 int	pm_scans_disabled = 0;
545 
546 /*
547  * A structure to record the fact that one thread has borrowed a lock held
548  * by another thread.  The context requires that the lender block on the
549  * completion of the borrower.
550  */
/* One entry in the singly linked list of outstanding lock loans. */
typedef struct lock_loan {
	struct lock_loan	*pmlk_next;	/* next entry in the list */
	kthread_t		*pmlk_borrower;	/* thread borrowing the lock */
	kthread_t		*pmlk_lender;	/* thread that holds the lock */
	dev_info_t		*pmlk_dip;	/* presumably the node whose */
						/* lock is loaned -- confirm */
} lock_loan_t;
static lock_loan_t lock_loan_head;	/* list head is a dummy element */
558 
/*
 * PMD_FUNC(func, name) declares a local `char *func' initialized to the
 * string `name' for use in PMD() debug output.  It only produces a
 * declaration when both DEBUG and PMDDEBUG are defined; in every other
 * configuration it expands to nothing.  The expansion carries its own
 * trailing semicolon, so uses are written without one.
 */
#ifdef	DEBUG
#ifdef	PMDDEBUG
#define	PMD_FUNC(func, name)	char *(func) = (name);
#else	/* !PMDDEBUG */
#define	PMD_FUNC(func, name)
#endif	/* PMDDEBUG */
#else	/* !DEBUG */
#define	PMD_FUNC(func, name)
#endif	/* DEBUG */
568 
569 
570 /*
571  * Must be called before first device (including pseudo) attach
572  */
573 void
574 pm_init_locks(void)
575 {
576 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
577 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
578 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
579 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
580 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
581 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
582 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
583 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
584 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
585 }
586 
587 static boolean_t
588 pm_cpr_callb(void *arg, int code)
589 {
590 	_NOTE(ARGUNUSED(arg))
591 	static int auto_save;
592 	static pm_cpupm_t cpupm_save;
593 	static int pm_reset_timestamps(dev_info_t *, void *);
594 
595 	switch (code) {
596 	case CB_CODE_CPR_CHKPT:
597 		/*
598 		 * Cancel scan or wait for scan in progress to finish
599 		 * Other threads may be trying to restart the scan, so we
600 		 * have to keep at it unil it sticks
601 		 */
602 		mutex_enter(&pm_scan_lock);
603 		ASSERT(!pm_scans_disabled);
604 		pm_scans_disabled = 1;
605 		auto_save = autopm_enabled;
606 		autopm_enabled = 0;
607 		cpupm_save = cpupm;
608 		cpupm = PM_CPUPM_NOTSET;
609 		mutex_exit(&pm_scan_lock);
610 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
611 		break;
612 
613 	case CB_CODE_CPR_RESUME:
614 		ASSERT(!autopm_enabled);
615 		ASSERT(cpupm == PM_CPUPM_NOTSET);
616 		ASSERT(pm_scans_disabled);
617 		pm_scans_disabled = 0;
618 		/*
619 		 * Call pm_reset_timestamps to reset timestamps of each
620 		 * device to the time when the system is resumed so that their
621 		 * idleness can be re-calculated. That's to avoid devices from
622 		 * being powered down right after resume if the system was in
623 		 * suspended mode long enough.
624 		 */
625 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
626 
627 		autopm_enabled = auto_save;
628 		cpupm = cpupm_save;
629 		/*
630 		 * If there is any auto-pm device, get the scanning
631 		 * going. Otherwise don't bother.
632 		 */
633 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
634 		break;
635 	}
636 	return (B_TRUE);
637 }
638 
639 /*
640  * This callback routine is called when there is a system panic.  This function
641  * exists for prototype matching.
642  */
643 static boolean_t
644 pm_panic_callb(void *arg, int code)
645 {
646 	_NOTE(ARGUNUSED(arg, code))
647 	void pm_cfb_check_and_powerup(void);
648 	PMD(PMD_CFB, ("pm_panic_callb\n"))
649 	pm_cfb_check_and_powerup();
650 	return (B_TRUE);
651 }
652 
653 static boolean_t
654 pm_halt_callb(void *arg, int code)
655 {
656 	_NOTE(ARGUNUSED(arg, code))
657 	return (B_TRUE);
658 }
659 
660 /*
661  * This needs to be called after the root and platform drivers are loaded
662  * and be single-threaded with respect to driver attach/detach
663  */
664 void
665 pm_init(void)
666 {
667 	PMD_FUNC(pmf, "pm_init")
668 	char **mod;
669 	extern pri_t minclsyspri;
670 	static void pm_dep_thread(void);
671 
672 	pm_comps_notlowest = 0;
673 	pm_system_idle_threshold = pm_default_idle_threshold;
674 	pm_cpu_idle_threshold = 0;
675 
676 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
677 	    CB_CL_CPR_PM, "pm_cpr");
678 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
679 	    CB_CL_PANIC, "pm_panic");
680 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
681 	    CB_CL_HALT, "pm_halt");
682 
683 	/*
684 	 * Create a thread to do dependency processing.
685 	 */
686 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
687 	    TS_RUN, minclsyspri);
688 
689 	/*
690 	 * loadrootmodules already loaded these ppm drivers, now get them
691 	 * attached so they can claim the root drivers as they attach
692 	 */
693 	for (mod = platform_module_list; *mod; mod++) {
694 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
695 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
696 			    *mod);
697 		} else {
698 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
699 			    ddi_major_to_name(ddi_name_to_major(*mod))))
700 		}
701 	}
702 }
703 
704 /*
705  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
706  * enabled) when device becomes power managed or after a failed detach and
707  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
708  * a CPR resume to get all the devices scanning again.
709  */
710 void
711 pm_scan_init(dev_info_t *dip)
712 {
713 	PMD_FUNC(pmf, "scan_init")
714 	pm_scan_t	*scanp;
715 
716 	ASSERT(!PM_ISBC(dip));
717 
718 	PM_LOCK_DIP(dip);
719 	scanp = PM_GET_PM_SCAN(dip);
720 	if (!scanp) {
721 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
722 		    pmf, PM_DEVICE(dip)))
723 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
724 		DEVI(dip)->devi_pm_scan = scanp;
725 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
726 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
727 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
728 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
729 	}
730 	PM_UNLOCK_DIP(dip);
731 }
732 
733 /*
734  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
735  */
736 void
737 pm_scan_fini(dev_info_t *dip)
738 {
739 	PMD_FUNC(pmf, "scan_fini")
740 	pm_scan_t	*scanp;
741 
742 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
743 	ASSERT(!PM_ISBC(dip));
744 	PM_LOCK_DIP(dip);
745 	scanp = PM_GET_PM_SCAN(dip);
746 	if (!scanp) {
747 		PM_UNLOCK_DIP(dip);
748 		return;
749 	}
750 
751 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
752 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
753 
754 	kmem_free(scanp, sizeof (pm_scan_t));
755 	DEVI(dip)->devi_pm_scan = NULL;
756 	PM_UNLOCK_DIP(dip);
757 }
758 
759 /*
760  * Given a pointer to a component struct, return the current power level
761  * (struct contains index unless it is a continuous level).
762  * Located here in hopes of getting both this and dev_is_needed into the
763  * cache together
764  */
765 static int
766 cur_power(pm_component_t *cp)
767 {
768 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
769 		return (cp->pmc_cur_pwr);
770 
771 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
772 }
773 
774 static char *
775 pm_decode_direction(int direction)
776 {
777 	switch (direction) {
778 	case PM_LEVEL_UPONLY:
779 		return ("up");
780 
781 	case PM_LEVEL_EXACT:
782 		return ("exact");
783 
784 	case PM_LEVEL_DOWNONLY:
785 		return ("down");
786 
787 	default:
788 		return ("INVALID DIRECTION");
789 	}
790 }
791 
792 char *
793 pm_decode_op(pm_bus_power_op_t op)
794 {
795 	switch (op) {
796 	case BUS_POWER_CHILD_PWRCHG:
797 		return ("CHILD_PWRCHG");
798 	case BUS_POWER_NEXUS_PWRUP:
799 		return ("NEXUS_PWRUP");
800 	case BUS_POWER_PRE_NOTIFICATION:
801 		return ("PRE_NOTIFICATION");
802 	case BUS_POWER_POST_NOTIFICATION:
803 		return ("POST_NOTIFICATION");
804 	case BUS_POWER_HAS_CHANGED:
805 		return ("HAS_CHANGED");
806 	case BUS_POWER_NOINVOL:
807 		return ("NOINVOL");
808 	default:
809 		return ("UNKNOWN OP");
810 	}
811 }
812 
813 /*
814  * Returns true if level is a possible (valid) power level for component
815  */
816 int
817 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
818 {
819 	PMD_FUNC(pmf, "e_pm_valid_power")
820 	pm_component_t *cp = PM_CP(dip, cmpt);
821 	int i;
822 	int *ip = cp->pmc_comp.pmc_lvals;
823 	int limit = cp->pmc_comp.pmc_numlevels;
824 
825 	if (level < 0)
826 		return (0);
827 	for (i = 0; i < limit; i++) {
828 		if (level == *ip++)
829 			return (1);
830 	}
831 #ifdef DEBUG
832 	if (pm_debug & PMD_FAIL) {
833 		ip = cp->pmc_comp.pmc_lvals;
834 
835 		for (i = 0; i < limit; i++)
836 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
837 			    pmf, i, *ip++))
838 	}
839 #endif
840 	return (0);
841 }
842 
843 /*
844  * Returns true if device is pm'd (after calling pm_start if need be)
845  */
846 int
847 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
848 {
849 	pm_info_t *info;
850 	static int pm_start(dev_info_t *dip);
851 
852 	/*
853 	 * Check if the device is power managed if not.
854 	 * To make the common case (device is power managed already)
855 	 * fast, we check without the lock.  If device is not already
856 	 * power managed, then we take the lock and the long route through
857 	 * go get it managed.  Devices never go unmanaged until they
858 	 * detach.
859 	 */
860 	info = PM_GET_PM_INFO(dip);
861 	if (!info) {
862 		if (!DEVI_IS_ATTACHING(dip)) {
863 			return (0);
864 		}
865 		if (pm_start(dip) != DDI_SUCCESS) {
866 			return (0);
867 		}
868 		info = PM_GET_PM_INFO(dip);
869 	}
870 	ASSERT(info);
871 	if (infop != NULL)
872 		*infop = info;
873 	return (1);
874 }
875 
876 int
877 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
878 {
879 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
880 		if (cpp != NULL)
881 			*cpp = PM_CP(dip, cmpt);
882 		return (1);
883 	} else {
884 		return (0);
885 	}
886 }
887 
888 /*
889  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
890  */
891 static int
892 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
893 {
894 	PMD_FUNC(pmf, "din")
895 	pm_component_t *cp;
896 	char *pathbuf;
897 	int result;
898 
899 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
900 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
901 	    !e_pm_valid_power(dip, cmpt, level))
902 		return (DDI_FAILURE);
903 
904 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
905 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
906 	    level, cur_power(cp)))
907 
908 	if (pm_set_power(dip, cmpt, level,  direction,
909 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
910 		if (direction == PM_LEVEL_UPONLY) {
911 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
912 			(void) ddi_pathname(dip, pathbuf);
913 			cmn_err(CE_WARN, "Device %s failed to power up.",
914 			    pathbuf);
915 			kmem_free(pathbuf, MAXPATHLEN);
916 		}
917 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
918 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
919 		    pm_decode_direction(direction), level, result))
920 		return (DDI_FAILURE);
921 	}
922 
923 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
924 	    PM_DEVICE(dip)))
925 	pm_rescan(dip);
926 	return (DDI_SUCCESS);
927 }
928 
929 /*
930  * We can get multiple pm_rescan() threads, if one of them discovers
931  * that no scan is running at the moment, it kicks it into action.
932  * Otherwise, it tells the current scanning thread to scan again when
933  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
934  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
935  * thread at a time runs the pm_scan_dev() code.
936  */
937 void
938 pm_rescan(void *arg)
939 {
940 	PMD_FUNC(pmf, "rescan")
941 	dev_info_t	*dip = (dev_info_t *)arg;
942 	pm_info_t	*info;
943 	pm_scan_t	*scanp;
944 	timeout_id_t	scanid;
945 
946 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
947 	PM_LOCK_DIP(dip);
948 	info = PM_GET_PM_INFO(dip);
949 	scanp = PM_GET_PM_SCAN(dip);
950 	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
951 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
952 		PM_UNLOCK_DIP(dip);
953 		return;
954 	}
955 	if (scanp->ps_scan_flags & PM_SCANNING) {
956 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
957 		PM_UNLOCK_DIP(dip);
958 		return;
959 	} else if (scanp->ps_scan_id) {
960 		scanid = scanp->ps_scan_id;
961 		scanp->ps_scan_id = 0;
962 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
963 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
964 		PM_UNLOCK_DIP(dip);
965 		(void) untimeout(scanid);
966 		PM_LOCK_DIP(dip);
967 	}
968 
969 	/*
970 	 * Dispatching pm_scan during attach time is risky due to the fact that
971 	 * attach might soon fail and dip dissolved, and panic may happen while
972 	 * attempting to stop scan. So schedule a pm_rescan instead.
973 	 * (Note that if either of the first two terms are true, taskq_dispatch
974 	 * will not be invoked).
975 	 *
976 	 * Multiple pm_scan dispatching is unecessary and costly to keep track
977 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
978 	 * to regulate the dispatching.
979 	 *
980 	 * Scan is stopped before the device is detached (in pm_detaching())
981 	 * but it may get re-started during the post_detach processing if the
982 	 * driver fails to detach.
983 	 */
984 	if (DEVI_IS_ATTACHING(dip) ||
985 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
986 	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
987 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
988 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
989 		if (scanp->ps_scan_id) {
990 			scanid = scanp->ps_scan_id;
991 			scanp->ps_scan_id = 0;
992 			PM_UNLOCK_DIP(dip);
993 			(void) untimeout(scanid);
994 			PM_LOCK_DIP(dip);
995 			if (scanp->ps_scan_id) {
996 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
997 				    "thread scheduled pm_rescan, scanid %lx\n",
998 				    pmf, PM_DEVICE(dip),
999 				    (ulong_t)scanp->ps_scan_id))
1000 				PM_UNLOCK_DIP(dip);
1001 				return;
1002 			}
1003 		}
1004 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1005 		    (scanp->ps_idle_down ? pm_id_ticks :
1006 		    (PM_MIN_SCAN(dip) * hz)));
1007 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
1008 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
1009 		    (ulong_t)scanp->ps_scan_id))
1010 	} else {
1011 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
1012 		    pmf, PM_DEVICE(dip)))
1013 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
1014 	}
1015 	PM_UNLOCK_DIP(dip);
1016 }
1017 
1018 void
1019 pm_scan(void *arg)
1020 {
1021 	PMD_FUNC(pmf, "scan")
1022 	dev_info_t	*dip = (dev_info_t *)arg;
1023 	pm_scan_t	*scanp;
1024 	time_t		nextscan;
1025 
1026 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
1027 
1028 	PM_LOCK_DIP(dip);
1029 	scanp = PM_GET_PM_SCAN(dip);
1030 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1031 
1032 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1033 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
1034 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
1035 		PM_UNLOCK_DIP(dip);
1036 		return;
1037 	}
1038 
1039 	if (scanp->ps_idle_down) {
1040 		/*
1041 		 * make sure we remember idledown was in affect until
1042 		 * we've completed the scan
1043 		 */
1044 		PMID_SET_SCANS(scanp->ps_idle_down)
1045 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
1046 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1047 	}
1048 
1049 	/* possible having two threads running pm_scan() */
1050 	if (scanp->ps_scan_flags & PM_SCANNING) {
1051 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
1052 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
1053 		    pmf, PM_DEVICE(dip)))
1054 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1055 		PM_UNLOCK_DIP(dip);
1056 		return;
1057 	}
1058 
1059 	scanp->ps_scan_flags |= PM_SCANNING;
1060 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1061 	do {
1062 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
1063 		PM_UNLOCK_DIP(dip);
1064 		nextscan = pm_scan_dev(dip);
1065 		PM_LOCK_DIP(dip);
1066 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
1067 
1068 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
1069 	scanp->ps_scan_flags &= ~PM_SCANNING;
1070 
1071 	if (scanp->ps_idle_down) {
1072 		scanp->ps_idle_down &= ~PMID_SCANS;
1073 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1074 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1075 	}
1076 
1077 	/* schedule for next idle check */
1078 	if (nextscan != LONG_MAX) {
1079 		if (nextscan > (LONG_MAX / hz))
1080 			nextscan = (LONG_MAX - 1) / hz;
1081 		if (scanp->ps_scan_id) {
1082 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1083 			    "another rescan scheduled scanid(%lx)\n", pmf,
1084 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1085 			PM_UNLOCK_DIP(dip);
1086 			return;
1087 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
1088 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1089 			    (clock_t)(nextscan * hz));
1090 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1091 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1092 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1093 		}
1094 	}
1095 	PM_UNLOCK_DIP(dip);
1096 }
1097 
1098 void
1099 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1100 {
1101 	int components = PM_NUMCMPTS(dip);
1102 	int i;
1103 
1104 	ASSERT(components > 0);
1105 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1106 	for (i = 0; i < components; i++) {
1107 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1108 	}
1109 	PM_UNLOCK_BUSY(dip);
1110 }
1111 
1112 /*
1113  * Returns true if device needs to be kept up because it exported the
1114  * "no-involuntary-power-cycles" property or we're pretending it did (console
1115  * fb case) or it is an ancestor of such a device and has used up the "one
1116  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1117  * upon detach
1118  */
1119 int
1120 pm_noinvol(dev_info_t *dip)
1121 {
1122 	PMD_FUNC(pmf, "noinvol")
1123 
1124 	/*
1125 	 * This doesn't change over the life of a driver, so no locking needed
1126 	 */
1127 	if (PM_IS_CFB(dip)) {
1128 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1129 		    pmf, PM_DEVICE(dip)))
1130 		return (1);
1131 	}
1132 	/*
1133 	 * Not an issue if no such kids
1134 	 */
1135 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1136 #ifdef DEBUG
1137 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1138 			dev_info_t *pdip = dip;
1139 			do {
1140 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1141 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1142 				    DEVI(pdip)->devi_pm_noinvolpm,
1143 				    DEVI(pdip)->devi_pm_volpmd))
1144 				pdip = ddi_get_parent(pdip);
1145 			} while (pdip);
1146 		}
1147 #endif
1148 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1149 		return (0);
1150 	}
1151 
1152 	/*
1153 	 * Since we now maintain the counts correct at every node, we no longer
1154 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1155 	 * without the children getting their counts adjusted.
1156 	 */
1157 
1158 #ifdef	DEBUG
1159 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1160 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1161 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1162 		    PM_DEVICE(dip)))
1163 #endif
1164 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1165 }
1166 
1167 /*
1168  * This function performs the actual scanning of the device.
1169  * It attempts to power off the indicated device's components if they have
1170  * been idle and other restrictions are met.
1171  * pm_scan_dev calculates and returns when the next scan should happen for
1172  * this device.
1173  */
1174 time_t
1175 pm_scan_dev(dev_info_t *dip)
1176 {
1177 	PMD_FUNC(pmf, "scan_dev")
1178 	pm_scan_t	*scanp;
1179 	time_t		*timestamp, idletime, now, thresh;
1180 	time_t		timeleft = 0;
1181 #ifdef PMDDEBUG
1182 	int		curpwr;
1183 #endif
1184 	int		i, nxtpwr, pwrndx, unused;
1185 	size_t		size;
1186 	pm_component_t	 *cp;
1187 	dev_info_t	*pdip = ddi_get_parent(dip);
1188 	int		circ;
1189 	static int	cur_threshold(dev_info_t *, int);
1190 	static int	pm_next_lower_power(pm_component_t *, int);
1191 	clock_t		min_scan = pm_default_min_scan;
1192 
1193 	/*
1194 	 * skip attaching device
1195 	 */
1196 	if (DEVI_IS_ATTACHING(dip)) {
1197 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1198 		    pmf, PM_DEVICE(dip), min_scan))
1199 		return (min_scan);
1200 	}
1201 
1202 	PM_LOCK_DIP(dip);
1203 	scanp = PM_GET_PM_SCAN(dip);
1204 	min_scan = PM_MIN_SCAN(dip);
1205 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1206 
1207 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1208 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1209 	    PM_KUC(dip)))
1210 
1211 	/* no scan under the following conditions */
1212 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1213 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1214 	    (PM_KUC(dip) != 0) ||
1215 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1216 		PM_UNLOCK_DIP(dip);
1217 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1218 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1219 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1220 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1221 		    cpupm, PM_KUC(dip),
1222 		    PM_ISDIRECT(dip) ? "is" : "is not",
1223 		    pm_noinvol(dip) ? "is" : "is not"))
1224 		return (LONG_MAX);
1225 	}
1226 	PM_UNLOCK_DIP(dip);
1227 
1228 	if (!ndi_devi_tryenter(pdip, &circ)) {
1229 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1230 		    pmf, PM_DEVICE(pdip)))
1231 		return ((time_t)1);
1232 	}
1233 	now = gethrestime_sec();
1234 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1235 	timestamp = kmem_alloc(size, KM_SLEEP);
1236 	pm_get_timestamps(dip, timestamp);
1237 
1238 	/*
1239 	 * Since we removed support for backwards compatible devices,
1240 	 * (see big comment at top of file)
1241 	 * it is no longer required to deal with component 0 last.
1242 	 */
1243 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1244 		/*
1245 		 * If already off (an optimization, perhaps)
1246 		 */
1247 		cp = PM_CP(dip, i);
1248 		pwrndx = cp->pmc_cur_pwr;
1249 #ifdef PMDDEBUG
1250 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1251 		    PM_LEVEL_UNKNOWN :
1252 		    cp->pmc_comp.pmc_lvals[pwrndx];
1253 #endif
1254 
1255 		if (pwrndx == 0) {
1256 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1257 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1258 			/* skip device if off or at its lowest */
1259 			continue;
1260 		}
1261 
1262 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1263 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1264 			/* were busy or newly became busy by another thread */
1265 			if (timeleft == 0)
1266 				timeleft = max(thresh, min_scan);
1267 			else
1268 				timeleft = min(
1269 				    timeleft, max(thresh, min_scan));
1270 			continue;
1271 		}
1272 
1273 		idletime = now - timestamp[i];		/* idle time */
1274 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1275 		    pmf, PM_DEVICE(dip), i, idletime))
1276 		if (idletime >= thresh || PM_IS_PID(dip)) {
1277 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1278 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1279 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
1280 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1281 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1282 			    PM_CURPOWER(dip, i) != nxtpwr) {
1283 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1284 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1285 				    i, curpwr, nxtpwr))
1286 				timeleft = min_scan;
1287 				continue;
1288 			} else {
1289 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1290 				    "%d->%d, GOOD curpwr %d\n", pmf,
1291 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1292 				    cur_power(cp)))
1293 
1294 				if (nxtpwr == 0)	/* component went off */
1295 					continue;
1296 
1297 				/*
1298 				 * scan to next lower level
1299 				 */
1300 				if (timeleft == 0)
1301 					timeleft = max(
1302 					    1, cur_threshold(dip, i));
1303 				else
1304 					timeleft = min(timeleft,
1305 					    max(1, cur_threshold(dip, i)));
1306 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1307 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1308 				    i, timeleft))
1309 			}
1310 		} else {	/* comp not idle long enough */
1311 			if (timeleft == 0)
1312 				timeleft = thresh - idletime;
1313 			else
1314 				timeleft = min(timeleft, (thresh - idletime));
1315 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1316 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1317 		}
1318 	}
1319 	ndi_devi_exit(pdip, circ);
1320 	kmem_free(timestamp, size);
1321 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1322 	    PM_DEVICE(dip), timeleft))
1323 
1324 	/*
1325 	 * if components are already at lowest level, timeleft is left 0
1326 	 */
1327 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1328 }
1329 
1330 /*
1331  * pm_scan_stop - cancel scheduled pm_rescan,
1332  *                wait for termination of dispatched pm_scan thread
1333  *                     and active pm_scan_dev thread.
1334  */
1335 void
1336 pm_scan_stop(dev_info_t *dip)
1337 {
1338 	PMD_FUNC(pmf, "scan_stop")
1339 	pm_scan_t	*scanp;
1340 	timeout_id_t	scanid;
1341 
1342 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1343 	PM_LOCK_DIP(dip);
1344 	scanp = PM_GET_PM_SCAN(dip);
1345 	if (!scanp) {
1346 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1347 		    pmf, PM_DEVICE(dip)))
1348 		PM_UNLOCK_DIP(dip);
1349 		return;
1350 	}
1351 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1352 
1353 	/* cancel scheduled scan taskq */
1354 	while (scanp->ps_scan_id) {
1355 		scanid = scanp->ps_scan_id;
1356 		scanp->ps_scan_id = 0;
1357 		PM_UNLOCK_DIP(dip);
1358 		(void) untimeout(scanid);
1359 		PM_LOCK_DIP(dip);
1360 	}
1361 
1362 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1363 		PM_UNLOCK_DIP(dip);
1364 		delay(1);
1365 		PM_LOCK_DIP(dip);
1366 	}
1367 	PM_UNLOCK_DIP(dip);
1368 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1369 }
1370 
1371 int
1372 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1373 {
1374 	_NOTE(ARGUNUSED(arg))
1375 
1376 	if (!PM_GET_PM_SCAN(dip))
1377 		return (DDI_WALK_CONTINUE);
1378 	ASSERT(!PM_ISBC(dip));
1379 	pm_scan_stop(dip);
1380 	return (DDI_WALK_CONTINUE);
1381 }
1382 
1383 /*
1384  * Converts a power level value to its index
1385  */
1386 static int
1387 power_val_to_index(pm_component_t *cp, int val)
1388 {
1389 	int limit, i, *ip;
1390 
1391 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1392 	    val != PM_LEVEL_EXACT);
1393 	/*  convert power value into index (i) */
1394 	limit = cp->pmc_comp.pmc_numlevels;
1395 	ip = cp->pmc_comp.pmc_lvals;
1396 	for (i = 0; i < limit; i++)
1397 		if (val == *ip++)
1398 			return (i);
1399 	return (-1);
1400 }
1401 
1402 /*
1403  * Converts a numeric power level to a printable string
1404  */
1405 static char *
1406 power_val_to_string(pm_component_t *cp, int val)
1407 {
1408 	int index;
1409 
1410 	if (val == PM_LEVEL_UPONLY)
1411 		return ("<UPONLY>");
1412 
1413 	if (val == PM_LEVEL_UNKNOWN ||
1414 	    (index = power_val_to_index(cp, val)) == -1)
1415 		return ("<LEVEL_UNKNOWN>");
1416 
1417 	return (cp->pmc_comp.pmc_lnames[index]);
1418 }
1419 
1420 /*
1421  * Return true if this node has been claimed by a ppm.
1422  */
1423 static int
1424 pm_ppm_claimed(dev_info_t *dip)
1425 {
1426 	return (PPM(dip) != NULL);
1427 }
1428 
1429 /*
1430  * A node which was voluntarily power managed has just used up its "free cycle"
1431  * and need is volpmd field cleared, and the same done to all its descendents
1432  */
1433 static void
1434 pm_clear_volpm_dip(dev_info_t *dip)
1435 {
1436 	PMD_FUNC(pmf, "clear_volpm_dip")
1437 
1438 	if (dip == NULL)
1439 		return;
1440 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1441 	    PM_DEVICE(dip)))
1442 	DEVI(dip)->devi_pm_volpmd = 0;
1443 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1444 		pm_clear_volpm_dip(dip);
1445 	}
1446 }
1447 
1448 /*
1449  * A node which was voluntarily power managed has used up the "free cycles"
1450  * for the subtree that it is the root of.  Scan through the list of detached
1451  * nodes and adjust the counts of any that are descendents of the node.
1452  */
1453 static void
1454 pm_clear_volpm_list(dev_info_t *dip)
1455 {
1456 	PMD_FUNC(pmf, "clear_volpm_list")
1457 	char	*pathbuf;
1458 	size_t	len;
1459 	pm_noinvol_t *ip;
1460 
1461 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1462 	(void) ddi_pathname(dip, pathbuf);
1463 	len = strlen(pathbuf);
1464 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1465 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1466 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1467 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1468 		    ip->ni_path))
1469 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1470 		    ip->ni_path[len] == '/') {
1471 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1472 			    ip->ni_path))
1473 			ip->ni_volpmd = 0;
1474 			ip->ni_wasvolpmd = 0;
1475 		}
1476 	}
1477 	kmem_free(pathbuf, MAXPATHLEN);
1478 	rw_exit(&pm_noinvol_rwlock);
1479 }
1480 
1481 /*
1482  * Powers a device, suspending or resuming the driver if it is a backward
1483  * compatible device, calling into ppm to change power level.
1484  * Called with the component's power lock held.
1485  */
1486 static int
1487 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1488     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1489 {
1490 	PMD_FUNC(pmf, "power_dev")
1491 	power_req_t power_req;
1492 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1493 	int		resume_needed = 0;
1494 	int		suspended = 0;
1495 	int		result;
1496 #ifdef PMDDEBUG
1497 	struct pm_component *cp = PM_CP(dip, comp);
1498 #endif
1499 	int		bc = PM_ISBC(dip);
1500 	int pm_all_components_off(dev_info_t *);
1501 	int		clearvolpmd = 0;
1502 	char		pathbuf[MAXNAMELEN];
1503 #ifdef PMDDEBUG
1504 	char *ppmname, *ppmaddr;
1505 #endif
1506 	/*
1507 	 * If this is comp 0 of a backwards compat device and we are
1508 	 * going to take the power away, we need to detach it with
1509 	 * DDI_PM_SUSPEND command.
1510 	 */
1511 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1512 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1513 			/* We could not suspend before turning cmpt zero off */
1514 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1515 			    pmf, PM_DEVICE(dip)))
1516 			return (DDI_FAILURE);
1517 		} else {
1518 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1519 			suspended++;
1520 		}
1521 	}
1522 	power_req.request_type = PMR_PPM_SET_POWER;
1523 	power_req.req.ppm_set_power_req.who = dip;
1524 	power_req.req.ppm_set_power_req.cmpt = comp;
1525 	power_req.req.ppm_set_power_req.old_level = old_level;
1526 	power_req.req.ppm_set_power_req.new_level = level;
1527 	power_req.req.ppm_set_power_req.canblock = canblock;
1528 	power_req.req.ppm_set_power_req.cookie = NULL;
1529 #ifdef PMDDEBUG
1530 	if (pm_ppm_claimed(dip)) {
1531 		ppmname = PM_NAME(PPM(dip));
1532 		ppmaddr = PM_ADDR(PPM(dip));
1533 
1534 	} else {
1535 		ppmname = "noppm";
1536 		ppmaddr = "0";
1537 	}
1538 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1539 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1540 	    power_val_to_string(cp, old_level), old_level,
1541 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1542 #endif
1543 	/*
1544 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1545 	 * bc device comp 0 is powering on, then we count it as a power cycle
1546 	 * against its voluntary count.
1547 	 */
1548 	if (DEVI(dip)->devi_pm_volpmd &&
1549 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1550 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1551 		clearvolpmd = 1;
1552 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1553 	    &power_req, &result)) == DDI_SUCCESS) {
1554 		/*
1555 		 * Now do involuntary pm accounting;  If we've just cycled power
1556 		 * on a voluntarily pm'd node, and by inference on its entire
1557 		 * subtree, we need to set the subtree (including those nodes
1558 		 * already detached) volpmd counts to 0, and subtract out the
1559 		 * value of the current node's volpmd count from the ancestors
1560 		 */
1561 		if (clearvolpmd) {
1562 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1563 			pm_clear_volpm_dip(dip);
1564 			pm_clear_volpm_list(dip);
1565 			if (volpmd) {
1566 				(void) ddi_pathname(dip, pathbuf);
1567 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1568 				    volpmd, 0, pathbuf, dip);
1569 			}
1570 		}
1571 	} else {
1572 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1573 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1574 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1575 	}
1576 	/*
1577 	 * If some other devices were also powered up (e.g. other cpus in
1578 	 * the same domain) return a pointer to that list
1579 	 */
1580 	if (devlist) {
1581 		*devlist = (pm_ppm_devlist_t *)
1582 		    power_req.req.ppm_set_power_req.cookie;
1583 	}
1584 	/*
1585 	 * We will have to resume the device if the device is backwards compat
1586 	 * device and either of the following is true:
1587 	 * -This is comp 0 and we have successfully powered it up
1588 	 * -This is comp 0 and we have failed to power it down. Resume is
1589 	 *  needed because we have suspended it above
1590 	 */
1591 
1592 	if (bc && comp == 0) {
1593 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1594 		if (power_op_ret == DDI_SUCCESS) {
1595 			if (POWERING_ON(old_level, level)) {
1596 				/*
1597 				 * It must be either suspended or resumed
1598 				 * via pm_power_has_changed path
1599 				 */
1600 				ASSERT((DEVI(dip)->devi_pm_flags &
1601 				    PMC_SUSPENDED) ||
1602 				    (PM_CP(dip, comp)->pmc_flags &
1603 				    PM_PHC_WHILE_SET_POWER));
1604 
1605 					resume_needed = suspended;
1606 			}
1607 		} else {
1608 			if (POWERING_OFF(old_level, level)) {
1609 				/*
1610 				 * It must be either suspended or resumed
1611 				 * via pm_power_has_changed path
1612 				 */
1613 				ASSERT((DEVI(dip)->devi_pm_flags &
1614 				    PMC_SUSPENDED) ||
1615 				    (PM_CP(dip, comp)->pmc_flags &
1616 				    PM_PHC_WHILE_SET_POWER));
1617 
1618 					resume_needed = suspended;
1619 			}
1620 		}
1621 	}
1622 	if (resume_needed) {
1623 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1624 		/* ppm is not interested in DDI_PM_RESUME */
1625 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1626 		    DDI_SUCCESS) {
1627 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1628 		} else
1629 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1630 			    PM_DEVICE(dip));
1631 	}
1632 	return (power_op_ret);
1633 }
1634 
1635 /*
1636  * Return true if we are the owner or a borrower of the devi lock.  See
1637  * pm_lock_power_single() about borrowing the lock.
1638  */
1639 static int
1640 pm_devi_lock_held(dev_info_t *dip)
1641 {
1642 	lock_loan_t *cur;
1643 
1644 	if (DEVI_BUSY_OWNED(dip))
1645 		return (1);
1646 
1647 	/* return false if no locks borrowed */
1648 	if (lock_loan_head.pmlk_next == NULL)
1649 		return (0);
1650 
1651 	mutex_enter(&pm_loan_lock);
1652 	/* see if our thread is registered as a lock borrower. */
1653 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1654 		if (cur->pmlk_borrower == curthread)
1655 			break;
1656 	mutex_exit(&pm_loan_lock);
1657 
1658 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1659 }
1660 
1661 /*
1662  * pm_set_power: adjusts power level of device.	 Assumes device is power
1663  * manageable & component exists.
1664  *
1665  * Cases which require us to bring up devices we keep up ("wekeepups") for
1666  * backwards compatible devices:
1667  *	component 0 is off and we're bringing it up from 0
1668  *		bring up wekeepup first
1669  *	and recursively when component 0 is off and we bring some other
1670  *	component up from 0
1671  * For devices which are not backward compatible, our dependency notion is much
1672  * simpler.  Unless all components are off, then wekeeps must be on.
1673  * We don't treat component 0 differently.
1674  * Canblock tells how to deal with a direct pm'd device.
1675  * Scan arg tells us if we were called from scan, in which case we don't need
1676  * to go back to the root node and walk down to change power.
1677  */
1678 int
1679 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1680     pm_canblock_t canblock, int scan, int *retp)
1681 {
1682 	PMD_FUNC(pmf, "set_power")
1683 	char		*pathbuf;
1684 	pm_bp_child_pwrchg_t bpc;
1685 	pm_sp_misc_t	pspm;
1686 	int		ret = DDI_SUCCESS;
1687 	int		unused = DDI_SUCCESS;
1688 	dev_info_t	*pdip = ddi_get_parent(dip);
1689 
1690 #ifdef DEBUG
1691 	int		diverted = 0;
1692 
1693 	/*
1694 	 * This prevents operations on the console from calling prom_printf and
1695 	 * either deadlocking or bringing up the console because of debug
1696 	 * output
1697 	 */
1698 	if (dip == cfb_dip) {
1699 		diverted++;
1700 		mutex_enter(&pm_debug_lock);
1701 		pm_divertdebug++;
1702 		mutex_exit(&pm_debug_lock);
1703 	}
1704 #endif
1705 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1706 	    direction == PM_LEVEL_EXACT);
1707 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1708 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1709 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1710 	(void) ddi_pathname(dip, pathbuf);
1711 	bpc.bpc_dip = dip;
1712 	bpc.bpc_path = pathbuf;
1713 	bpc.bpc_comp = comp;
1714 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1715 	bpc.bpc_nlevel = level;
1716 	pspm.pspm_direction = direction;
1717 	pspm.pspm_errnop = retp;
1718 	pspm.pspm_canblock = canblock;
1719 	pspm.pspm_scan = scan;
1720 	bpc.bpc_private = &pspm;
1721 
1722 	/*
1723 	 * If a config operation is being done (we've locked the parent) or
1724 	 * we already hold the power lock (we've locked the node)
1725 	 * then we can operate directly on the node because we have already
1726 	 * brought up all the ancestors, otherwise, we have to go back to the
1727 	 * top of the tree.
1728 	 */
1729 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1730 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1731 		    (void *)&bpc, (void *)&unused);
1732 	else
1733 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1734 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1735 #ifdef DEBUG
1736 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1737 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1738 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1739 	}
1740 	if (diverted) {
1741 		mutex_enter(&pm_debug_lock);
1742 		pm_divertdebug--;
1743 		mutex_exit(&pm_debug_lock);
1744 	}
1745 #endif
1746 	kmem_free(pathbuf, MAXPATHLEN);
1747 	return (ret);
1748 }
1749 
1750 /*
1751  * If holddip is set, then if a dip is found we return with the node held.
1752  *
1753  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1754  * (resolve_pathname), but it does not drive attach.
1755  */
1756 dev_info_t *
1757 pm_name_to_dip(char *pathname, int holddip)
1758 {
1759 	struct pathname pn;
1760 	char		*component;
1761 	dev_info_t	*parent, *child;
1762 	int		circ;
1763 
1764 	if ((pathname == NULL) || (*pathname != '/'))
1765 		return (NULL);
1766 
1767 	/* setup pathname and allocate component */
1768 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1769 		return (NULL);
1770 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1771 
1772 	/* start at top, process '/' component */
1773 	parent = child = ddi_root_node();
1774 	ndi_hold_devi(parent);
1775 	pn_skipslash(&pn);
1776 	ASSERT(i_ddi_devi_attached(parent));
1777 
1778 	/* process components of pathname */
1779 	while (pn_pathleft(&pn)) {
1780 		(void) pn_getcomponent(&pn, component);
1781 
1782 		/* enter parent and search for component child */
1783 		ndi_devi_enter(parent, &circ);
1784 		child = ndi_devi_findchild(parent, component);
1785 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1786 			child = NULL;
1787 			ndi_devi_exit(parent, circ);
1788 			ndi_rele_devi(parent);
1789 			goto out;
1790 		}
1791 
1792 		/* attached child found, hold child and release parent */
1793 		ndi_hold_devi(child);
1794 		ndi_devi_exit(parent, circ);
1795 		ndi_rele_devi(parent);
1796 
1797 		/* child becomes parent, and process next component */
1798 		parent = child;
1799 		pn_skipslash(&pn);
1800 
1801 		/* loop with active ndi_devi_hold of child->parent */
1802 	}
1803 
1804 out:
1805 	pn_free(&pn);
1806 	kmem_free(component, MAXNAMELEN);
1807 
1808 	/* if we are not asked to return with hold, drop current hold */
1809 	if (child && !holddip)
1810 		ndi_rele_devi(child);
1811 	return (child);
1812 }
1813 
1814 /*
1815  * Search for a dependency and mark it unsatisfied
1816  */
1817 static void
1818 pm_unsatisfy(char *keeper, char *kept)
1819 {
1820 	PMD_FUNC(pmf, "unsatisfy")
1821 	pm_pdr_t *dp;
1822 
1823 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1824 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1825 		if (!dp->pdr_isprop) {
1826 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1827 			    (dp->pdr_kept_count > 0) &&
1828 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1829 				if (dp->pdr_satisfied) {
1830 					dp->pdr_satisfied = 0;
1831 					pm_unresolved_deps++;
1832 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1833 					    "pm_unresolved_deps now %d\n", pmf,
1834 					    pm_unresolved_deps))
1835 				}
1836 			}
1837 		}
1838 	}
1839 }
1840 
1841 /*
1842  * Device dip is being un power managed, it keeps up count other devices.
1843  * We need to release any hold we have on the kept devices, and also
1844  * mark the dependency no longer satisfied.
1845  */
1846 static void
1847 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1848 {
1849 	PMD_FUNC(pmf, "unkeeps")
1850 	int i, j;
1851 	dev_info_t *kept;
1852 	dev_info_t *dip;
1853 	struct pm_component *cp;
1854 	int keeper_on = 0, circ;
1855 
1856 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1857 	    keeper, (void *)keptpaths))
1858 	/*
1859 	 * Try to grab keeper. Keeper may have gone away by now,
1860 	 * in this case, used the passed in value pwr
1861 	 */
1862 	dip = pm_name_to_dip(keeper, 1);
1863 	for (i = 0; i < count; i++) {
1864 		/* Release power hold */
1865 		kept = pm_name_to_dip(keptpaths[i], 1);
1866 		if (kept) {
1867 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1868 			    PM_DEVICE(kept), i))
1869 			/*
1870 			 * We need to check if we skipped a bringup here
1871 			 * because we could have failed the bringup
1872 			 * (ie DIRECT PM device) and have
1873 			 * not increment the count.
1874 			 */
1875 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1876 				keeper_on = 0;
1877 				PM_LOCK_POWER(dip, &circ);
1878 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1879 					cp = &DEVI(dip)->devi_pm_components[j];
1880 					if (cur_power(cp)) {
1881 						keeper_on++;
1882 						break;
1883 					}
1884 				}
1885 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1886 					pm_rele_power(kept);
1887 					DEVI(kept)->devi_pm_flags
1888 					    &= ~PMC_SKIP_BRINGUP;
1889 				}
1890 				PM_UNLOCK_POWER(dip, circ);
1891 			} else if (pwr) {
1892 				if (PM_SKBU(kept) == 0) {
1893 					pm_rele_power(kept);
1894 					DEVI(kept)->devi_pm_flags
1895 					    &= ~PMC_SKIP_BRINGUP;
1896 				}
1897 			}
1898 			ddi_release_devi(kept);
1899 		}
1900 		/*
1901 		 * mark this dependency not satisfied
1902 		 */
1903 		pm_unsatisfy(keeper, keptpaths[i]);
1904 	}
1905 	if (dip)
1906 		ddi_release_devi(dip);
1907 }
1908 
1909 /*
1910  * Device kept is being un power managed, it is kept up by keeper.
1911  * We need to mark the dependency no longer satisfied.
1912  */
1913 static void
1914 pm_unkepts(char *kept, char *keeper)
1915 {
1916 	PMD_FUNC(pmf, "unkepts")
1917 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1918 	ASSERT(keeper != NULL);
1919 	/*
1920 	 * mark this dependency not satisfied
1921 	 */
1922 	pm_unsatisfy(keeper, kept);
1923 }
1924 
1925 /*
1926  * Removes dependency information and hold on the kepts, if the path is a
1927  * path of a keeper.
1928  */
1929 static void
1930 pm_free_keeper(char *path, int pwr)
1931 {
1932 	pm_pdr_t *dp;
1933 	int i;
1934 	size_t length;
1935 
1936 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1937 		if (strcmp(dp->pdr_keeper, path) != 0)
1938 			continue;
1939 		/*
1940 		 * Remove all our kept holds and the dependency records,
1941 		 * then free up the kept lists.
1942 		 */
1943 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1944 		if (dp->pdr_kept_count)  {
1945 			for (i = 0; i < dp->pdr_kept_count; i++) {
1946 				length = strlen(dp->pdr_kept_paths[i]);
1947 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1948 			}
1949 			kmem_free(dp->pdr_kept_paths,
1950 			    dp->pdr_kept_count * sizeof (char **));
1951 			dp->pdr_kept_paths = NULL;
1952 			dp->pdr_kept_count = 0;
1953 		}
1954 	}
1955 }
1956 
1957 /*
1958  * Removes the device represented by path from the list of kepts, if the
1959  * path is a path of a kept
1960  */
1961 static void
1962 pm_free_kept(char *path)
1963 {
1964 	pm_pdr_t *dp;
1965 	int i;
1966 	int j, count;
1967 	size_t length;
1968 	char **paths;
1969 
1970 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1971 		if (dp->pdr_kept_count == 0)
1972 			continue;
1973 		count = dp->pdr_kept_count;
1974 		/* Remove this device from the kept path lists */
1975 		for (i = 0; i < count; i++) {
1976 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1977 				pm_unkepts(path, dp->pdr_keeper);
1978 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1979 				kmem_free(dp->pdr_kept_paths[i], length);
1980 				dp->pdr_kept_paths[i] = NULL;
1981 				dp->pdr_kept_count--;
1982 			}
1983 		}
1984 		/* Compact the kept paths array */
1985 		if (dp->pdr_kept_count) {
1986 			length = dp->pdr_kept_count * sizeof (char **);
1987 			paths = kmem_zalloc(length, KM_SLEEP);
1988 			j = 0;
1989 			for (i = 0; i < count; i++) {
1990 				if (dp->pdr_kept_paths[i] != NULL) {
1991 					paths[j] = dp->pdr_kept_paths[i];
1992 					j++;
1993 				}
1994 			}
1995 			ASSERT(j == dp->pdr_kept_count);
1996 		}
1997 		/* Now free the old array and point to the new one */
1998 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
1999 		if (dp->pdr_kept_count)
2000 			dp->pdr_kept_paths = paths;
2001 		else
2002 			dp->pdr_kept_paths = NULL;
2003 	}
2004 }
2005 
2006 /*
2007  * Free the dependency information for a device.
2008  */
2009 void
2010 pm_free_keeps(char *path, int pwr)
2011 {
2012 	PMD_FUNC(pmf, "free_keeps")
2013 
2014 #ifdef DEBUG
2015 	int doprdeps = 0;
2016 	void prdeps(char *);
2017 
2018 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2019 	if (pm_debug & PMD_KEEPS) {
2020 		doprdeps = 1;
2021 		prdeps("pm_free_keeps before");
2022 	}
2023 #endif
2024 	/*
2025 	 * First assume we are a keeper and remove all our kepts.
2026 	 */
2027 	pm_free_keeper(path, pwr);
2028 	/*
2029 	 * Now assume we a kept device, and remove all our records.
2030 	 */
2031 	pm_free_kept(path);
2032 #ifdef	DEBUG
2033 	if (doprdeps) {
2034 		prdeps("pm_free_keeps after");
2035 	}
2036 #endif
2037 }
2038 
2039 static int
2040 pm_is_kept(char *path)
2041 {
2042 	pm_pdr_t *dp;
2043 	int i;
2044 
2045 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2046 		if (dp->pdr_kept_count == 0)
2047 			continue;
2048 		for (i = 0; i < dp->pdr_kept_count; i++) {
2049 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2050 				return (1);
2051 		}
2052 	}
2053 	return (0);
2054 }
2055 
2056 static void
2057 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2058 {
2059 	PMD_FUNC(pmf, "hold_rele_power")
2060 	int circ;
2061 
2062 	if ((dip == NULL) ||
2063 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2064 		return;
2065 
2066 	PM_LOCK_POWER(dip, &circ);
2067 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2068 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2069 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2070 
2071 	PM_KUC(dip) += cnt;
2072 
2073 	ASSERT(PM_KUC(dip) >= 0);
2074 	PM_UNLOCK_POWER(dip, circ);
2075 
2076 	if (cnt < 0 && PM_KUC(dip) == 0)
2077 		pm_rescan(dip);
2078 }
2079 
2080 #define	MAX_PPM_HANDLERS	4
2081 
2082 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2083 
2084 struct	ppm_callbacks {
2085 	int (*ppmc_func)(dev_info_t *);
2086 	dev_info_t	*ppmc_dip;
2087 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2088 
2089 
2090 /*
2091  * This routine calls into all the registered ppms to notify them
2092  * that either all components of power-managed devices are at their
2093  * lowest levels or no longer all are at their lowest levels.
2094  */
2095 static void
2096 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2097 {
2098 	struct ppm_callbacks *ppmcp;
2099 	power_req_t power_req;
2100 	int result = 0;
2101 
2102 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2103 	power_req.req.ppm_all_lowest_req.mode = mode;
2104 	mutex_enter(&ppm_lock);
2105 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2106 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2107 		    DDI_CTLOPS_POWER, &power_req, &result);
2108 	mutex_exit(&ppm_lock);
2109 	if (mode == PM_ALL_LOWEST) {
2110 		if (autoS3_enabled) {
2111 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
2112 			    "autos3\n"))
2113 			mutex_enter(&srn_clone_lock);
2114 			if (srn_signal) {
2115 				srn_inuse++;
2116 				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
2117 				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
2118 				srn_inuse--;
2119 			} else {
2120 				PMD(PMD_SX, ("srn_signal NULL\n"))
2121 			}
2122 			mutex_exit(&srn_clone_lock);
2123 		} else {
2124 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
2125 			    "disabled\n"));
2126 		}
2127 	}
2128 }
2129 
2130 static void
2131 pm_set_pm_info(dev_info_t *dip, void *value)
2132 {
2133 	DEVI(dip)->devi_pm_info = value;
2134 }
2135 
2136 pm_rsvp_t *pm_blocked_list;
2137 
2138 /*
2139  * Look up an entry in the blocked list by dip and component
2140  */
2141 static pm_rsvp_t *
2142 pm_rsvp_lookup(dev_info_t *dip, int comp)
2143 {
2144 	pm_rsvp_t *p;
2145 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2146 	for (p = pm_blocked_list; p; p = p->pr_next)
2147 		if (p->pr_dip == dip && p->pr_comp == comp) {
2148 			return (p);
2149 		}
2150 	return (NULL);
2151 }
2152 
2153 /*
2154  * Called when a device which is direct power managed (or the parent or
2155  * dependent of such a device) changes power, or when a pm clone is closed
2156  * that was direct power managing a device.  This call results in pm_blocked()
2157  * (below) returning.
2158  */
2159 void
2160 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2161 {
2162 	PMD_FUNC(pmf, "proceed")
2163 	pm_rsvp_t *found = NULL;
2164 	pm_rsvp_t *p;
2165 
2166 	mutex_enter(&pm_rsvp_lock);
2167 	switch (cmd) {
2168 	/*
2169 	 * we're giving up control, let any pending op continue
2170 	 */
2171 	case PMP_RELEASE:
2172 		for (p = pm_blocked_list; p; p = p->pr_next) {
2173 			if (dip == p->pr_dip) {
2174 				p->pr_retval = PMP_RELEASE;
2175 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2176 				    pmf, PM_DEVICE(dip)))
2177 				cv_signal(&p->pr_cv);
2178 			}
2179 		}
2180 		break;
2181 
2182 	/*
2183 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2184 	 * succeed and a non-matching request for the same device fail
2185 	 */
2186 	case PMP_SETPOWER:
2187 		found = pm_rsvp_lookup(dip, comp);
2188 		if (!found)	/* if driver not waiting */
2189 			break;
2190 		/*
2191 		 * This cannot be pm_lower_power, since that can only happen
2192 		 * during detach or probe
2193 		 */
2194 		if (found->pr_newlevel <= newlevel) {
2195 			found->pr_retval = PMP_SUCCEED;
2196 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2197 			    PM_DEVICE(dip)))
2198 		} else {
2199 			found->pr_retval = PMP_FAIL;
2200 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2201 			    PM_DEVICE(dip)))
2202 		}
2203 		cv_signal(&found->pr_cv);
2204 		break;
2205 
2206 	default:
2207 		panic("pm_proceed unknown cmd %d", cmd);
2208 	}
2209 	mutex_exit(&pm_rsvp_lock);
2210 }
2211 
2212 /*
2213  * This routine dispatches new work to the dependency thread. Caller must
2214  * be prepared to block for memory if necessary.
2215  */
2216 void
2217 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2218     int *res, int cached_pwr)
2219 {
2220 	pm_dep_wk_t	*new_work;
2221 
2222 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2223 	new_work->pdw_type = cmd;
2224 	new_work->pdw_wait = wait;
2225 	new_work->pdw_done = 0;
2226 	new_work->pdw_ret = 0;
2227 	new_work->pdw_pwr = cached_pwr;
2228 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2229 	if (keeper != NULL) {
2230 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2231 		    KM_SLEEP);
2232 		(void) strcpy(new_work->pdw_keeper, keeper);
2233 	}
2234 	if (kept != NULL) {
2235 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2236 		(void) strcpy(new_work->pdw_kept, kept);
2237 	}
2238 	mutex_enter(&pm_dep_thread_lock);
2239 	if (pm_dep_thread_workq == NULL) {
2240 		pm_dep_thread_workq = new_work;
2241 		pm_dep_thread_tail = new_work;
2242 		new_work->pdw_next = NULL;
2243 	} else {
2244 		pm_dep_thread_tail->pdw_next = new_work;
2245 		pm_dep_thread_tail = new_work;
2246 		new_work->pdw_next = NULL;
2247 	}
2248 	cv_signal(&pm_dep_thread_cv);
2249 	/* If caller asked for it, wait till it is done. */
2250 	if (wait)  {
2251 		while (!new_work->pdw_done)
2252 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2253 		/*
2254 		 * Pass return status, if any, back.
2255 		 */
2256 		if (res != NULL)
2257 			*res = new_work->pdw_ret;
2258 		/*
2259 		 * If we asked to wait, it is our job to free the request
2260 		 * structure.
2261 		 */
2262 		if (new_work->pdw_keeper)
2263 			kmem_free(new_work->pdw_keeper,
2264 			    strlen(new_work->pdw_keeper) + 1);
2265 		if (new_work->pdw_kept)
2266 			kmem_free(new_work->pdw_kept,
2267 			    strlen(new_work->pdw_kept) + 1);
2268 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2269 	}
2270 	mutex_exit(&pm_dep_thread_lock);
2271 }
2272 
2273 /*
2274  * Release the pm resource for this device.
2275  */
2276 void
2277 pm_rem_info(dev_info_t *dip)
2278 {
2279 	PMD_FUNC(pmf, "rem_info")
2280 	int		i, count = 0;
2281 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2282 	dev_info_t	*pdip = ddi_get_parent(dip);
2283 	char		*pathbuf;
2284 	int		work_type = PM_DEP_WK_DETACH;
2285 
2286 	ASSERT(info);
2287 
2288 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2289 	if (PM_ISDIRECT(dip)) {
2290 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2291 		ASSERT(info->pmi_clone);
2292 		info->pmi_clone = 0;
2293 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2294 	}
2295 	ASSERT(!PM_GET_PM_SCAN(dip));
2296 
2297 	/*
2298 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2299 	 * Others we check all components.  BC node that has already
2300 	 * called pm_destroy_components() has zero component count.
2301 	 * Parents that get notification are not adjusted because their
2302 	 * kidsupcnt is always 0 (or 1 during configuration).
2303 	 */
2304 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2305 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2306 
2307 	/* node is detached, so we can examine power without locking */
2308 	if (PM_ISBC(dip)) {
2309 		count = (PM_CURPOWER(dip, 0) != 0);
2310 	} else {
2311 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2312 			count += (PM_CURPOWER(dip, i) != 0);
2313 	}
2314 
2315 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2316 		e_pm_hold_rele_power(pdip, -count);
2317 
2318 	/* Schedule a request to clean up dependency records */
2319 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2320 	(void) ddi_pathname(dip, pathbuf);
2321 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2322 	    PM_DEP_NOWAIT, NULL, (count > 0));
2323 	kmem_free(pathbuf, MAXPATHLEN);
2324 
2325 	/*
2326 	 * Adjust the pm_comps_notlowest count since this device is
2327 	 * not being power-managed anymore.
2328 	 */
2329 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2330 		if (PM_CURPOWER(dip, i) != 0)
2331 			PM_DECR_NOTLOWEST(dip);
2332 	}
2333 	/*
2334 	 * Once we clear the info pointer, it looks like it is not power
2335 	 * managed to everybody else.
2336 	 */
2337 	pm_set_pm_info(dip, NULL);
2338 	kmem_free(info, sizeof (pm_info_t));
2339 }
2340 
2341 int
2342 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2343 {
2344 	int components = PM_NUMCMPTS(dip);
2345 	int *bufp;
2346 	size_t size;
2347 	int i;
2348 
2349 	if (components <= 0) {
2350 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2351 		    "can't get normal power values\n", PM_DEVICE(dip));
2352 		return (DDI_FAILURE);
2353 	} else {
2354 		size = components * sizeof (int);
2355 		bufp = kmem_alloc(size, KM_SLEEP);
2356 		for (i = 0; i < components; i++) {
2357 			bufp[i] = pm_get_normal_power(dip, i);
2358 		}
2359 	}
2360 	*length = size;
2361 	*valuep = bufp;
2362 	return (DDI_SUCCESS);
2363 }
2364 
2365 static int
2366 pm_reset_timestamps(dev_info_t *dip, void *arg)
2367 {
2368 	_NOTE(ARGUNUSED(arg))
2369 
2370 	int components;
2371 	int	i;
2372 
2373 	if (!PM_GET_PM_INFO(dip))
2374 		return (DDI_WALK_CONTINUE);
2375 	components = PM_NUMCMPTS(dip);
2376 	ASSERT(components > 0);
2377 	PM_LOCK_BUSY(dip);
2378 	for (i = 0; i < components; i++) {
2379 		struct pm_component *cp;
2380 		/*
2381 		 * If the component was not marked as busy,
2382 		 * reset its timestamp to now.
2383 		 */
2384 		cp = PM_CP(dip, i);
2385 		if (cp->pmc_timestamp)
2386 			cp->pmc_timestamp = gethrestime_sec();
2387 	}
2388 	PM_UNLOCK_BUSY(dip);
2389 	return (DDI_WALK_CONTINUE);
2390 }
2391 
2392 /*
2393  * Convert a power level to an index into the levels array (or
2394  * just PM_LEVEL_UNKNOWN in that special case).
2395  */
2396 static int
2397 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2398 {
2399 	PMD_FUNC(pmf, "level_to_index")
2400 	int i;
2401 	int limit = cp->pmc_comp.pmc_numlevels;
2402 	int *ip = cp->pmc_comp.pmc_lvals;
2403 
2404 	if (level == PM_LEVEL_UNKNOWN)
2405 		return (level);
2406 
2407 	for (i = 0; i < limit; i++) {
2408 		if (level == *ip++) {
2409 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2410 			    pmf, PM_DEVICE(dip),
2411 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2412 			return (i);
2413 		}
2414 	}
2415 	panic("pm_level_to_index: level %d not found for device "
2416 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2417 	/*NOTREACHED*/
2418 }
2419 
2420 /*
2421  * Internal function to set current power level
2422  */
2423 static void
2424 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2425 {
2426 	PMD_FUNC(pmf, "set_cur_pwr")
2427 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2428 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2429 
2430 	/*
2431 	 * Nothing to adjust if current & new levels are the same.
2432 	 */
2433 	if (curpwr != PM_LEVEL_UNKNOWN &&
2434 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2435 		return;
2436 
2437 	/*
2438 	 * Keep the count for comps doing transition to/from lowest
2439 	 * level.
2440 	 */
2441 	if (curpwr == 0) {
2442 		PM_INCR_NOTLOWEST(dip);
2443 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2444 		PM_DECR_NOTLOWEST(dip);
2445 	}
2446 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2447 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2448 }
2449 
2450 /*
2451  * This is the default method of setting the power of a device if no ppm
2452  * driver has claimed it.
2453  */
2454 int
2455 pm_power(dev_info_t *dip, int comp, int level)
2456 {
2457 	PMD_FUNC(pmf, "power")
2458 	struct dev_ops	*ops;
2459 	int		(*fn)(dev_info_t *, int, int);
2460 	struct pm_component *cp = PM_CP(dip, comp);
2461 	int retval;
2462 	pm_info_t *info = PM_GET_PM_INFO(dip);
2463 	static int pm_phc_impl(dev_info_t *, int, int, int);
2464 
2465 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2466 	    PM_DEVICE(dip), comp, level))
2467 	if (!(ops = ddi_get_driver(dip))) {
2468 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2469 		    PM_DEVICE(dip)))
2470 		return (DDI_FAILURE);
2471 	}
2472 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2473 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2474 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2475 		    (!fn ? " devo_power NULL" : "")))
2476 		return (DDI_FAILURE);
2477 	}
2478 	cp->pmc_flags |= PM_POWER_OP;
2479 	retval = (*fn)(dip, comp, level);
2480 	cp->pmc_flags &= ~PM_POWER_OP;
2481 	if (retval == DDI_SUCCESS) {
2482 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2483 		return (DDI_SUCCESS);
2484 	}
2485 
2486 	/*
2487 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2488 	 * updated only the power level of the component.  If our attempt to
2489 	 * set the device new to a power level above has failed we sync the
2490 	 * total power state via phc code now.
2491 	 */
2492 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2493 		int phc_lvl =
2494 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2495 
2496 		ASSERT(info);
2497 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2498 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2499 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2500 	}
2501 
2502 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2503 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2504 	    level, power_val_to_string(cp, level)));
2505 	return (DDI_FAILURE);
2506 }
2507 
2508 int
2509 pm_unmanage(dev_info_t *dip)
2510 {
2511 	PMD_FUNC(pmf, "unmanage")
2512 	power_req_t power_req;
2513 	int result, retval = 0;
2514 
2515 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2516 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2517 	    PM_DEVICE(dip)))
2518 	power_req.request_type = PMR_PPM_UNMANAGE;
2519 	power_req.req.ppm_config_req.who = dip;
2520 	if (pm_ppm_claimed(dip))
2521 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2522 		    &power_req, &result);
2523 #ifdef DEBUG
2524 	else
2525 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2526 		    &power_req, &result);
2527 #endif
2528 	ASSERT(retval == DDI_SUCCESS);
2529 	pm_rem_info(dip);
2530 	return (retval);
2531 }
2532 
2533 int
2534 pm_raise_power(dev_info_t *dip, int comp, int level)
2535 {
2536 	if (level < 0)
2537 		return (DDI_FAILURE);
2538 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2539 	    !e_pm_valid_power(dip, comp, level))
2540 		return (DDI_FAILURE);
2541 
2542 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2543 }
2544 
2545 int
2546 pm_lower_power(dev_info_t *dip, int comp, int level)
2547 {
2548 	PMD_FUNC(pmf, "pm_lower_power")
2549 
2550 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2551 	    !e_pm_valid_power(dip, comp, level)) {
2552 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2553 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2554 		return (DDI_FAILURE);
2555 	}
2556 
2557 	if (!DEVI_IS_DETACHING(dip)) {
2558 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2559 		    pmf, PM_DEVICE(dip)))
2560 		return (DDI_FAILURE);
2561 	}
2562 
2563 	/*
2564 	 * If we don't care about saving power, or we're treating this node
2565 	 * specially, then this is a no-op
2566 	 */
2567 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2568 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2569 		    pmf, PM_DEVICE(dip),
2570 		    !autopm_enabled ? "!autopm_enabled " : "",
2571 		    !PM_CPUPM_ENABLED ? "!cpupm_enabled " : "",
2572 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2573 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2574 		return (DDI_SUCCESS);
2575 	}
2576 
2577 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2578 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2579 		    PM_DEVICE(dip)))
2580 		return (DDI_FAILURE);
2581 	}
2582 	return (DDI_SUCCESS);
2583 }
2584 
2585 /*
2586  * Find the entries struct for a given dip in the blocked list, return it locked
2587  */
2588 static psce_t *
2589 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2590 {
2591 	pscc_t *p;
2592 	psce_t *psce;
2593 
2594 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2595 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2596 		if (p->pscc_dip == dip) {
2597 			*psccp = p;
2598 			psce = p->pscc_entries;
2599 			mutex_enter(&psce->psce_lock);
2600 			ASSERT(psce);
2601 			rw_exit(&pm_pscc_direct_rwlock);
2602 			return (psce);
2603 		}
2604 	}
2605 	rw_exit(&pm_pscc_direct_rwlock);
2606 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2607 	/*NOTREACHED*/
2608 }
2609 
2610 /*
2611  * Write an entry indicating a power level change (to be passed to a process
2612  * later) in the given psce.
2613  * If we were called in the path that brings up the console fb in the
2614  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2615  * we create a record that has a size of -1, a physaddr of NULL, and that
2616  * has the overflow flag set.
2617  */
2618 static int
2619 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2620     int old, int which, pm_canblock_t canblock)
2621 {
2622 	char	buf[MAXNAMELEN];
2623 	pm_state_change_t *p;
2624 	size_t	size;
2625 	caddr_t physpath = NULL;
2626 	int	overrun = 0;
2627 
2628 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2629 	(void) ddi_pathname(dip, buf);
2630 	size = strlen(buf) + 1;
2631 	p = psce->psce_in;
2632 	if (canblock == PM_CANBLOCK_BYPASS) {
2633 		physpath = kmem_alloc(size, KM_NOSLEEP);
2634 		if (physpath == NULL) {
2635 			/*
2636 			 * mark current entry as overrun
2637 			 */
2638 			p->flags |= PSC_EVENT_LOST;
2639 			size = (size_t)-1;
2640 		}
2641 	} else
2642 		physpath = kmem_alloc(size, KM_SLEEP);
2643 	if (p->size) {	/* overflow; mark the next entry */
2644 		if (p->size != (size_t)-1)
2645 			kmem_free(p->physpath, p->size);
2646 		ASSERT(psce->psce_out == p);
2647 		if (p == psce->psce_last) {
2648 			psce->psce_first->flags |= PSC_EVENT_LOST;
2649 			psce->psce_out = psce->psce_first;
2650 		} else {
2651 			(p + 1)->flags |= PSC_EVENT_LOST;
2652 			psce->psce_out = (p + 1);
2653 		}
2654 		overrun++;
2655 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2656 		p->flags |= PSC_EVENT_LOST;
2657 		p->size = 0;
2658 		p->physpath = NULL;
2659 	}
2660 	if (which == PSC_INTEREST) {
2661 		mutex_enter(&pm_compcnt_lock);
2662 		if (pm_comps_notlowest == 0)
2663 			p->flags |= PSC_ALL_LOWEST;
2664 		else
2665 			p->flags &= ~PSC_ALL_LOWEST;
2666 		mutex_exit(&pm_compcnt_lock);
2667 	}
2668 	p->event = event;
2669 	p->timestamp = gethrestime_sec();
2670 	p->component = comp;
2671 	p->old_level = old;
2672 	p->new_level = new;
2673 	p->physpath = physpath;
2674 	p->size = size;
2675 	if (physpath != NULL)
2676 		(void) strcpy(p->physpath, buf);
2677 	if (p == psce->psce_last)
2678 		psce->psce_in = psce->psce_first;
2679 	else
2680 		psce->psce_in = ++p;
2681 	mutex_exit(&psce->psce_lock);
2682 	return (overrun);
2683 }
2684 
2685 /*
2686  * Find the next entry on the interest list.  We keep a pointer to the item we
2687  * last returned in the user's cooke.  Returns a locked entries struct.
2688  */
2689 static psce_t *
2690 psc_interest(void **cookie, pscc_t **psccp)
2691 {
2692 	pscc_t *pscc;
2693 	pscc_t **cookiep = (pscc_t **)cookie;
2694 
2695 	if (*cookiep == NULL)
2696 		pscc = pm_pscc_interest;
2697 	else
2698 		pscc = (*cookiep)->pscc_next;
2699 	if (pscc) {
2700 		*cookiep = pscc;
2701 		*psccp = pscc;
2702 		mutex_enter(&pscc->pscc_entries->psce_lock);
2703 		return (pscc->pscc_entries);
2704 	} else {
2705 		return (NULL);
2706 	}
2707 }
2708 
2709 /*
2710  * Create an entry for a process to pick up indicating a power level change.
2711  */
2712 static void
2713 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2714     int newlevel, int oldlevel, pm_canblock_t canblock)
2715 {
2716 	PMD_FUNC(pmf, "enqueue_notify")
2717 	pscc_t	*pscc;
2718 	psce_t	*psce;
2719 	void		*cookie = NULL;
2720 	int	overrun;
2721 
2722 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2723 	switch (cmd) {
2724 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2725 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2726 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2727 		psce = pm_psc_dip_to_direct(dip, &pscc);
2728 		ASSERT(psce);
2729 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2730 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2731 		    pm_poll_cnt[pscc->pscc_clone]))
2732 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2733 		    PSC_DIRECT, canblock);
2734 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2735 		mutex_enter(&pm_clone_lock);
2736 		if (!overrun)
2737 			pm_poll_cnt[pscc->pscc_clone]++;
2738 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2739 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2740 		mutex_exit(&pm_clone_lock);
2741 		break;
2742 	case PSC_HAS_CHANGED:
2743 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2744 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2745 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2746 			psce = pm_psc_dip_to_direct(dip, &pscc);
2747 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2748 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2749 			    pm_poll_cnt[pscc->pscc_clone]))
2750 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2751 			    oldlevel, PSC_DIRECT, canblock);
2752 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2753 			mutex_enter(&pm_clone_lock);
2754 			if (!overrun)
2755 				pm_poll_cnt[pscc->pscc_clone]++;
2756 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2757 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2758 			mutex_exit(&pm_clone_lock);
2759 		}
2760 		mutex_enter(&pm_clone_lock);
2761 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2762 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2763 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2764 			    oldlevel, PSC_INTEREST, canblock);
2765 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2766 		}
2767 		rw_exit(&pm_pscc_interest_rwlock);
2768 		mutex_exit(&pm_clone_lock);
2769 		break;
2770 #ifdef DEBUG
2771 	default:
2772 		ASSERT(0);
2773 #endif
2774 	}
2775 }
2776 
2777 static void
2778 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2779 {
2780 	if (listp) {
2781 		pm_ppm_devlist_t *p, *next = NULL;
2782 
2783 		for (p = *listp; p; p = next) {
2784 			next = p->ppd_next;
2785 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2786 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2787 			    canblock);
2788 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2789 		}
2790 		*listp = NULL;
2791 	}
2792 }
2793 
2794 /*
2795  * Try to get the power locks of the parent node and target (child)
2796  * node.  Return true if successful (with both locks held) or false
2797  * (with no locks held).
2798  */
2799 static int
2800 pm_try_parent_child_locks(dev_info_t *pdip,
2801     dev_info_t *dip, int *pcircp, int *circp)
2802 {
2803 	if (ndi_devi_tryenter(pdip, pcircp))
2804 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2805 			return (1);
2806 		} else {
2807 			ndi_devi_exit(pdip, *pcircp);
2808 		}
2809 	return (0);
2810 }
2811 
2812 /*
2813  * Determine if the power lock owner is blocked by current thread.
2814  * returns :
2815  * 	1 - If the thread owning the effective power lock (the first lock on
2816  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2817  *          a mutex held by the current thread.
2818  *
2819  *	0 - otherwise
2820  *
2821  * Note : This function is called by pm_power_has_changed to determine whether
2822  * it is executing in parallel with pm_set_power.
2823  */
2824 static int
2825 pm_blocked_by_us(dev_info_t *dip)
2826 {
2827 	power_req_t power_req;
2828 	kthread_t *owner;
2829 	int result;
2830 	kmutex_t *mp;
2831 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2832 
2833 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2834 	power_req.req.ppm_power_lock_owner_req.who = dip;
2835 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2836 	    DDI_SUCCESS) {
2837 		/*
2838 		 * It is assumed that if the device is claimed by ppm, ppm
2839 		 * will always implement this request type and it'll always
2840 		 * return success. We panic here, if it fails.
2841 		 */
2842 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2843 		    PM_DEVICE(dip));
2844 		/*NOTREACHED*/
2845 	}
2846 
2847 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2848 	    owner->t_state == TS_SLEEP &&
2849 	    owner->t_sobj_ops &&
2850 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2851 	    (mp = (kmutex_t *)owner->t_wchan) &&
2852 	    mutex_owner(mp) == curthread)
2853 		return (1);
2854 
2855 	return (0);
2856 }
2857 
2858 /*
2859  * Notify parent which wants to hear about a child's power changes.
2860  */
2861 static void
2862 pm_notify_parent(dev_info_t *dip,
2863     dev_info_t *pdip, int comp, int old_level, int level)
2864 {
2865 	pm_bp_has_changed_t bphc;
2866 	pm_sp_misc_t pspm;
2867 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2868 	int result = DDI_SUCCESS;
2869 
2870 	bphc.bphc_dip = dip;
2871 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2872 	bphc.bphc_comp = comp;
2873 	bphc.bphc_olevel = old_level;
2874 	bphc.bphc_nlevel = level;
2875 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2876 	pspm.pspm_scan = 0;
2877 	bphc.bphc_private = &pspm;
2878 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2879 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2880 	kmem_free(pathbuf, MAXPATHLEN);
2881 }
2882 
2883 /*
2884  * Check if we need to resume a BC device, and make the attach call as required.
2885  */
2886 static int
2887 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2888 {
2889 	int ret = DDI_SUCCESS;
2890 
2891 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2892 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2893 		/* ppm is not interested in DDI_PM_RESUME */
2894 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2895 			/* XXX Should we mark it resumed, */
2896 			/* even though it failed? */
2897 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2898 			    PM_NAME(dip), PM_ADDR(dip));
2899 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2900 	}
2901 
2902 	return (ret);
2903 }
2904 
2905 /*
2906  * Tests outside the lock to see if we should bother to enqueue an entry
2907  * for any watching process.  If yes, then caller will take the lock and
2908  * do the full protocol
2909  */
2910 static int
2911 pm_watchers()
2912 {
2913 	if (pm_processes_stopped)
2914 		return (0);
2915 	return (pm_pscc_direct || pm_pscc_interest);
2916 }
2917 
2918 /*
2919  * A driver is reporting that the power of one of its device's components
2920  * has changed.  Update the power state accordingly.
2921  */
2922 int
2923 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2924 {
2925 	PMD_FUNC(pmf, "pm_power_has_changed")
2926 	int ret;
2927 	dev_info_t *pdip = ddi_get_parent(dip);
2928 	struct pm_component *cp;
2929 	int blocked, circ, pcirc, old_level;
2930 	static int pm_phc_impl(dev_info_t *, int, int, int);
2931 
2932 	if (level < 0) {
2933 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2934 		    PM_DEVICE(dip), level))
2935 		return (DDI_FAILURE);
2936 	}
2937 
2938 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2939 	    PM_DEVICE(dip), comp, level))
2940 
2941 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2942 	    !e_pm_valid_power(dip, comp, level))
2943 		return (DDI_FAILURE);
2944 
2945 	/*
2946 	 * A driver thread calling pm_power_has_changed and another thread
2947 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2948 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2949 	 * this specific deadlock.  If we can't get the lock immediately
2950 	 * and we are deadlocked, just update the component's level, do
2951 	 * notifications, and return.  We intend to update the total power
2952 	 * state later (if the other thread fails to set power to the
2953 	 * desired level).  If we were called because of a power change on a
2954 	 * component that isn't involved in a set_power op, update all state
2955 	 * immediately.
2956 	 */
2957 	cp = PM_CP(dip, comp);
2958 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2959 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2960 		    (cp->pmc_flags & PM_POWER_OP)) {
2961 			if (pm_watchers()) {
2962 				mutex_enter(&pm_rsvp_lock);
2963 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2964 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2965 				mutex_exit(&pm_rsvp_lock);
2966 			}
2967 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2968 				pm_notify_parent(dip,
2969 				    pdip, comp, cur_power(cp), level);
2970 			(void) pm_check_and_resume(dip,
2971 			    comp, cur_power(cp), level);
2972 
2973 			/*
2974 			 * Stash the old power index, update curpwr, and flag
2975 			 * that the total power state needs to be synched.
2976 			 */
2977 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2978 			/*
2979 			 * Several pm_power_has_changed calls could arrive
2980 			 * while the set power path remains blocked.  Keep the
2981 			 * oldest old power and the newest new power of any
2982 			 * sequence of phc calls which arrive during deadlock.
2983 			 */
2984 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2985 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2986 			cp->pmc_cur_pwr =
2987 			    pm_level_to_index(dip, cp, level);
2988 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
2989 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2990 			return (DDI_SUCCESS);
2991 		} else
2992 			if (blocked) {	/* blocked, but different cmpt? */
2993 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
2994 					cmn_err(CE_NOTE,
2995 					    "!pm: parent kuc not updated due "
2996 					    "to possible deadlock.\n");
2997 					return (pm_phc_impl(dip,
2998 					    comp, level, 1));
2999 				}
3000 				old_level = cur_power(cp);
3001 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3002 				    (!PM_ISBC(dip) || comp == 0) &&
3003 				    POWERING_ON(old_level, level))
3004 					pm_hold_power(pdip);
3005 				ret = pm_phc_impl(dip, comp, level, 1);
3006 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3007 					if ((!PM_ISBC(dip) ||
3008 					    comp == 0) && level == 0 &&
3009 					    old_level != PM_LEVEL_UNKNOWN)
3010 						pm_rele_power(pdip);
3011 				}
3012 				ndi_devi_exit(pdip, pcirc);
3013 				/* child lock not held: deadlock */
3014 				return (ret);
3015 			}
3016 		delay(1);
3017 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
3018 	}
3019 
3020 	/* non-deadlock case */
3021 	old_level = cur_power(cp);
3022 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3023 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
3024 		pm_hold_power(pdip);
3025 	ret = pm_phc_impl(dip, comp, level, 1);
3026 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3027 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
3028 		    old_level != PM_LEVEL_UNKNOWN)
3029 			pm_rele_power(pdip);
3030 	}
3031 	PM_UNLOCK_POWER(dip, circ);
3032 	ndi_devi_exit(pdip, pcirc);
3033 	return (ret);
3034 }
3035 
3036 /*
3037  * Account for power changes to a component of the the console frame buffer.
3038  * If lowering power from full (or "unkown", which is treatd as full)
3039  * we will increment the "components off" count of the fb device.
3040  * Subsequent lowering of the same component doesn't affect the count.  If
3041  * raising a component back to full power, we will decrement the count.
3042  *
3043  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3044  */
3045 static int
3046 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3047 {
3048 	struct pm_component *cp = PM_CP(dip, cmpt);
3049 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3050 	int want_normal = (new == cp->pmc_norm_pwr);
3051 	int incr = 0;
3052 
3053 	if (on && !want_normal)
3054 		incr = 1;
3055 	else if (!on && want_normal)
3056 		incr = -1;
3057 	return (incr);
3058 }
3059 
3060 /*
3061  * Adjust the count of console frame buffer components < full power.
3062  */
3063 static void
3064 update_comps_off(int incr, dev_info_t *dip)
3065 {
3066 		mutex_enter(&pm_cfb_lock);
3067 		pm_cfb_comps_off += incr;
3068 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3069 		mutex_exit(&pm_cfb_lock);
3070 }
3071 
3072 /*
3073  * Update the power state in the framework (via the ppm).  The 'notify'
3074  * argument tells whether to notify watchers.  Power lock is already held.
3075  */
3076 static int
3077 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3078 {
3079 	PMD_FUNC(pmf, "phc_impl")
3080 	power_req_t power_req;
3081 	int i, dodeps = 0;
3082 	dev_info_t *pdip = ddi_get_parent(dip);
3083 	int result;
3084 	int old_level;
3085 	struct pm_component *cp;
3086 	int incr = 0;
3087 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3088 	int work_type = 0;
3089 	char *pathbuf;
3090 
3091 	/* Must use "official" power level for this test. */
3092 	cp = PM_CP(dip, comp);
3093 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3094 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3095 	if (old_level != PM_LEVEL_UNKNOWN)
3096 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3097 
3098 	if (level == old_level) {
3099 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3100 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3101 		return (DDI_SUCCESS);
3102 	}
3103 
3104 	/*
3105 	 * Tell ppm about this.
3106 	 */
3107 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3108 	power_req.req.ppm_notify_level_req.who = dip;
3109 	power_req.req.ppm_notify_level_req.cmpt = comp;
3110 	power_req.req.ppm_notify_level_req.new_level = level;
3111 	power_req.req.ppm_notify_level_req.old_level = old_level;
3112 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3113 	    &result) == DDI_FAILURE) {
3114 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3115 		    pmf, PM_DEVICE(dip), level))
3116 		return (DDI_FAILURE);
3117 	}
3118 
3119 	if (PM_IS_CFB(dip)) {
3120 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3121 
3122 		if (incr) {
3123 			update_comps_off(incr, dip);
3124 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3125 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3126 			    comp, old_level, level, pm_cfb_comps_off))
3127 		}
3128 	}
3129 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3130 	result = DDI_SUCCESS;
3131 
3132 	if (notify) {
3133 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3134 			pm_notify_parent(dip, pdip, comp, old_level, level);
3135 		(void) pm_check_and_resume(dip, comp, old_level, level);
3136 	}
3137 
3138 	/*
3139 	 * Decrement the dependency kidsup count if we turn a device
3140 	 * off.
3141 	 */
3142 	if (POWERING_OFF(old_level, level)) {
3143 		dodeps = 1;
3144 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3145 			cp = PM_CP(dip, i);
3146 			if (cur_power(cp)) {
3147 				dodeps = 0;
3148 				break;
3149 			}
3150 		}
3151 		if (dodeps)
3152 			work_type = PM_DEP_WK_POWER_OFF;
3153 	}
3154 
3155 	/*
3156 	 * Increment if we turn it on. Check to see
3157 	 * if other comps are already on, if so,
3158 	 * dont increment.
3159 	 */
3160 	if (POWERING_ON(old_level, level)) {
3161 		dodeps = 1;
3162 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3163 			cp = PM_CP(dip, i);
3164 			if (comp == i)
3165 				continue;
3166 			/* -1 also treated as 0 in this case */
3167 			if (cur_power(cp) > 0) {
3168 				dodeps = 0;
3169 				break;
3170 			}
3171 		}
3172 		if (dodeps)
3173 			work_type = PM_DEP_WK_POWER_ON;
3174 	}
3175 
3176 	if (dodeps) {
3177 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3178 		(void) ddi_pathname(dip, pathbuf);
3179 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3180 		    PM_DEP_NOWAIT, NULL, 0);
3181 		kmem_free(pathbuf, MAXPATHLEN);
3182 	}
3183 
3184 	if (notify && (level != old_level) && pm_watchers()) {
3185 		mutex_enter(&pm_rsvp_lock);
3186 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3187 		    PM_CANBLOCK_BLOCK);
3188 		mutex_exit(&pm_rsvp_lock);
3189 	}
3190 
3191 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3192 	pm_rescan(dip);
3193 	return (DDI_SUCCESS);
3194 }
3195 
3196 /*
3197  * This function is called at startup time to notify pm of the existence
3198  * of any platform power managers for this platform.  As a result of
3199  * this registration, each function provided will be called each time
3200  * a device node is attached, until one returns true, and it must claim the
3201  * device node (by returning non-zero) if it wants to be involved in the
3202  * node's power management.  If it does claim the node, then it will
3203  * subsequently be notified of attach and detach events.
3204  *
3205  */
3206 
3207 int
3208 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3209 {
3210 	PMD_FUNC(pmf, "register_ppm")
3211 	struct ppm_callbacks *ppmcp;
3212 	pm_component_t *cp;
3213 	int i, pwr, result, circ;
3214 	power_req_t power_req;
3215 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3216 	void pm_ppm_claim(dev_info_t *);
3217 
3218 	mutex_enter(&ppm_lock);
3219 	ppmcp = ppm_callbacks;
3220 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3221 		if (ppmcp->ppmc_func == NULL) {
3222 			ppmcp->ppmc_func = func;
3223 			ppmcp->ppmc_dip = dip;
3224 			break;
3225 		}
3226 	}
3227 	mutex_exit(&ppm_lock);
3228 
3229 	if (i >= MAX_PPM_HANDLERS)
3230 		return (DDI_FAILURE);
3231 	while ((dip = ddi_get_parent(dip)) != NULL) {
3232 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3233 			continue;
3234 		pm_ppm_claim(dip);
3235 		/* don't bother with the not power-manageable nodes */
3236 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3237 			/*
3238 			 * Tell ppm about this.
3239 			 */
3240 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3241 			p->old_level = PM_LEVEL_UNKNOWN;
3242 			p->who = dip;
3243 			PM_LOCK_POWER(dip, &circ);
3244 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3245 				cp = PM_CP(dip, i);
3246 				pwr = cp->pmc_cur_pwr;
3247 				if (pwr != PM_LEVEL_UNKNOWN) {
3248 					p->cmpt = i;
3249 					p->new_level = cur_power(cp);
3250 					p->old_level = PM_LEVEL_UNKNOWN;
3251 					if (pm_ctlops(PPM(dip), dip,
3252 					    DDI_CTLOPS_POWER, &power_req,
3253 					    &result) == DDI_FAILURE) {
3254 						PMD(PMD_FAIL, ("%s: pc "
3255 						    "%s@%s(%s#%d) to %d "
3256 						    "fails\n", pmf,
3257 						    PM_DEVICE(dip), pwr))
3258 					}
3259 				}
3260 			}
3261 			PM_UNLOCK_POWER(dip, circ);
3262 		}
3263 	}
3264 	return (DDI_SUCCESS);
3265 }
3266 
3267 /*
3268  * Call the ppm's that have registered and adjust the devinfo struct as
3269  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3270  * by each ppm are assumed to be disjoint.
3271  */
3272 void
3273 pm_ppm_claim(dev_info_t *dip)
3274 {
3275 	struct ppm_callbacks *ppmcp;
3276 
3277 	if (PPM(dip)) {
3278 		return;
3279 	}
3280 	mutex_enter(&ppm_lock);
3281 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3282 		if ((*ppmcp->ppmc_func)(dip)) {
3283 			DEVI(dip)->devi_pm_ppm =
3284 			    (struct dev_info *)ppmcp->ppmc_dip;
3285 			mutex_exit(&ppm_lock);
3286 			return;
3287 		}
3288 	}
3289 	mutex_exit(&ppm_lock);
3290 }
3291 
3292 /*
3293  * Node is being detached so stop autopm until we see if it succeeds, in which
3294  * case pm_stop will be called.  For backwards compatible devices we bring the
3295  * device up to full power on the assumption the detach will succeed.
3296  */
3297 void
3298 pm_detaching(dev_info_t *dip)
3299 {
3300 	PMD_FUNC(pmf, "detaching")
3301 	pm_info_t *info = PM_GET_PM_INFO(dip);
3302 	int iscons;
3303 
3304 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3305 	    PM_NUMCMPTS(dip)))
3306 	if (info == NULL)
3307 		return;
3308 	ASSERT(DEVI_IS_DETACHING(dip));
3309 	PM_LOCK_DIP(dip);
3310 	info->pmi_dev_pm_state |= PM_DETACHING;
3311 	PM_UNLOCK_DIP(dip);
3312 	if (!PM_ISBC(dip))
3313 		pm_scan_stop(dip);
3314 
3315 	/*
3316 	 * console and old-style devices get brought up when detaching.
3317 	 */
3318 	iscons = PM_IS_CFB(dip);
3319 	if (iscons || PM_ISBC(dip)) {
3320 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3321 		if (iscons) {
3322 			mutex_enter(&pm_cfb_lock);
3323 			while (cfb_inuse) {
3324 				mutex_exit(&pm_cfb_lock);
3325 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3326 				delay(1);
3327 				mutex_enter(&pm_cfb_lock);
3328 			}
3329 			ASSERT(cfb_dip_detaching == NULL);
3330 			ASSERT(cfb_dip);
3331 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3332 			cfb_dip = NULL;
3333 			mutex_exit(&pm_cfb_lock);
3334 		}
3335 	}
3336 }
3337 
3338 /*
3339  * Node failed to detach.  If it used to be autopm'd, make it so again.
3340  */
3341 void
3342 pm_detach_failed(dev_info_t *dip)
3343 {
3344 	PMD_FUNC(pmf, "detach_failed")
3345 	pm_info_t *info = PM_GET_PM_INFO(dip);
3346 	int pm_all_at_normal(dev_info_t *);
3347 
3348 	if (info == NULL)
3349 		return;
3350 	ASSERT(DEVI_IS_DETACHING(dip));
3351 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3352 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3353 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3354 			/* Make sure the operation is still needed */
3355 			if (!pm_all_at_normal(dip)) {
3356 				if (pm_all_to_normal(dip,
3357 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3358 					PMD(PMD_ERROR, ("%s: could not bring "
3359 					    "%s@%s(%s#%d) to normal\n", pmf,
3360 					    PM_DEVICE(dip)))
3361 				}
3362 			}
3363 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3364 		}
3365 	}
3366 	if (!PM_ISBC(dip)) {
3367 		mutex_enter(&pm_scan_lock);
3368 		if (PM_SCANABLE(dip))
3369 			pm_scan_init(dip);
3370 		mutex_exit(&pm_scan_lock);
3371 		pm_rescan(dip);
3372 	}
3373 }
3374 
/*
 * Generic Backwards Compatible (BC) component.
 *
 * bc_names holds the two level names of a BC component.  bc_comp is the
 * template that e_pm_default_components() copies into every component of a
 * BC device by struct assignment; that routine then replaces the level
 * value and threshold arrays with freshly allocated ones.
 * NOTE(review): the initializer is positional; presumably the fields are
 * name, number of levels, two array pointers left NULL, and the level
 * names -- confirm against the pm_comp_t definition.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3379 
3380 static void
3381 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3382 {
3383 	pm_comp_t *pmc;
3384 	pmc = &cp->pmc_comp;
3385 	pmc->pmc_numlevels = 2;
3386 	pmc->pmc_lvals[0] = 0;
3387 	pmc->pmc_lvals[1] = norm;
3388 	e_pm_set_cur_pwr(dip, cp, norm);
3389 }
3390 
3391 static void
3392 e_pm_default_components(dev_info_t *dip, int cmpts)
3393 {
3394 	int i;
3395 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3396 
3397 	p = DEVI(dip)->devi_pm_components;
3398 	for (i = 0; i < cmpts; i++, p++) {
3399 		p->pmc_comp = bc_comp;	/* struct assignment */
3400 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3401 		    KM_SLEEP);
3402 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3403 		    KM_SLEEP);
3404 		p->pmc_comp.pmc_numlevels = 2;
3405 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3406 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3407 	}
3408 }
3409 
3410 /*
3411  * Called from functions that require components to exist already to allow
3412  * for their creation by parsing the pm-components property.
3413  * Device will not be power managed as a result of this call
3414  * No locking needed because we're single threaded by the ndi_devi_enter
3415  * done while attaching, and the device isn't visible until after it has
3416  * attached
3417  */
3418 int
3419 pm_premanage(dev_info_t *dip, int style)
3420 {
3421 	PMD_FUNC(pmf, "premanage")
3422 	pm_comp_t	*pcp, *compp;
3423 	int		cmpts, i, norm, error;
3424 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3425 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3426 
3427 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3428 	/*
3429 	 * If this dip has already been processed, don't mess with it
3430 	 */
3431 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3432 		return (DDI_SUCCESS);
3433 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3434 		return (DDI_FAILURE);
3435 	}
3436 	/*
3437 	 * Look up pm-components property and create components accordingly
3438 	 * If that fails, fall back to backwards compatibility
3439 	 */
3440 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3441 		/*
3442 		 * If error is set, the property existed but was not well formed
3443 		 */
3444 		if (error || (style == PM_STYLE_NEW)) {
3445 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3446 			return (DDI_FAILURE);
3447 		}
3448 		/*
3449 		 * If they don't have the pm-components property, then we
3450 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3451 		 * behavior driver must have called pm_create_components, and
3452 		 * we need to flesh out dummy components
3453 		 */
3454 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3455 			/*
3456 			 * Not really failure, but we don't want the
3457 			 * caller to treat it as success
3458 			 */
3459 			return (DDI_FAILURE);
3460 		}
3461 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3462 		e_pm_default_components(dip, cmpts);
3463 		for (i = 0; i < cmpts; i++) {
3464 			/*
3465 			 * if normal power not set yet, we don't really know
3466 			 * what *ANY* of the power values are.  If normal
3467 			 * power is set, then we assume for this backwards
3468 			 * compatible case that the values are 0, normal power.
3469 			 */
3470 			norm = pm_get_normal_power(dip, i);
3471 			if (norm == (uint_t)-1) {
3472 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3473 				    PM_DEVICE(dip), i))
3474 				return (DDI_FAILURE);
3475 			}
3476 			/*
3477 			 * Components of BC devices start at their normal power,
3478 			 * so count them to be not at their lowest power.
3479 			 */
3480 			PM_INCR_NOTLOWEST(dip);
3481 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3482 		}
3483 	} else {
3484 		/*
3485 		 * e_pm_create_components was called from pm_autoconfig(), it
3486 		 * creates components with no descriptions (or known levels)
3487 		 */
3488 		cmpts = PM_NUMCMPTS(dip);
3489 		ASSERT(cmpts != 0);
3490 		pcp = compp;
3491 		p = DEVI(dip)->devi_pm_components;
3492 		for (i = 0; i < cmpts; i++, p++) {
3493 			p->pmc_comp = *pcp++;   /* struct assignment */
3494 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3495 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3496 		}
3497 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3498 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3499 			    PMC_CPU_THRESH);
3500 		else
3501 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3502 			    PMC_DEF_THRESH);
3503 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3504 	}
3505 	return (DDI_SUCCESS);
3506 }
3507 
3508 /*
3509  * Called from during or after the device's attach to let us know it is ready
3510  * to play autopm.   Look up the pm model and manage the device accordingly.
3511  * Returns system call errno value.
3512  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3513  * a little cleaner
3514  *
3515  * Called with dip lock held, return with dip lock unheld.
3516  */
3517 
3518 int
3519 e_pm_manage(dev_info_t *dip, int style)
3520 {
3521 	PMD_FUNC(pmf, "e_manage")
3522 	pm_info_t	*info;
3523 	dev_info_t	*pdip = ddi_get_parent(dip);
3524 	int	pm_thresh_specd(dev_info_t *);
3525 	int	count;
3526 	char	*pathbuf;
3527 
3528 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3529 		return (DDI_FAILURE);
3530 	}
3531 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3532 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3533 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3534 
3535 	/*
3536 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3537 	 * out at their normal power, so they are "up", others start out
3538 	 * unknown, which is effectively "up".  Parent which want notification
3539 	 * get kidsupcnt of 0 always.
3540 	 */
3541 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3542 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3543 		e_pm_hold_rele_power(pdip, count);
3544 
3545 	pm_set_pm_info(dip, info);
3546 	/*
3547 	 * Apply any recorded thresholds
3548 	 */
3549 	(void) pm_thresh_specd(dip);
3550 
3551 	/*
3552 	 * Do dependency processing.
3553 	 */
3554 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3555 	(void) ddi_pathname(dip, pathbuf);
3556 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3557 	    PM_DEP_NOWAIT, NULL, 0);
3558 	kmem_free(pathbuf, MAXPATHLEN);
3559 
3560 	if (!PM_ISBC(dip)) {
3561 		mutex_enter(&pm_scan_lock);
3562 		if (PM_SCANABLE(dip)) {
3563 			pm_scan_init(dip);
3564 			mutex_exit(&pm_scan_lock);
3565 			pm_rescan(dip);
3566 		} else {
3567 			mutex_exit(&pm_scan_lock);
3568 		}
3569 	}
3570 	return (0);
3571 }
3572 
3573 /*
3574  * This is the obsolete exported interface for a driver to find out its
3575  * "normal" (max) power.
3576  * We only get components destroyed while no power management is
3577  * going on (and the device is detached), so we don't need a mutex here
3578  */
3579 int
3580 pm_get_normal_power(dev_info_t *dip, int comp)
3581 {
3582 
3583 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3584 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3585 	}
3586 	return (DDI_FAILURE);
3587 }
3588 
3589 /*
3590  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3591  */
3592 int
3593 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3594 {
3595 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3596 		*levelp = PM_CURPOWER(dip, comp);
3597 		return (DDI_SUCCESS);
3598 	}
3599 	return (DDI_FAILURE);
3600 }
3601 
3602 /*
3603  * Returns current threshold of indicated component
3604  */
3605 static int
3606 cur_threshold(dev_info_t *dip, int comp)
3607 {
3608 	pm_component_t *cp = PM_CP(dip, comp);
3609 	int pwr;
3610 
3611 	if (PM_ISBC(dip)) {
3612 		/*
3613 		 * backwards compatible nodes only have one threshold
3614 		 */
3615 		return (cp->pmc_comp.pmc_thresh[1]);
3616 	}
3617 	pwr = cp->pmc_cur_pwr;
3618 	if (pwr == PM_LEVEL_UNKNOWN) {
3619 		int thresh;
3620 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3621 			thresh = pm_default_nexus_threshold;
3622 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3623 			thresh = pm_cpu_idle_threshold;
3624 		else
3625 			thresh = pm_system_idle_threshold;
3626 		return (thresh);
3627 	}
3628 	ASSERT(cp->pmc_comp.pmc_thresh);
3629 	return (cp->pmc_comp.pmc_thresh[pwr]);
3630 }
3631 
3632 /*
3633  * Compute next lower component power level given power index.
3634  */
3635 static int
3636 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3637 {
3638 	int nxt_pwr;
3639 
3640 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3641 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3642 	} else {
3643 		pwrndx--;
3644 		ASSERT(pwrndx >= 0);
3645 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3646 	}
3647 	return (nxt_pwr);
3648 }
3649 
3650 /*
3651  * Update the maxpower (normal) power of a component. Note that the
3652  * component's power level is only changed if it's current power level
3653  * is higher than the new max power.
3654  */
3655 int
3656 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3657 {
3658 	PMD_FUNC(pmf, "update_maxpower")
3659 	int old;
3660 	int result;
3661 
3662 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3663 	    !e_pm_valid_power(dip, comp, level)) {
3664 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3665 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3666 		return (DDI_FAILURE);
3667 	}
3668 	old = e_pm_get_max_power(dip, comp);
3669 	e_pm_set_max_power(dip, comp, level);
3670 
3671 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3672 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3673 		e_pm_set_max_power(dip, comp, old);
3674 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3675 		    PM_DEVICE(dip)))
3676 		return (DDI_FAILURE);
3677 	}
3678 	return (DDI_SUCCESS);
3679 }
3680 
3681 /*
3682  * Bring all components of device to normal power
3683  */
3684 int
3685 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3686 {
3687 	PMD_FUNC(pmf, "all_to_normal")
3688 	int		*normal;
3689 	int		i, ncomps, result;
3690 	size_t		size;
3691 	int		changefailed = 0;
3692 
3693 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3694 	ASSERT(PM_GET_PM_INFO(dip));
3695 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3696 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3697 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3698 		return (DDI_FAILURE);
3699 	}
3700 	ncomps = PM_NUMCMPTS(dip);
3701 	for (i = 0; i < ncomps; i++) {
3702 		if (pm_set_power(dip, i, normal[i],
3703 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3704 			changefailed++;
3705 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3706 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3707 			    PM_DEVICE(dip), i, normal[i], result))
3708 		}
3709 	}
3710 	kmem_free(normal, size);
3711 	if (changefailed) {
3712 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3713 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3714 		return (DDI_FAILURE);
3715 	}
3716 	return (DDI_SUCCESS);
3717 }
3718 
3719 /*
3720  * Returns true if all components of device are at normal power
3721  */
3722 int
3723 pm_all_at_normal(dev_info_t *dip)
3724 {
3725 	PMD_FUNC(pmf, "all_at_normal")
3726 	int		*normal;
3727 	int		i;
3728 	size_t		size;
3729 
3730 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3731 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3732 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3733 		return (DDI_FAILURE);
3734 	}
3735 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3736 		int current = PM_CURPOWER(dip, i);
3737 		if (normal[i] > current) {
3738 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3739 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3740 			    normal[i], current))
3741 			break;
3742 		}
3743 	}
3744 	kmem_free(normal, size);
3745 	if (i != PM_NUMCMPTS(dip)) {
3746 		return (0);
3747 	}
3748 	return (1);
3749 }
3750 
3751 static void
3752 bring_wekeeps_up(char *keeper)
3753 {
3754 	PMD_FUNC(pmf, "bring_wekeeps_up")
3755 	int i;
3756 	pm_pdr_t *dp;
3757 	pm_info_t *wku_info;
3758 	char *kept_path;
3759 	dev_info_t *kept;
3760 	static void bring_pmdep_up(dev_info_t *, int);
3761 
3762 	if (panicstr) {
3763 		return;
3764 	}
3765 	/*
3766 	 * We process the request even if the keeper detaches because
3767 	 * detach processing expects this to increment kidsupcnt of kept.
3768 	 */
3769 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3770 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3771 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3772 			continue;
3773 		for (i = 0; i < dp->pdr_kept_count; i++) {
3774 			kept_path = dp->pdr_kept_paths[i];
3775 			if (kept_path == NULL)
3776 				continue;
3777 			ASSERT(kept_path[0] != '\0');
3778 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3779 				continue;
3780 			wku_info = PM_GET_PM_INFO(kept);
3781 			if (wku_info == NULL) {
3782 				if (kept)
3783 					ddi_release_devi(kept);
3784 				continue;
3785 			}
3786 			/*
3787 			 * Don't mess with it if it is being detached, it isn't
3788 			 * safe to call its power entry point
3789 			 */
3790 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3791 				if (kept)
3792 					ddi_release_devi(kept);
3793 				continue;
3794 			}
3795 			bring_pmdep_up(kept, 1);
3796 			ddi_release_devi(kept);
3797 		}
3798 	}
3799 }
3800 
3801 /*
3802  * Bring up the 'kept' device passed as argument
3803  */
3804 static void
3805 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3806 {
3807 	PMD_FUNC(pmf, "bring_pmdep_up")
3808 	int is_all_at_normal = 0;
3809 
3810 	/*
3811 	 * If the kept device has been unmanaged, do nothing.
3812 	 */
3813 	if (!PM_GET_PM_INFO(kept_dip))
3814 		return;
3815 
3816 	/* Just ignore DIRECT PM device till they are released. */
3817 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3818 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3819 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3820 		    "controlling process did something else\n", pmf,
3821 		    PM_DEVICE(kept_dip)))
3822 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3823 		return;
3824 	}
3825 	/* if we got here the keeper had a transition from OFF->ON */
3826 	if (hold)
3827 		pm_hold_power(kept_dip);
3828 
3829 	if (!is_all_at_normal)
3830 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3831 }
3832 
/*
 * Constants and a helper type that belong only to the pm-components
 * property handling in the next routine (or two).
 */

/* Literal "NAME=" and its length without the terminating NUL. */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* Name of the pm-components property. */
static const char pmcompstr[] = "pm-components";

/*
 * Node of a singly linked list of pm_comp_t pointers.
 */
struct pm_comp_pkg {
	pm_comp_t		*comp;	/* the component carried by this node */
	struct pm_comp_pkg	*next;	/* following node in the list */
};
3845 
/*
 * Character class tests for ASCII decimal and hexadecimal digits.  Both
 * macros evaluate their argument more than once, so it must be free of
 * side effects.
 */
#define	isdigit(ch)	('0' <= (ch) && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || \
			('a' <= (ch) && (ch) <= 'f') || \
			('A' <= (ch) && (ch) <= 'F'))
3850 
/*
 * Rather than duplicate this code ...
 * (this code is excerpted from pm_autoconfig(), which is its only user)
 *
 * Completes the component that 'compp' points at: allocates its level
 * name buffer and its per-level arrays, then copies the 'level' entries
 * collected in lvals[], lnames[] and lszs[] into them.  It relies on the
 * caller's locals compp, size, level, lvals, lnames, lszs, tp and j.
 *
 * The body is wrapped in do { } while (0) so that "FINISH_COMP;" is a
 * single statement and stays correct even in an unbraced if/else.
 */
#define	FINISH_COMP do { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	_NOTE(CONSTCOND) \
} while (0)
3874 
3875 /*
3876  * Create (empty) component data structures.
3877  */
3878 static void
3879 e_pm_create_components(dev_info_t *dip, int num_components)
3880 {
3881 	struct pm_component *compp, *ocompp;
3882 	int i, size = 0;
3883 
3884 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3885 	ASSERT(!DEVI(dip)->devi_pm_components);
3886 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3887 	size = sizeof (struct pm_component) * num_components;
3888 
3889 	compp = kmem_zalloc(size, KM_SLEEP);
3890 	ocompp = compp;
3891 	DEVI(dip)->devi_pm_comp_size = size;
3892 	DEVI(dip)->devi_pm_num_components = num_components;
3893 	PM_LOCK_BUSY(dip);
3894 	for (i = 0; i < num_components;  i++) {
3895 		compp->pmc_timestamp = gethrestime_sec();
3896 		compp->pmc_norm_pwr = (uint_t)-1;
3897 		compp++;
3898 	}
3899 	PM_UNLOCK_BUSY(dip);
3900 	DEVI(dip)->devi_pm_components = ocompp;
3901 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3902 }
3903 
/*
 * Parse hex or decimal value from char string
 *
 * cp	points at the text to parse; it must start with a decimal digit.
 *	A leading "0x" or "0X" selects hexadecimal, anything else decimal.
 * valp	receives the parsed value on success.
 *
 * Returns a pointer to the first character after the number, or NULL if
 * the text does not start with a digit or the value does not fit in an
 * int.  *valp is left untouched on failure.
 *
 * The digits are accumulated in place rather than staged in a fixed-size
 * scratch buffer, so an arbitrarily long run of digits cannot overrun
 * anything; it is simply rejected once the value would overflow.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	int base = 10;
	int value = 0;
	int digit;
	int ch;

	ch = *cp;
	if (ch < '0' || ch > '9')
		return (NULL);

	/*
	 * A leading zero is consumed here; it either introduces a hex
	 * prefix or is just an insignificant decimal digit.
	 */
	if (ch == '0') {
		cp++;
		if (*cp == 'x' || *cp == 'X') {
			base = 16;
			cp++;
		}
	}

	for (;;) {
		ch = *cp;
		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			break;

		/* refuse anything that would overflow a signed int */
		if (value > (INT_MAX - digit) / base)
			return (NULL);
		value = value * base + digit;
		cp++;
	}

	*valp = value;
	return (cp);
}
3966 
3967 /*
3968  * Set max (previously documented as "normal") power.
3969  */
3970 static void
3971 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3972 {
3973 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3974 }
3975 
3976 /*
3977  * Get max (previously documented as "normal") power.
3978  */
3979 static int
3980 e_pm_get_max_power(dev_info_t *dip, int component_number)
3981 {
3982 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
3983 }
3984 
3985 /*
3986  * Internal routine for destroying components
3987  * It is called even when there might not be any, so it must be forgiving.
3988  */
3989 static void
3990 e_pm_destroy_components(dev_info_t *dip)
3991 {
3992 	int i;
3993 	struct pm_component *cp;
3994 
3995 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3996 	if (PM_NUMCMPTS(dip) == 0)
3997 		return;
3998 	cp = DEVI(dip)->devi_pm_components;
3999 	ASSERT(cp);
4000 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
4001 		int nlevels = cp->pmc_comp.pmc_numlevels;
4002 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
4003 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
4004 		/*
4005 		 * For BC nodes, the rest is static in bc_comp, so skip it
4006 		 */
4007 		if (PM_ISBC(dip))
4008 			continue;
4009 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4010 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4011 		kmem_free(cp->pmc_comp.pmc_lname_buf,
4012 		    cp->pmc_comp.pmc_lnames_sz);
4013 	}
4014 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4015 	DEVI(dip)->devi_pm_components = NULL;
4016 	DEVI(dip)->devi_pm_num_components = 0;
4017 	DEVI(dip)->devi_pm_flags &=
4018 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4019 }
4020 
4021 /*
4022  * Read the pm-components property (if there is one) and use it to set up
4023  * components.  Returns a pointer to an array of component structures if
4024  * pm-components found and successfully parsed, else returns NULL.
4025  * Sets error return *errp to true to indicate a failure (as opposed to no
4026  * property being present).
4027  */
4028 pm_comp_t *
4029 pm_autoconfig(dev_info_t *dip, int *errp)
4030 {
4031 	PMD_FUNC(pmf, "autoconfig")
4032 	uint_t nelems;
4033 	char **pp;
4034 	pm_comp_t *compp = NULL;
4035 	int i, j, level, components = 0;
4036 	size_t size = 0;
4037 	struct pm_comp_pkg *p, *ptail;
4038 	struct pm_comp_pkg *phead = NULL;
4039 	int *lvals = NULL;
4040 	int *lszs = NULL;
4041 	int *np = NULL;
4042 	int npi = 0;
4043 	char **lnames = NULL;
4044 	char *cp, *tp;
4045 	pm_comp_t *ret = NULL;
4046 
4047 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4048 	*errp = 0;	/* assume success */
4049 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4050 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4051 		return (NULL);
4052 	}
4053 
4054 	if (nelems < 3) {	/* need at least one name and two levels */
4055 		goto errout;
4056 	}
4057 
4058 	/*
4059 	 * pm_create_components is no longer allowed
4060 	 */
4061 	if (PM_NUMCMPTS(dip) != 0) {
4062 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4063 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4064 		goto errout;
4065 	}
4066 
4067 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4068 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4069 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4070 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4071 
4072 	level = 0;
4073 	phead = NULL;
4074 	for (i = 0; i < nelems; i++) {
4075 		cp = pp[i];
4076 		if (!isdigit(*cp)) {	/*  must be name */
4077 			if (strncmp(cp, namestr, nameln) != 0) {
4078 				goto errout;
4079 			}
4080 			if (i != 0) {
4081 				if (level == 0) {	/* no level spec'd */
4082 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4083 					    pmf))
4084 					goto errout;
4085 				}
4086 				np[npi++] = lvals[level - 1];
4087 				/* finish up previous component levels */
4088 				FINISH_COMP;
4089 			}
4090 			cp += nameln;
4091 			if (!*cp) {
4092 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4093 				goto errout;
4094 			}
4095 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4096 			if (phead == NULL) {
4097 				phead = ptail = p;
4098 			} else {
4099 				ptail->next = p;
4100 				ptail = p;
4101 			}
4102 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4103 			    KM_SLEEP);
4104 			compp->pmc_name_sz = strlen(cp) + 1;
4105 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4106 			    KM_SLEEP);
4107 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4108 			components++;
4109 			level = 0;
4110 		} else {	/* better be power level <num>=<name> */
4111 #ifdef DEBUG
4112 			tp = cp;
4113 #endif
4114 			if (i == 0 ||
4115 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4116 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4117 				goto errout;
4118 			}
4119 #ifdef DEBUG
4120 			tp = cp;
4121 #endif
4122 			if (*cp++ != '=' || !*cp) {
4123 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4124 				goto errout;
4125 			}
4126 
4127 			lszs[level] = strlen(cp) + 1;
4128 			size += lszs[level];
4129 			lnames[level] = cp;	/* points into prop string */
4130 			level++;
4131 		}
4132 	}
4133 	np[npi++] = lvals[level - 1];
4134 	if (level == 0) {	/* ended with a name */
4135 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4136 		goto errout;
4137 	}
4138 	FINISH_COMP;
4139 
4140 
4141 	/*
4142 	 * Now we have a list of components--we have to return instead an
4143 	 * array of them, but we can just copy the top level and leave
4144 	 * the rest as is
4145 	 */
4146 	(void) e_pm_create_components(dip, components);
4147 	for (i = 0; i < components; i++)
4148 		e_pm_set_max_power(dip, i, np[i]);
4149 
4150 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4151 	for (i = 0, p = phead; i < components; i++) {
4152 		ASSERT(p);
4153 		/*
4154 		 * Now sanity-check values:  levels must be monotonically
4155 		 * increasing
4156 		 */
4157 		if (p->comp->pmc_numlevels < 2) {
4158 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4159 			    "levels\n", pmf,
4160 			    p->comp->pmc_name, PM_DEVICE(dip),
4161 			    p->comp->pmc_numlevels))
4162 			goto errout;
4163 		}
4164 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4165 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4166 			    (p->comp->pmc_lvals[j] <=
4167 			    p->comp->pmc_lvals[j - 1]))) {
4168 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4169 				    "not mono. incr, %d follows %d\n", pmf,
4170 				    p->comp->pmc_name, PM_DEVICE(dip),
4171 				    p->comp->pmc_lvals[j],
4172 				    p->comp->pmc_lvals[j - 1]))
4173 				goto errout;
4174 			}
4175 		}
4176 		ret[i] = *p->comp;	/* struct assignment */
4177 		for (j = 0; j < i; j++) {
4178 			/*
4179 			 * Test for unique component names
4180 			 */
4181 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4182 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4183 				    "unique\n", pmf, ret[j].pmc_name,
4184 				    PM_DEVICE(dip)))
4185 				goto errout;
4186 			}
4187 		}
4188 		ptail = p;
4189 		p = p->next;
4190 		phead = p;	/* errout depends on phead making sense */
4191 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4192 		kmem_free(ptail, sizeof (*ptail));
4193 	}
4194 out:
4195 	ddi_prop_free(pp);
4196 	if (lvals)
4197 		kmem_free(lvals, nelems * sizeof (int));
4198 	if (lszs)
4199 		kmem_free(lszs, nelems * sizeof (int));
4200 	if (lnames)
4201 		kmem_free(lnames, nelems * sizeof (char *));
4202 	if (np)
4203 		kmem_free(np, nelems * sizeof (int));
4204 	return (ret);
4205 
4206 errout:
4207 	e_pm_destroy_components(dip);
4208 	*errp = 1;	/* signal failure */
4209 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4210 	for (i = 0; i < nelems - 1; i++)
4211 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4212 	if (nelems != 0)
4213 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4214 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4215 	for (p = phead; p; ) {
4216 		pm_comp_t *pp;
4217 		int n;
4218 
4219 		ptail = p;
4220 		/*
4221 		 * Free component data structures
4222 		 */
4223 		pp = p->comp;
4224 		n = pp->pmc_numlevels;
4225 		if (pp->pmc_name_sz) {
4226 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4227 		}
4228 		if (pp->pmc_lnames_sz) {
4229 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4230 		}
4231 		if (pp->pmc_lnames) {
4232 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4233 		}
4234 		if (pp->pmc_thresh) {
4235 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4236 		}
4237 		if (pp->pmc_lvals) {
4238 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4239 		}
4240 		p = ptail->next;
4241 		kmem_free(ptail, sizeof (*ptail));
4242 	}
4243 	if (ret != NULL)
4244 		kmem_free(ret, components * sizeof (pm_comp_t));
4245 	ret = NULL;
4246 	goto out;
4247 }
4248 
4249 /*
4250  * Set threshold values for a devices components by dividing the target
4251  * threshold (base) by the number of transitions and assign each transition
4252  * that threshold.  This will get the entire device down in the target time if
4253  * all components are idle and even if there are dependencies among components.
4254  *
4255  * Devices may well get powered all the way down before the target time, but
4256  * at least the EPA will be happy.
4257  */
4258 void
4259 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4260 {
4261 	PMD_FUNC(pmf, "set_device_threshold")
4262 	int target_threshold = (base * 95) / 100;
4263 	int level, comp;		/* loop counters */
4264 	int transitions = 0;
4265 	int ncomp = PM_NUMCMPTS(dip);
4266 	int thresh;
4267 	int remainder;
4268 	pm_comp_t *pmc;
4269 	int i, circ;
4270 
4271 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4272 	PM_LOCK_DIP(dip);
4273 	/*
4274 	 * First we handle the easy one.  If we're setting the default
4275 	 * threshold for a node with children, then we set it to the
4276 	 * default nexus threshold (currently 0) and mark it as default
4277 	 * nexus threshold instead
4278 	 */
4279 	if (PM_IS_NEXUS(dip)) {
4280 		if (flag == PMC_DEF_THRESH) {
4281 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4282 			    PM_DEVICE(dip)))
4283 			thresh = pm_default_nexus_threshold;
4284 			for (comp = 0; comp < ncomp; comp++) {
4285 				pmc = &PM_CP(dip, comp)->pmc_comp;
4286 				for (level = 1; level < pmc->pmc_numlevels;
4287 				    level++) {
4288 					pmc->pmc_thresh[level] = thresh;
4289 				}
4290 			}
4291 			DEVI(dip)->devi_pm_dev_thresh =
4292 			    pm_default_nexus_threshold;
4293 			/*
4294 			 * If the nexus node is being reconfigured back to
4295 			 * the default threshold, adjust the notlowest count.
4296 			 */
4297 			if (DEVI(dip)->devi_pm_flags &
4298 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4299 				PM_LOCK_POWER(dip, &circ);
4300 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4301 					if (PM_CURPOWER(dip, i) == 0)
4302 						continue;
4303 					mutex_enter(&pm_compcnt_lock);
4304 					ASSERT(pm_comps_notlowest);
4305 					pm_comps_notlowest--;
4306 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4307 					    "notlowest to %d\n", pmf,
4308 					    PM_DEVICE(dip), pm_comps_notlowest))
4309 					if (pm_comps_notlowest == 0)
4310 						pm_ppm_notify_all_lowest(dip,
4311 						    PM_ALL_LOWEST);
4312 					mutex_exit(&pm_compcnt_lock);
4313 				}
4314 				PM_UNLOCK_POWER(dip, circ);
4315 			}
4316 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4317 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4318 			PM_UNLOCK_DIP(dip);
4319 			return;
4320 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4321 			/*
4322 			 * If the nexus node is being configured for a
4323 			 * non-default threshold, include that node in
4324 			 * the notlowest accounting.
4325 			 */
4326 			PM_LOCK_POWER(dip, &circ);
4327 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4328 				if (PM_CURPOWER(dip, i) == 0)
4329 					continue;
4330 				mutex_enter(&pm_compcnt_lock);
4331 				if (pm_comps_notlowest == 0)
4332 					pm_ppm_notify_all_lowest(dip,
4333 					    PM_NOT_ALL_LOWEST);
4334 				pm_comps_notlowest++;
4335 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4336 				    "notlowest to %d\n", pmf,
4337 				    PM_DEVICE(dip), pm_comps_notlowest))
4338 				mutex_exit(&pm_compcnt_lock);
4339 			}
4340 			PM_UNLOCK_POWER(dip, circ);
4341 		}
4342 	}
4343 	/*
4344 	 * Compute the total number of transitions for all components
4345 	 * of the device.  Distribute the threshold evenly over them
4346 	 */
4347 	for (comp = 0; comp < ncomp; comp++) {
4348 		pmc = &PM_CP(dip, comp)->pmc_comp;
4349 		ASSERT(pmc->pmc_numlevels > 1);
4350 		transitions += pmc->pmc_numlevels - 1;
4351 	}
4352 	ASSERT(transitions);
4353 	thresh = target_threshold / transitions;
4354 
4355 	for (comp = 0; comp < ncomp; comp++) {
4356 		pmc = &PM_CP(dip, comp)->pmc_comp;
4357 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4358 			pmc->pmc_thresh[level] = thresh;
4359 		}
4360 	}
4361 
4362 #ifdef DEBUG
4363 	for (comp = 0; comp < ncomp; comp++) {
4364 		pmc = &PM_CP(dip, comp)->pmc_comp;
4365 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4366 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4367 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4368 			    comp, level, pmc->pmc_thresh[level]))
4369 		}
4370 	}
4371 #endif
4372 	/*
4373 	 * Distribute any remainder till they are all gone
4374 	 */
4375 	remainder = target_threshold - thresh * transitions;
4376 	level = 1;
4377 #ifdef DEBUG
4378 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4379 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4380 	    transitions))
4381 #endif
4382 	while (remainder > 0) {
4383 		comp = 0;
4384 		while (remainder && (comp < ncomp)) {
4385 			pmc = &PM_CP(dip, comp)->pmc_comp;
4386 			if (level < pmc->pmc_numlevels) {
4387 				pmc->pmc_thresh[level] += 1;
4388 				remainder--;
4389 			}
4390 			comp++;
4391 		}
4392 		level++;
4393 	}
4394 #ifdef DEBUG
4395 	for (comp = 0; comp < ncomp; comp++) {
4396 		pmc = &PM_CP(dip, comp)->pmc_comp;
4397 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4398 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4399 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4400 			    comp, level, pmc->pmc_thresh[level]))
4401 		}
4402 	}
4403 #endif
4404 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4405 	DEVI(dip)->devi_pm_dev_thresh = base;
4406 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4407 	DEVI(dip)->devi_pm_flags |= flag;
4408 	PM_UNLOCK_DIP(dip);
4409 }
4410 
4411 /*
4412  * Called when there is no old-style platform power management driver
4413  */
4414 static int
4415 ddi_no_platform_power(power_req_t *req)
4416 {
4417 	_NOTE(ARGUNUSED(req))
4418 	return (DDI_FAILURE);
4419 }
4420 
4421 /*
4422  * This function calls the entry point supplied by the platform-specific
4423  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4424  * The use of global for getting the  function name from platform-specific
4425  * pm driver is not ideal, but it is simple and efficient.
4426  * The previous property lookup was being done in the idle loop on swift
4427  * systems without pmc chips and hurt deskbench performance as well as
4428  * violating scheduler locking rules
4429  */
4430 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4431 
4432 /*
4433  * Old obsolete interface for a device to request a power change (but only
4434  * an increase in power)
4435  */
4436 int
4437 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4438 {
4439 	return (pm_raise_power(dip, cmpt, level));
4440 }
4441 
4442 /*
4443  * The old obsolete interface to platform power management.  Only used by
4444  * Gypsy platform and APM on X86.
4445  */
4446 int
4447 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4448 {
4449 	power_req_t	request;
4450 
4451 	request.request_type = PMR_SET_POWER;
4452 	request.req.set_power_req.who = dip;
4453 	request.req.set_power_req.cmpt = pm_cmpt;
4454 	request.req.set_power_req.level = pm_level;
4455 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4456 }
4457 
4458 /*
4459  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4460  * passed.  Returns true if subsequent processing could result in power being
4461  * removed from the device.  The arg is not currently used because it is
4462  * implicit in the operation of cpr/DR.
4463  */
4464 int
4465 ddi_removing_power(dev_info_t *dip)
4466 {
4467 	_NOTE(ARGUNUSED(dip))
4468 	return (pm_powering_down);
4469 }
4470 
4471 /*
4472  * Returns true if a device indicates that its parent handles suspend/resume
4473  * processing for it.
4474  */
4475 int
4476 e_ddi_parental_suspend_resume(dev_info_t *dip)
4477 {
4478 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4479 }
4480 
4481 /*
4482  * Called for devices which indicate that their parent does suspend/resume
4483  * handling for them
4484  */
4485 int
4486 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4487 {
4488 	power_req_t	request;
4489 	request.request_type = PMR_SUSPEND;
4490 	request.req.suspend_req.who = dip;
4491 	request.req.suspend_req.cmd = cmd;
4492 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4493 }
4494 
4495 /*
4496  * Called for devices which indicate that their parent does suspend/resume
4497  * handling for them
4498  */
4499 int
4500 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4501 {
4502 	power_req_t	request;
4503 	request.request_type = PMR_RESUME;
4504 	request.req.resume_req.who = dip;
4505 	request.req.resume_req.cmd = cmd;
4506 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4507 }
4508 
4509 /*
4510  * Old obsolete exported interface for drivers to create components.
4511  * This is now handled by exporting the pm-components property.
4512  */
4513 int
4514 pm_create_components(dev_info_t *dip, int num_components)
4515 {
4516 	PMD_FUNC(pmf, "pm_create_components")
4517 
4518 	if (num_components < 1)
4519 		return (DDI_FAILURE);
4520 
4521 	if (!DEVI_IS_ATTACHING(dip)) {
4522 		return (DDI_FAILURE);
4523 	}
4524 
4525 	/* don't need to lock dip because attach is single threaded */
4526 	if (DEVI(dip)->devi_pm_components) {
4527 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4528 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4529 		return (DDI_FAILURE);
4530 	}
4531 	e_pm_create_components(dip, num_components);
4532 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4533 	e_pm_default_components(dip, num_components);
4534 	return (DDI_SUCCESS);
4535 }
4536 
4537 /*
4538  * Obsolete interface previously called by drivers to destroy their components
4539  * at detach time.  This is now done automatically.  However, we need to keep
4540  * this for the old drivers.
4541  */
4542 void
4543 pm_destroy_components(dev_info_t *dip)
4544 {
4545 	PMD_FUNC(pmf, "pm_destroy_components")
4546 	dev_info_t *pdip = ddi_get_parent(dip);
4547 
4548 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4549 	    PM_DEVICE(dip)))
4550 	ASSERT(DEVI_IS_DETACHING(dip));
4551 #ifdef DEBUG
4552 	if (!PM_ISBC(dip))
4553 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4554 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4555 		    PM_ADDR(dip));
4556 #endif
4557 	/*
4558 	 * We ignore this unless this is an old-style driver, except for
4559 	 * printing the message above
4560 	 */
4561 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4562 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4563 		    PM_DEVICE(dip)))
4564 		return;
4565 	}
4566 	ASSERT(PM_GET_PM_INFO(dip));
4567 
4568 	/*
4569 	 * pm_unmanage will clear info pointer later, after dealing with
4570 	 * dependencies
4571 	 */
4572 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4573 	/*
4574 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4575 	 * Parents that get notification are not adjusted because their
4576 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4577 	 */
4578 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4579 		pm_rele_power(pdip);
4580 #ifdef DEBUG
4581 	else {
4582 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4583 		    pmf, PM_DEVICE(dip)))
4584 	}
4585 #endif
4586 	e_pm_destroy_components(dip);
4587 	/*
4588 	 * Forget we ever knew anything about the components of this  device
4589 	 */
4590 	DEVI(dip)->devi_pm_flags &=
4591 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4592 }
4593 
4594 /*
4595  * Exported interface for a driver to set a component busy.
4596  */
4597 int
4598 pm_busy_component(dev_info_t *dip, int cmpt)
4599 {
4600 	struct pm_component *cp;
4601 
4602 	ASSERT(dip != NULL);
4603 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4604 		return (DDI_FAILURE);
4605 	PM_LOCK_BUSY(dip);
4606 	cp->pmc_busycount++;
4607 	cp->pmc_timestamp = 0;
4608 	PM_UNLOCK_BUSY(dip);
4609 	return (DDI_SUCCESS);
4610 }
4611 
4612 /*
4613  * Exported interface for a driver to set a component idle.
4614  */
4615 int
4616 pm_idle_component(dev_info_t *dip, int cmpt)
4617 {
4618 	PMD_FUNC(pmf, "pm_idle_component")
4619 	struct pm_component *cp;
4620 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4621 
4622 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4623 		return (DDI_FAILURE);
4624 
4625 	PM_LOCK_BUSY(dip);
4626 	if (cp->pmc_busycount) {
4627 		if (--(cp->pmc_busycount) == 0)
4628 			cp->pmc_timestamp = gethrestime_sec();
4629 	} else {
4630 		cp->pmc_timestamp = gethrestime_sec();
4631 	}
4632 
4633 	PM_UNLOCK_BUSY(dip);
4634 
4635 	/*
4636 	 * if device becomes idle during idle down period, try scan it down
4637 	 */
4638 	if (scanp && PM_IS_PID(dip)) {
4639 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4640 		    PM_DEVICE(dip)))
4641 		pm_rescan(dip);
4642 		return (DDI_SUCCESS);
4643 	}
4644 
4645 	/*
4646 	 * handle scan not running with nexus threshold == 0
4647 	 */
4648 
4649 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4650 		pm_rescan(dip);
4651 	}
4652 
4653 	return (DDI_SUCCESS);
4654 }
4655 
4656 /*
4657  * This is the old  obsolete interface called by drivers to set their normal
4658  * power.  Thus we can't fix its behavior or return a value.
4659  * This functionality is replaced by the pm-component property.
4660  * We'll only get components destroyed while no power management is
4661  * going on (and the device is detached), so we don't need a mutex here
4662  */
4663 void
4664 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4665 {
4666 	PMD_FUNC(pmf, "set_normal_power")
4667 #ifdef DEBUG
4668 	if (!PM_ISBC(dip))
4669 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4670 		    "(driver exporting pm-components property) ignored",
4671 		    PM_NAME(dip), PM_ADDR(dip));
4672 #endif
4673 	if (PM_ISBC(dip)) {
4674 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4675 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4676 		e_pm_set_max_power(dip, comp, level);
4677 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4678 	}
4679 }
4680 
4681 /*
4682  * Called on a successfully detached driver to free pm resources
4683  */
4684 static void
4685 pm_stop(dev_info_t *dip)
4686 {
4687 	PMD_FUNC(pmf, "stop")
4688 	dev_info_t *pdip = ddi_get_parent(dip);
4689 
4690 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4691 	/* stopping scan, destroy scan data structure */
4692 	if (!PM_ISBC(dip)) {
4693 		pm_scan_stop(dip);
4694 		pm_scan_fini(dip);
4695 	}
4696 
4697 	if (PM_GET_PM_INFO(dip) != NULL) {
4698 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4699 			/*
4700 			 * Old style driver may have called
4701 			 * pm_destroy_components already, but just in case ...
4702 			 */
4703 			e_pm_destroy_components(dip);
4704 		} else {
4705 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4706 			    pmf, PM_DEVICE(dip)))
4707 		}
4708 	} else {
4709 		if (PM_NUMCMPTS(dip))
4710 			e_pm_destroy_components(dip);
4711 		else {
4712 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4713 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4714 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4715 					pm_rele_power(pdip);
4716 				} else if (pdip &&
4717 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4718 					(void) mdi_power(pdip,
4719 					    MDI_PM_RELE_POWER,
4720 					    (void *)dip, NULL, 0);
4721 				}
4722 			}
4723 		}
4724 	}
4725 }
4726 
4727 /*
4728  * The node is the subject of a reparse pm props ioctl. Throw away the old
4729  * info and start over.
4730  */
4731 int
4732 e_new_pm_props(dev_info_t *dip)
4733 {
4734 	if (PM_GET_PM_INFO(dip) != NULL) {
4735 		pm_stop(dip);
4736 
4737 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4738 			return (DDI_FAILURE);
4739 		}
4740 	}
4741 	e_pm_props(dip);
4742 	return (DDI_SUCCESS);
4743 }
4744 
4745 /*
4746  * Device has been attached, so process its pm properties
4747  */
4748 void
4749 e_pm_props(dev_info_t *dip)
4750 {
4751 	char *pp;
4752 	int len;
4753 	int flags = 0;
4754 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4755 
4756 	/*
4757 	 * It doesn't matter if we do this more than once, we should always
4758 	 * get the same answers, and if not, then the last one in is the
4759 	 * best one.
4760 	 */
4761 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4762 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4763 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4764 			flags = PMC_NEEDS_SR;
4765 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4766 			flags = PMC_NO_SR;
4767 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4768 			flags = PMC_PARENTAL_SR;
4769 		} else {
4770 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4771 			    "%s property value '%s'", PM_NAME(dip),
4772 			    PM_ADDR(dip), "pm-hardware-state", pp);
4773 		}
4774 		kmem_free(pp, len);
4775 	}
4776 	/*
4777 	 * This next segment (PMC_WANTS_NOTIFY) is in
4778 	 * support of nexus drivers which will want to be involved in
4779 	 * (or at least notified of) their child node's power level transitions.
4780 	 * "pm-want-child-notification?" is defined by the parent.
4781 	 */
4782 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4783 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4784 		flags |= PMC_WANTS_NOTIFY;
4785 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4786 	    dip, propflag, "pm-want-child-notification?"));
4787 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4788 	    "no-involuntary-power-cycles"))
4789 		flags |= PMC_NO_INVOL;
4790 	/*
4791 	 * Is the device a CPU device?
4792 	 */
4793 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4794 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4795 		if (strcmp(pp, "CPU") == 0) {
4796 			flags |= PMC_CPU_DEVICE;
4797 		} else {
4798 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4799 			    "%s property value '%s'", PM_NAME(dip),
4800 			    PM_ADDR(dip), "pm-class", pp);
4801 		}
4802 		kmem_free(pp, len);
4803 	}
4804 	/* devfs single threads us */
4805 	DEVI(dip)->devi_pm_flags |= flags;
4806 }
4807 
4808 /*
4809  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4810  * driver which has claimed a node.
4811  * Sets old_power in arg struct.
4812  */
4813 static int
4814 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4815     ddi_ctl_enum_t ctlop, void *arg, void *result)
4816 {
4817 	_NOTE(ARGUNUSED(dip))
4818 	PMD_FUNC(pmf, "ctlops")
4819 	power_req_t *reqp = (power_req_t *)arg;
4820 	int retval;
4821 	dev_info_t *target_dip;
4822 	int new_level, old_level, cmpt;
4823 #ifdef PMDDEBUG
4824 	char *format;
4825 #endif
4826 
4827 	/*
4828 	 * The interface for doing the actual power level changes is now
4829 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4830 	 * different platform-specific power control drivers.
4831 	 *
4832 	 * This driver implements the "default" version of this interface.
4833 	 * If no ppm driver has been installed then this interface is called
4834 	 * instead.
4835 	 */
4836 	ASSERT(dip == NULL);
4837 	switch (ctlop) {
4838 	case DDI_CTLOPS_POWER:
4839 		switch (reqp->request_type) {
4840 		case PMR_PPM_SET_POWER:
4841 		{
4842 			target_dip = reqp->req.ppm_set_power_req.who;
4843 			ASSERT(target_dip == rdip);
4844 			new_level = reqp->req.ppm_set_power_req.new_level;
4845 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4846 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4847 			old_level = PM_CURPOWER(target_dip, cmpt);
4848 			reqp->req.ppm_set_power_req.old_level = old_level;
4849 			retval = pm_power(target_dip, cmpt, new_level);
4850 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4851 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4852 			    old_level, new_level, (retval == DDI_SUCCESS ?
4853 			    "chd" : "no chg")))
4854 			return (retval);
4855 		}
4856 
4857 		case PMR_PPM_PRE_DETACH:
4858 		case PMR_PPM_POST_DETACH:
4859 		case PMR_PPM_PRE_ATTACH:
4860 		case PMR_PPM_POST_ATTACH:
4861 		case PMR_PPM_PRE_PROBE:
4862 		case PMR_PPM_POST_PROBE:
4863 		case PMR_PPM_PRE_RESUME:
4864 		case PMR_PPM_INIT_CHILD:
4865 		case PMR_PPM_UNINIT_CHILD:
4866 #ifdef PMDDEBUG
4867 			switch (reqp->request_type) {
4868 				case PMR_PPM_PRE_DETACH:
4869 					format = "%s: PMR_PPM_PRE_DETACH "
4870 					    "%s@%s(%s#%d)\n";
4871 					break;
4872 				case PMR_PPM_POST_DETACH:
4873 					format = "%s: PMR_PPM_POST_DETACH "
4874 					    "%s@%s(%s#%d) rets %d\n";
4875 					break;
4876 				case PMR_PPM_PRE_ATTACH:
4877 					format = "%s: PMR_PPM_PRE_ATTACH "
4878 					    "%s@%s(%s#%d)\n";
4879 					break;
4880 				case PMR_PPM_POST_ATTACH:
4881 					format = "%s: PMR_PPM_POST_ATTACH "
4882 					    "%s@%s(%s#%d) rets %d\n";
4883 					break;
4884 				case PMR_PPM_PRE_PROBE:
4885 					format = "%s: PMR_PPM_PRE_PROBE "
4886 					    "%s@%s(%s#%d)\n";
4887 					break;
4888 				case PMR_PPM_POST_PROBE:
4889 					format = "%s: PMR_PPM_POST_PROBE "
4890 					    "%s@%s(%s#%d) rets %d\n";
4891 					break;
4892 				case PMR_PPM_PRE_RESUME:
4893 					format = "%s: PMR_PPM_PRE_RESUME "
4894 					    "%s@%s(%s#%d) rets %d\n";
4895 					break;
4896 				case PMR_PPM_INIT_CHILD:
4897 					format = "%s: PMR_PPM_INIT_CHILD "
4898 					    "%s@%s(%s#%d)\n";
4899 					break;
4900 				case PMR_PPM_UNINIT_CHILD:
4901 					format = "%s: PMR_PPM_UNINIT_CHILD "
4902 					    "%s@%s(%s#%d)\n";
4903 					break;
4904 				default:
4905 					break;
4906 			}
4907 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4908 			    reqp->req.ppm_config_req.result))
4909 #endif
4910 			return (DDI_SUCCESS);
4911 
4912 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4913 			/*
4914 			 * Nothing for us to do
4915 			 */
4916 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4917 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4918 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4919 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4920 			    reqp->req.ppm_notify_level_req.cmpt,
4921 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4922 			    reqp->req.ppm_notify_level_req.cmpt),
4923 			    reqp->req.ppm_notify_level_req.new_level))
4924 			return (DDI_SUCCESS);
4925 
4926 		case PMR_PPM_UNMANAGE:
4927 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4928 			    pmf, PM_DEVICE(rdip)))
4929 			return (DDI_SUCCESS);
4930 
4931 		case PMR_PPM_LOCK_POWER:
4932 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4933 			    reqp->req.ppm_lock_power_req.circp);
4934 			return (DDI_SUCCESS);
4935 
4936 		case PMR_PPM_UNLOCK_POWER:
4937 			pm_unlock_power_single(
4938 			    reqp->req.ppm_unlock_power_req.who,
4939 			    reqp->req.ppm_unlock_power_req.circ);
4940 			return (DDI_SUCCESS);
4941 
4942 		case PMR_PPM_TRY_LOCK_POWER:
4943 			*(int *)result = pm_try_locking_power_single(
4944 			    reqp->req.ppm_lock_power_req.who,
4945 			    reqp->req.ppm_lock_power_req.circp);
4946 			return (DDI_SUCCESS);
4947 
4948 		case PMR_PPM_POWER_LOCK_OWNER:
4949 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4950 			ASSERT(target_dip == rdip);
4951 			reqp->req.ppm_power_lock_owner_req.owner =
4952 			    DEVI(rdip)->devi_busy_thread;
4953 			return (DDI_SUCCESS);
4954 		default:
4955 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4956 			return (DDI_FAILURE);
4957 		}
4958 
4959 	default:
4960 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4961 		return (DDI_FAILURE);
4962 	}
4963 }
4964 
4965 /*
4966  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4967  * power_ops struct for this functionality instead?
4968  * However, we only ever do this on a ppm driver.
4969  */
4970 int
4971 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4972 {
4973 	int (*fp)();
4974 
4975 	/* if no ppm handler, call the default routine */
4976 	if (d == NULL) {
4977 		return (pm_default_ctlops(d, r, op, a, v));
4978 	}
4979 	if (!d || !r)
4980 		return (DDI_FAILURE);
4981 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4982 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4983 
4984 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4985 	return ((*fp)(d, r, op, a, v));
4986 }
4987 
4988 /*
4989  * Called on a node when attach completes or the driver makes its first pm
4990  * call (whichever comes first).
4991  * In the attach case, device may not be power manageable at all.
4992  * Don't need to lock the dip because we're single threaded by the devfs code
4993  */
4994 static int
4995 pm_start(dev_info_t *dip)
4996 {
4997 	PMD_FUNC(pmf, "start")
4998 	int ret;
4999 	dev_info_t *pdip = ddi_get_parent(dip);
5000 	int e_pm_manage(dev_info_t *, int);
5001 	void pm_noinvol_specd(dev_info_t *dip);
5002 
5003 	e_pm_props(dip);
5004 	pm_noinvol_specd(dip);
5005 	/*
5006 	 * If this dip has already been processed, don't mess with it
5007 	 * (but decrement the speculative count we did above, as whatever
5008 	 * code put it under pm already will have dealt with it)
5009 	 */
5010 	if (PM_GET_PM_INFO(dip)) {
5011 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5012 		    pmf, PM_DEVICE(dip)))
5013 		return (0);
5014 	}
5015 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5016 
5017 	if (PM_GET_PM_INFO(dip) == NULL) {
5018 		/*
5019 		 * keep the kidsupcount increment as is
5020 		 */
5021 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5022 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5023 			pm_hold_power(pdip);
5024 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5025 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5026 			    (void *)dip, NULL, 0);
5027 		}
5028 
5029 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5030 		    "left up\n", pmf, PM_DEVICE(dip)))
5031 	}
5032 
5033 	return (ret);
5034 }
5035 
5036 /*
5037  * Keep a list of recorded thresholds.  For now we just keep a list and
5038  * search it linearly.  We don't expect too many entries.  Can always hash it
5039  * later if we need to.
5040  */
5041 void
5042 pm_record_thresh(pm_thresh_rec_t *rp)
5043 {
5044 	pm_thresh_rec_t *pptr, *ptr;
5045 
5046 	ASSERT(*rp->ptr_physpath);
5047 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5048 	for (pptr = NULL, ptr = pm_thresh_head;
5049 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5050 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5051 			/* replace this one */
5052 			rp->ptr_next = ptr->ptr_next;
5053 			if (pptr) {
5054 				pptr->ptr_next = rp;
5055 			} else {
5056 				pm_thresh_head = rp;
5057 			}
5058 			rw_exit(&pm_thresh_rwlock);
5059 			kmem_free(ptr, ptr->ptr_size);
5060 			return;
5061 		}
5062 		continue;
5063 	}
5064 	/*
5065 	 * There was not a match in the list, insert this one in front
5066 	 */
5067 	if (pm_thresh_head) {
5068 		rp->ptr_next = pm_thresh_head;
5069 		pm_thresh_head = rp;
5070 	} else {
5071 		rp->ptr_next = NULL;
5072 		pm_thresh_head = rp;
5073 	}
5074 	rw_exit(&pm_thresh_rwlock);
5075 }
5076 
5077 /*
5078  * Create a new dependency record and hang a new dependency entry off of it
5079  */
5080 pm_pdr_t *
5081 newpdr(char *kept, char *keeps, int isprop)
5082 {
5083 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5084 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5085 	p->pdr_size = size;
5086 	p->pdr_isprop = isprop;
5087 	p->pdr_kept_paths = NULL;
5088 	p->pdr_kept_count = 0;
5089 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5090 	(void) strcpy(p->pdr_kept, kept);
5091 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5092 	(void) strcpy(p->pdr_keeper, keeps);
5093 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5094 	    (intptr_t)p + size);
5095 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5096 	    (intptr_t)p + size);
5097 	return (p);
5098 }
5099 
5100 /*
5101  * Keep a list of recorded dependencies.  We only keep the
5102  * keeper -> kept list for simplification. At this point We do not
5103  * care about whether the devices are attached or not yet,
5104  * this would be done in pm_keeper() and pm_kept().
5105  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5106  * and it is up to the user to issue the ioctl again if they want it
5107  * (e.g. pmconfig)
5108  * Returns true if dependency already exists in the list.
5109  */
5110 int
5111 pm_record_keeper(char *kept, char *keeper, int isprop)
5112 {
5113 	PMD_FUNC(pmf, "record_keeper")
5114 	pm_pdr_t *npdr, *ppdr, *pdr;
5115 
5116 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5117 	ASSERT(kept && keeper);
5118 #ifdef DEBUG
5119 	if (pm_debug & PMD_KEEPS)
5120 		prdeps("pm_record_keeper entry");
5121 #endif
5122 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5123 	    ppdr = pdr, pdr = pdr->pdr_next) {
5124 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5125 		    pdr->pdr_keeper))
5126 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5127 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5128 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5129 			return (1);
5130 		}
5131 	}
5132 	/*
5133 	 * We did not find any match, so we have to make an entry
5134 	 */
5135 	npdr = newpdr(kept, keeper, isprop);
5136 	if (ppdr) {
5137 		ASSERT(ppdr->pdr_next == NULL);
5138 		ppdr->pdr_next = npdr;
5139 	} else {
5140 		ASSERT(pm_dep_head == NULL);
5141 		pm_dep_head = npdr;
5142 	}
5143 #ifdef DEBUG
5144 	if (pm_debug & PMD_KEEPS)
5145 		prdeps("pm_record_keeper after new record");
5146 #endif
5147 	if (!isprop)
5148 		pm_unresolved_deps++;
5149 	else
5150 		pm_prop_deps++;
5151 	return (0);
5152 }
5153 
5154 /*
5155  * Look up this device in the set of devices we've seen ioctls for
5156  * to see if we are holding a threshold spec for it.  If so, make it so.
5157  * At ioctl time, we were given the physical path of the device.
5158  */
5159 int
5160 pm_thresh_specd(dev_info_t *dip)
5161 {
5162 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5163 	char *path = 0;
5164 	char pathbuf[MAXNAMELEN];
5165 	pm_thresh_rec_t *rp;
5166 
5167 	path = ddi_pathname(dip, pathbuf);
5168 
5169 	rw_enter(&pm_thresh_rwlock, RW_READER);
5170 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5171 		if (strcmp(rp->ptr_physpath, path) != 0)
5172 			continue;
5173 		pm_apply_recorded_thresh(dip, rp);
5174 		rw_exit(&pm_thresh_rwlock);
5175 		return (1);
5176 	}
5177 	rw_exit(&pm_thresh_rwlock);
5178 	return (0);
5179 }
5180 
5181 static int
5182 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5183 {
5184 	PMD_FUNC(pmf, "set_keeping")
5185 	pm_info_t *kept_info;
5186 	int j, up = 0, circ;
5187 	void prdeps(char *);
5188 
5189 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5190 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5191 #ifdef DEBUG
5192 	if (pm_debug & PMD_KEEPS)
5193 		prdeps("Before PAD\n");
5194 #endif
5195 	ASSERT(keeper != kept);
5196 	if (PM_GET_PM_INFO(keeper) == NULL) {
5197 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5198 		    "%s@%s(%s#%d), but the latter is not power managed",
5199 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5200 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5201 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5202 		return (0);
5203 	}
5204 	kept_info = PM_GET_PM_INFO(kept);
5205 	ASSERT(kept_info);
5206 	PM_LOCK_POWER(keeper, &circ);
5207 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5208 		if (PM_CURPOWER(keeper, j)) {
5209 			up++;
5210 			break;
5211 		}
5212 	}
5213 	if (up) {
5214 		/* Bringup and maintain a hold on the kept */
5215 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5216 		    PM_DEVICE(kept)))
5217 		bring_pmdep_up(kept, 1);
5218 	}
5219 	PM_UNLOCK_POWER(keeper, circ);
5220 #ifdef DEBUG
5221 	if (pm_debug & PMD_KEEPS)
5222 		prdeps("After PAD\n");
5223 #endif
5224 	return (1);
5225 }
5226 
5227 /*
5228  * Should this device keep up another device?
5229  * Look up this device in the set of devices we've seen ioctls for
5230  * to see if we are holding a dependency spec for it.  If so, make it so.
5231  * Because we require the kept device to be attached already in order to
5232  * make the list entry (and hold it), we only need to look for keepers.
5233  * At ioctl time, we were given the physical path of the device.
5234  */
5235 int
5236 pm_keeper(char *keeper)
5237 {
5238 	PMD_FUNC(pmf, "keeper")
5239 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5240 	dev_info_t *dip;
5241 	pm_pdr_t *dp;
5242 	dev_info_t *kept = NULL;
5243 	int ret = 0;
5244 	int i;
5245 
5246 	if (!pm_unresolved_deps && !pm_prop_deps)
5247 		return (0);
5248 	ASSERT(keeper != NULL);
5249 	dip = pm_name_to_dip(keeper, 1);
5250 	if (dip == NULL)
5251 		return (0);
5252 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5253 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5254 		if (!dp->pdr_isprop) {
5255 			if (!pm_unresolved_deps)
5256 				continue;
5257 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5258 			if (dp->pdr_satisfied) {
5259 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5260 				continue;
5261 			}
5262 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5263 				ret += pm_apply_recorded_dep(dip, dp);
5264 			}
5265 		} else {
5266 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5267 				continue;
5268 			for (i = 0; i < dp->pdr_kept_count; i++) {
5269 				if (dp->pdr_kept_paths[i] == NULL)
5270 					continue;
5271 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5272 				if (kept == NULL)
5273 					continue;
5274 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5275 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5276 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5277 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5278 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5279 				    dp->pdr_kept_count))
5280 				if (kept != dip) {
5281 					ret += pm_set_keeping(dip, kept);
5282 				}
5283 				ddi_release_devi(kept);
5284 			}
5285 
5286 		}
5287 	}
5288 	ddi_release_devi(dip);
5289 	return (ret);
5290 }
5291 
5292 /*
5293  * Should this device be kept up by another device?
5294  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5295  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5296  * kept device lists.
5297  */
5298 static int
5299 pm_kept(char *keptp)
5300 {
5301 	PMD_FUNC(pmf, "kept")
5302 	pm_pdr_t *dp;
5303 	int found = 0;
5304 	int ret = 0;
5305 	dev_info_t *keeper;
5306 	dev_info_t *kept;
5307 	size_t length;
5308 	int i;
5309 	char **paths;
5310 	char *path;
5311 
5312 	ASSERT(keptp != NULL);
5313 	kept = pm_name_to_dip(keptp, 1);
5314 	if (kept == NULL)
5315 		return (0);
5316 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5317 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5318 		if (dp->pdr_isprop) {
5319 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5320 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5321 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5322 				/*
5323 				 * Dont allow self dependency.
5324 				 */
5325 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5326 					continue;
5327 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5328 				if (keeper == NULL)
5329 					continue;
5330 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5331 				    "%p\n", pmf, (void *)kept))
5332 #ifdef DEBUG
5333 				if (pm_debug & PMD_DEP)
5334 					prdeps("Before Adding from pm_kept\n");
5335 #endif
5336 				/*
5337 				 * Add ourselves to the dip list.
5338 				 */
5339 				if (dp->pdr_kept_count == 0) {
5340 					length = strlen(keptp) + 1;
5341 					path =
5342 					    kmem_alloc(length, KM_SLEEP);
5343 					paths = kmem_alloc(sizeof (char **),
5344 					    KM_SLEEP);
5345 					(void) strcpy(path, keptp);
5346 					paths[0] = path;
5347 					dp->pdr_kept_paths = paths;
5348 					dp->pdr_kept_count++;
5349 				} else {
5350 					/* Check to see if already on list */
5351 					for (i = 0; i < dp->pdr_kept_count;
5352 					    i++) {
5353 						if (strcmp(keptp,
5354 						    dp->pdr_kept_paths[i])
5355 						    == 0) {
5356 							found++;
5357 							break;
5358 						}
5359 					}
5360 					if (found) {
5361 						ddi_release_devi(keeper);
5362 						continue;
5363 					}
5364 					length = dp->pdr_kept_count *
5365 					    sizeof (char **);
5366 					paths = kmem_alloc(
5367 					    length + sizeof (char **),
5368 					    KM_SLEEP);
5369 					if (dp->pdr_kept_count) {
5370 						bcopy(dp->pdr_kept_paths,
5371 						    paths, length);
5372 						kmem_free(dp->pdr_kept_paths,
5373 						    length);
5374 					}
5375 					dp->pdr_kept_paths = paths;
5376 					length = strlen(keptp) + 1;
5377 					path =
5378 					    kmem_alloc(length, KM_SLEEP);
5379 					(void) strcpy(path, keptp);
5380 					dp->pdr_kept_paths[i] = path;
5381 					dp->pdr_kept_count++;
5382 				}
5383 #ifdef DEBUG
5384 				if (pm_debug & PMD_DEP)
5385 					prdeps("After from pm_kept\n");
5386 #endif
5387 				if (keeper) {
5388 					ret += pm_set_keeping(keeper, kept);
5389 					ddi_release_devi(keeper);
5390 				}
5391 			}
5392 		} else {
5393 			/*
5394 			 * pm_keeper would be called later to do
5395 			 * the actual pm_set_keeping.
5396 			 */
5397 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5398 			    pmf, (void *)kept))
5399 #ifdef DEBUG
5400 			if (pm_debug & PMD_DEP)
5401 				prdeps("Before Adding from pm_kept\n");
5402 #endif
5403 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5404 				if (dp->pdr_kept_paths == NULL) {
5405 					length = strlen(keptp) + 1;
5406 					path =
5407 					    kmem_alloc(length, KM_SLEEP);
5408 					paths = kmem_alloc(sizeof (char **),
5409 					    KM_SLEEP);
5410 					(void) strcpy(path, keptp);
5411 					paths[0] = path;
5412 					dp->pdr_kept_paths = paths;
5413 					dp->pdr_kept_count++;
5414 				}
5415 			}
5416 #ifdef DEBUG
5417 			if (pm_debug & PMD_DEP)
5418 				prdeps("After from pm_kept\n");
5419 #endif
5420 		}
5421 	}
5422 	ddi_release_devi(kept);
5423 	return (ret);
5424 }
5425 
5426 /*
5427  * Apply a recorded dependency.  dp specifies the dependency, and
5428  * keeper is already known to be the device that keeps up the other (kept) one.
5429  * We have to the whole tree for the "kept" device, then apply
5430  * the dependency (which may already be applied).
5431  */
5432 int
5433 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5434 {
5435 	PMD_FUNC(pmf, "apply_recorded_dep")
5436 	dev_info_t *kept = NULL;
5437 	int ret = 0;
5438 	char *keptp = NULL;
5439 
5440 	/*
5441 	 * Device to Device dependency can only be 1 to 1.
5442 	 */
5443 	if (dp->pdr_kept_paths == NULL)
5444 		return (0);
5445 	keptp = dp->pdr_kept_paths[0];
5446 	if (keptp == NULL)
5447 		return (0);
5448 	ASSERT(*keptp != '\0');
5449 	kept = pm_name_to_dip(keptp, 1);
5450 	if (kept == NULL)
5451 		return (0);
5452 	if (kept) {
5453 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5454 		    dp->pdr_keeper, keptp))
5455 		if (pm_set_keeping(keeper, kept)) {
5456 			ASSERT(dp->pdr_satisfied == 0);
5457 			dp->pdr_satisfied = 1;
5458 			ASSERT(pm_unresolved_deps);
5459 			pm_unresolved_deps--;
5460 			ret++;
5461 		}
5462 	}
5463 	ddi_release_devi(kept);
5464 
5465 	return (ret);
5466 }
5467 
5468 /*
5469  * Called from common/io/pm.c
5470  */
5471 int
5472 pm_cur_power(pm_component_t *cp)
5473 {
5474 	return (cur_power(cp));
5475 }
5476 
5477 /*
5478  * External interface to sanity-check a power level.
5479  */
5480 int
5481 pm_valid_power(dev_info_t *dip, int comp, int level)
5482 {
5483 	PMD_FUNC(pmf, "valid_power")
5484 
5485 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5486 		return (e_pm_valid_power(dip, comp, level));
5487 	else {
5488 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5489 		    pmf, comp, PM_NUMCMPTS(dip), level))
5490 		return (0);
5491 	}
5492 }
5493 
5494 /*
5495  * Called when a device that is direct power managed needs to change state.
5496  * This routine arranges to block the request until the process managing
5497  * the device makes the change (or some other incompatible change) or
5498  * the process closes /dev/pm.
5499  */
5500 static int
5501 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5502 {
5503 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5504 	int ret = 0;
5505 	void pm_dequeue_blocked(pm_rsvp_t *);
5506 	void pm_enqueue_blocked(pm_rsvp_t *);
5507 
5508 	ASSERT(!pm_processes_stopped);
5509 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5510 	new->pr_dip = dip;
5511 	new->pr_comp = comp;
5512 	new->pr_newlevel = newpower;
5513 	new->pr_oldlevel = oldpower;
5514 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5515 	mutex_enter(&pm_rsvp_lock);
5516 	pm_enqueue_blocked(new);
5517 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5518 	    PM_CANBLOCK_BLOCK);
5519 	PM_UNLOCK_DIP(dip);
5520 	/*
5521 	 * truss may make the cv_wait_sig return prematurely
5522 	 */
5523 	while (ret == 0) {
5524 		/*
5525 		 * Normally there will be no user context involved, but if
5526 		 * there is (e.g. we are here via an ioctl call to a driver)
5527 		 * then we should allow the process to abort the request,
5528 		 * or we get an unkillable process if the same thread does
5529 		 * PM_DIRECT_PM and pm_raise_power
5530 		 */
5531 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5532 			ret = PMP_FAIL;
5533 		} else {
5534 			ret = new->pr_retval;
5535 		}
5536 	}
5537 	pm_dequeue_blocked(new);
5538 	mutex_exit(&pm_rsvp_lock);
5539 	cv_destroy(&new->pr_cv);
5540 	kmem_free(new, sizeof (*new));
5541 	return (ret);
5542 }
5543 
5544 /*
5545  * Returns true if the process is interested in power level changes (has issued
5546  * PM_GET_STATE_CHANGE ioctl).
5547  */
5548 int
5549 pm_interest_registered(int clone)
5550 {
5551 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5552 	return (pm_interest[clone]);
5553 }
5554 
5555 /*
5556  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5557  * watch all state transitions (dip == NULL).  Set up data
5558  * structs to communicate with process about state changes.
5559  */
5560 void
5561 pm_register_watcher(int clone, dev_info_t *dip)
5562 {
5563 	pscc_t	*p;
5564 	psce_t	*psce;
5565 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5566 
5567 	/*
5568 	 * We definitely need a control struct, then we have to search to see
5569 	 * there is already an entries struct (in the dip != NULL case).
5570 	 */
5571 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5572 	pscc->pscc_clone = clone;
5573 	pscc->pscc_dip = dip;
5574 
5575 	if (dip) {
5576 		int found = 0;
5577 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5578 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5579 			/*
5580 			 * Already an entry for this clone, so just use it
5581 			 * for the new one (for the case where a single
5582 			 * process is watching multiple devices)
5583 			 */
5584 			if (p->pscc_clone == clone) {
5585 				pscc->pscc_entries = p->pscc_entries;
5586 				pscc->pscc_entries->psce_references++;
5587 				found++;
5588 				break;
5589 			}
5590 		}
5591 		if (!found) {		/* create a new one */
5592 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5593 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5594 			psce->psce_first =
5595 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5596 			    KM_SLEEP);
5597 			psce->psce_in = psce->psce_out = psce->psce_first;
5598 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5599 			psce->psce_references = 1;
5600 			pscc->pscc_entries = psce;
5601 		}
5602 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5603 		rw_exit(&pm_pscc_direct_rwlock);
5604 	} else {
5605 		ASSERT(!pm_interest_registered(clone));
5606 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5607 #ifdef DEBUG
5608 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5609 			/*
5610 			 * Should not be an entry for this clone!
5611 			 */
5612 			ASSERT(p->pscc_clone != clone);
5613 		}
5614 #endif
5615 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5616 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5617 		    PSCCOUNT, KM_SLEEP);
5618 		psce->psce_in = psce->psce_out = psce->psce_first;
5619 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5620 		psce->psce_references = 1;
5621 		pscc->pscc_entries = psce;
5622 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5623 		pm_interest[clone] = 1;
5624 		rw_exit(&pm_pscc_interest_rwlock);
5625 	}
5626 }
5627 
5628 /*
5629  * Remove the given entry from the blocked list
5630  */
5631 void
5632 pm_dequeue_blocked(pm_rsvp_t *p)
5633 {
5634 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5635 	if (pm_blocked_list == p) {
5636 		ASSERT(p->pr_prev == NULL);
5637 		if (p->pr_next != NULL)
5638 			p->pr_next->pr_prev = NULL;
5639 		pm_blocked_list = p->pr_next;
5640 	} else {
5641 		ASSERT(p->pr_prev != NULL);
5642 		p->pr_prev->pr_next = p->pr_next;
5643 		if (p->pr_next != NULL)
5644 			p->pr_next->pr_prev = p->pr_prev;
5645 	}
5646 }
5647 
5648 /*
5649  * Remove the given control struct from the given list
5650  */
5651 static void
5652 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5653 {
5654 	if (*list == p) {
5655 		ASSERT(p->pscc_prev == NULL);
5656 		if (p->pscc_next != NULL)
5657 			p->pscc_next->pscc_prev = NULL;
5658 		*list = p->pscc_next;
5659 	} else {
5660 		ASSERT(p->pscc_prev != NULL);
5661 		p->pscc_prev->pscc_next = p->pscc_next;
5662 		if (p->pscc_next != NULL)
5663 			p->pscc_next->pscc_prev = p->pscc_prev;
5664 	}
5665 }
5666 
5667 /*
5668  * Stick the control struct specified on the front of the list
5669  */
5670 static void
5671 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5672 {
5673 	pscc_t *h;	/* entry at head of list */
5674 	if ((h = *list) == NULL) {
5675 		*list = p;
5676 		ASSERT(p->pscc_next == NULL);
5677 		ASSERT(p->pscc_prev == NULL);
5678 	} else {
5679 		p->pscc_next = h;
5680 		ASSERT(h->pscc_prev == NULL);
5681 		h->pscc_prev = p;
5682 		ASSERT(p->pscc_prev == NULL);
5683 		*list = p;
5684 	}
5685 }
5686 
5687 /*
5688  * If dip is NULL, process is closing "clone" clean up all its registrations.
5689  * Otherwise only clean up those for dip because process is just giving up
5690  * control of a direct device.
5691  */
5692 void
5693 pm_deregister_watcher(int clone, dev_info_t *dip)
5694 {
5695 	pscc_t	*p, *pn;
5696 	psce_t	*psce;
5697 	int found = 0;
5698 
5699 	if (dip == NULL) {
5700 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5701 		for (p = pm_pscc_interest; p; p = pn) {
5702 			pn = p->pscc_next;
5703 			if (p->pscc_clone == clone) {
5704 				pm_dequeue_pscc(p, &pm_pscc_interest);
5705 				psce = p->pscc_entries;
5706 				ASSERT(psce->psce_references == 1);
5707 				mutex_destroy(&psce->psce_lock);
5708 				kmem_free(psce->psce_first,
5709 				    sizeof (pm_state_change_t) * PSCCOUNT);
5710 				kmem_free(psce, sizeof (*psce));
5711 				kmem_free(p, sizeof (*p));
5712 			}
5713 		}
5714 		pm_interest[clone] = 0;
5715 		rw_exit(&pm_pscc_interest_rwlock);
5716 	}
5717 	found = 0;
5718 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5719 	for (p = pm_pscc_direct; p; p = pn) {
5720 		pn = p->pscc_next;
5721 		if ((dip && p->pscc_dip == dip) ||
5722 		    (dip == NULL && clone == p->pscc_clone)) {
5723 			ASSERT(clone == p->pscc_clone);
5724 			found++;
5725 			/*
5726 			 * Remove from control list
5727 			 */
5728 			pm_dequeue_pscc(p, &pm_pscc_direct);
5729 			/*
5730 			 * If we're the last reference, free the
5731 			 * entries struct.
5732 			 */
5733 			psce = p->pscc_entries;
5734 			ASSERT(psce);
5735 			if (psce->psce_references == 1) {
5736 				kmem_free(psce->psce_first,
5737 				    PSCCOUNT * sizeof (pm_state_change_t));
5738 				kmem_free(psce, sizeof (*psce));
5739 			} else {
5740 				psce->psce_references--;
5741 			}
5742 			kmem_free(p, sizeof (*p));
5743 		}
5744 	}
5745 	ASSERT(dip == NULL || found);
5746 	rw_exit(&pm_pscc_direct_rwlock);
5747 }
5748 
5749 /*
5750  * Search the indicated list for an entry that matches clone, and return a
5751  * pointer to it.  To be interesting, the entry must have something ready to
5752  * be passed up to the controlling process.
5753  * The returned entry will be locked upon return from this call.
5754  */
5755 static psce_t *
5756 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5757 {
5758 	pscc_t	*p;
5759 	psce_t	*psce;
5760 	rw_enter(lock, RW_READER);
5761 	for (p = *list; p; p = p->pscc_next) {
5762 		if (clone == p->pscc_clone) {
5763 			psce = p->pscc_entries;
5764 			mutex_enter(&psce->psce_lock);
5765 			if (psce->psce_out->size) {
5766 				rw_exit(lock);
5767 				return (psce);
5768 			} else {
5769 				mutex_exit(&psce->psce_lock);
5770 			}
5771 		}
5772 	}
5773 	rw_exit(lock);
5774 	return (NULL);
5775 }
5776 
5777 /*
5778  * Find an entry for a particular clone in the direct list.
5779  */
5780 psce_t *
5781 pm_psc_clone_to_direct(int clone)
5782 {
5783 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5784 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5785 	    &pm_pscc_direct_rwlock));
5786 }
5787 
5788 /*
5789  * Find an entry for a particular clone in the interest list.
5790  */
5791 psce_t *
5792 pm_psc_clone_to_interest(int clone)
5793 {
5794 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5795 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5796 	    &pm_pscc_interest_rwlock));
5797 }
5798 
5799 /*
5800  * Put the given entry at the head of the blocked list
5801  */
5802 void
5803 pm_enqueue_blocked(pm_rsvp_t *p)
5804 {
5805 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5806 	ASSERT(p->pr_next == NULL);
5807 	ASSERT(p->pr_prev == NULL);
5808 	if (pm_blocked_list != NULL) {
5809 		p->pr_next = pm_blocked_list;
5810 		ASSERT(pm_blocked_list->pr_prev == NULL);
5811 		pm_blocked_list->pr_prev = p;
5812 		pm_blocked_list = p;
5813 	} else {
5814 		pm_blocked_list = p;
5815 	}
5816 }
5817 
5818 /*
5819  * Sets every power managed device back to its default threshold
5820  */
5821 void
5822 pm_all_to_default_thresholds(void)
5823 {
5824 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5825 	    (void *) &pm_system_idle_threshold);
5826 }
5827 
5828 static int
5829 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5830 {
5831 	int thr = (int)(*(int *)arg);
5832 
5833 	if (!PM_GET_PM_INFO(dip))
5834 		return (DDI_WALK_CONTINUE);
5835 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5836 	return (DDI_WALK_CONTINUE);
5837 }
5838 
5839 /*
5840  * Returns the current threshold value (in seconds) for the indicated component
5841  */
5842 int
5843 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5844 {
5845 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5846 		return (DDI_FAILURE);
5847 	} else {
5848 		*threshp = cur_threshold(dip, comp);
5849 		return (DDI_SUCCESS);
5850 	}
5851 }
5852 
5853 /*
5854  * To be called when changing the power level of a component of a device.
5855  * On some platforms, changing power on one device may require that power
5856  * be changed on other, related devices in the same transaction.  Thus, we
5857  * always pass this request to the platform power manager so that all the
5858  * affected devices will be locked.
5859  */
5860 void
5861 pm_lock_power(dev_info_t *dip, int *circp)
5862 {
5863 	power_req_t power_req;
5864 	int result;
5865 
5866 	power_req.request_type = PMR_PPM_LOCK_POWER;
5867 	power_req.req.ppm_lock_power_req.who = dip;
5868 	power_req.req.ppm_lock_power_req.circp = circp;
5869 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5870 }
5871 
5872 /*
5873  * Release the lock (or locks) acquired to change the power of a device.
5874  * See comments for pm_lock_power.
5875  */
5876 void
5877 pm_unlock_power(dev_info_t *dip, int circ)
5878 {
5879 	power_req_t power_req;
5880 	int result;
5881 
5882 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5883 	power_req.req.ppm_unlock_power_req.who = dip;
5884 	power_req.req.ppm_unlock_power_req.circ = circ;
5885 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5886 }
5887 
5888 
5889 /*
5890  * Attempt (without blocking) to acquire the lock(s) needed to change the
5891  * power of a component of a device.  See comments for pm_lock_power.
5892  *
5893  * Return: 1 if lock(s) acquired, 0 if not.
5894  */
5895 int
5896 pm_try_locking_power(dev_info_t *dip, int *circp)
5897 {
5898 	power_req_t power_req;
5899 	int result;
5900 
5901 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5902 	power_req.req.ppm_lock_power_req.who = dip;
5903 	power_req.req.ppm_lock_power_req.circp = circp;
5904 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5905 	return (result);
5906 }
5907 
5908 
5909 /*
5910  * Lock power state of a device.
5911  *
5912  * The implementation handles a special case where another thread may have
5913  * acquired the lock and created/launched this thread to do the work.  If
5914  * the lock cannot be acquired immediately, we check to see if this thread
5915  * is registered as a borrower of the lock.  If so, we may proceed without
5916  * the lock.  This assumes that the lending thread blocks on the completion
5917  * of this thread.
5918  *
5919  * Note 1: for use by ppm only.
5920  *
5921  * Note 2: On failing to get the lock immediately, we search lock_loan list
5922  * for curthread (as borrower of the lock).  On a hit, we check that the
5923  * lending thread already owns the lock we want.  It is safe to compare
5924  * devi_busy_thread and thread id of the lender because in the == case (the
5925  * only one we care about) we know that the owner is blocked.  Similarly,
5926  * If we find that curthread isn't registered as a lock borrower, it is safe
5927  * to use the blocking call (ndi_devi_enter) because we know that if we
5928  * weren't already listed as a borrower (upstream on the call stack) we won't
5929  * become one.
5930  */
5931 void
5932 pm_lock_power_single(dev_info_t *dip, int *circp)
5933 {
5934 	lock_loan_t *cur;
5935 
5936 	/* if the lock is available, we are done. */
5937 	if (ndi_devi_tryenter(dip, circp))
5938 		return;
5939 
5940 	mutex_enter(&pm_loan_lock);
5941 	/* see if our thread is registered as a lock borrower. */
5942 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5943 		if (cur->pmlk_borrower == curthread)
5944 			break;
5945 	mutex_exit(&pm_loan_lock);
5946 
5947 	/* if this thread not already registered, it is safe to block */
5948 	if (cur == NULL)
5949 		ndi_devi_enter(dip, circp);
5950 	else {
5951 		/* registered: does lender own the lock we want? */
5952 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5953 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5954 			cur->pmlk_dip = dip;
5955 		} else /* no: just block for it */
5956 			ndi_devi_enter(dip, circp);
5957 
5958 	}
5959 }
5960 
5961 /*
5962  * Drop the lock on the device's power state.  See comment for
5963  * pm_lock_power_single() for special implementation considerations.
5964  *
5965  * Note: for use by ppm only.
5966  */
5967 void
5968 pm_unlock_power_single(dev_info_t *dip, int circ)
5969 {
5970 	lock_loan_t *cur;
5971 
5972 	/* optimization: mutex not needed to check empty list */
5973 	if (lock_loan_head.pmlk_next == NULL) {
5974 		ndi_devi_exit(dip, circ);
5975 		return;
5976 	}
5977 
5978 	mutex_enter(&pm_loan_lock);
5979 	/* see if our thread is registered as a lock borrower. */
5980 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5981 		if (cur->pmlk_borrower == curthread)
5982 			break;
5983 	mutex_exit(&pm_loan_lock);
5984 
5985 	if (cur == NULL || cur->pmlk_dip != dip)
5986 		/* we acquired the lock directly, so return it */
5987 		ndi_devi_exit(dip, circ);
5988 }
5989 
5990 /*
5991  * Try to take the lock for changing the power level of a component.
5992  *
5993  * Note: for use by ppm only.
5994  */
5995 int
5996 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5997 {
5998 	return (ndi_devi_tryenter(dip, circp));
5999 }
6000 
6001 #ifdef	DEBUG
6002 /*
6003  * The following are used only to print out data structures for debugging
6004  */
6005 void
6006 prdeps(char *msg)
6007 {
6008 
6009 	pm_pdr_t *rp;
6010 	int i;
6011 
6012 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6013 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6014 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6015 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6016 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6017 		    (void *)rp->pdr_next);
6018 		if (rp->pdr_kept_count != 0) {
6019 			pm_log("kept list = ");
6020 			i = 0;
6021 			while (i < rp->pdr_kept_count) {
6022 				pm_log("%s ", rp->pdr_kept_paths[i]);
6023 				i++;
6024 			}
6025 			pm_log("\n");
6026 		}
6027 	}
6028 }
6029 
6030 void
6031 pr_noinvol(char *hdr)
6032 {
6033 	pm_noinvol_t *ip;
6034 
6035 	pm_log("%s\n", hdr);
6036 	rw_enter(&pm_noinvol_rwlock, RW_READER);
6037 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6038 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6039 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6040 	rw_exit(&pm_noinvol_rwlock);
6041 }
6042 #endif
6043 
6044 /*
6045  * Attempt to apply the thresholds indicated by rp to the node specified by
6046  * dip.
6047  */
6048 void
6049 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6050 {
6051 	PMD_FUNC(pmf, "apply_recorded_thresh")
6052 	int i, j;
6053 	int comps = PM_NUMCMPTS(dip);
6054 	struct pm_component *cp;
6055 	pm_pte_t *ep;
6056 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
6057 
6058 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
6059 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
6060 	PM_LOCK_DIP(dip);
6061 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
6062 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
6063 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
6064 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
6065 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
6066 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
6067 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
6068 		PM_UNLOCK_DIP(dip);
6069 		return;
6070 	}
6071 
6072 	ep = rp->ptr_entries;
6073 	/*
6074 	 * Here we do the special case of a device threshold
6075 	 */
6076 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6077 		ASSERT(ep && ep->pte_numthresh == 1);
6078 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6079 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6080 		PM_UNLOCK_DIP(dip);
6081 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6082 		if (PM_SCANABLE(dip))
6083 			pm_rescan(dip);
6084 		return;
6085 	}
6086 	for (i = 0; i < comps; i++) {
6087 		cp = PM_CP(dip, i);
6088 		for (j = 0; j < ep->pte_numthresh; j++) {
6089 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6090 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6091 			    i, ep->pte_thresh[j]))
6092 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6093 		}
6094 		ep++;
6095 	}
6096 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6097 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6098 	PM_UNLOCK_DIP(dip);
6099 
6100 	if (PM_SCANABLE(dip))
6101 		pm_rescan(dip);
6102 }
6103 
6104 /*
6105  * Returns true if the threshold specified by rp could be applied to dip
6106  * (that is, the number of components and transitions are the same)
6107  */
6108 int
6109 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6110 {
6111 	PMD_FUNC(pmf, "valid_thresh")
6112 	int comps, i;
6113 	pm_component_t *cp;
6114 	pm_pte_t *ep;
6115 
6116 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6117 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6118 		    rp->ptr_physpath))
6119 		return (0);
6120 	}
6121 	/*
6122 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6123 	 * an entry with numcomps == 0, (since we don't know how many
6124 	 * components there are in advance).  This is always a valid
6125 	 * spec.
6126 	 */
6127 	if (rp->ptr_numcomps == 0) {
6128 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6129 		return (1);
6130 	}
6131 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6132 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6133 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6134 		return (0);
6135 	}
6136 	ep = rp->ptr_entries;
6137 	for (i = 0; i < comps; i++) {
6138 		cp = PM_CP(dip, i);
6139 		if ((ep + i)->pte_numthresh !=
6140 		    cp->pmc_comp.pmc_numlevels - 1) {
6141 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6142 			    pmf, rp->ptr_physpath, i,
6143 			    cp->pmc_comp.pmc_numlevels - 1,
6144 			    (ep + i)->pte_numthresh))
6145 			return (0);
6146 		}
6147 	}
6148 	return (1);
6149 }
6150 
6151 /*
6152  * Remove any recorded threshold for device physpath
6153  * We know there will be at most one.
6154  */
6155 void
6156 pm_unrecord_threshold(char *physpath)
6157 {
6158 	pm_thresh_rec_t *pptr, *ptr;
6159 
6160 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6161 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6162 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6163 			if (pptr) {
6164 				pptr->ptr_next = ptr->ptr_next;
6165 			} else {
6166 				ASSERT(pm_thresh_head == ptr);
6167 				pm_thresh_head = ptr->ptr_next;
6168 			}
6169 			kmem_free(ptr, ptr->ptr_size);
6170 			break;
6171 		}
6172 		pptr = ptr;
6173 	}
6174 	rw_exit(&pm_thresh_rwlock);
6175 }
6176 
6177 /*
6178  * Discard all recorded thresholds.  We are returning to the default pm state.
6179  */
6180 void
6181 pm_discard_thresholds(void)
6182 {
6183 	pm_thresh_rec_t *rp;
6184 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6185 	while (pm_thresh_head) {
6186 		rp = pm_thresh_head;
6187 		pm_thresh_head = rp->ptr_next;
6188 		kmem_free(rp, rp->ptr_size);
6189 	}
6190 	rw_exit(&pm_thresh_rwlock);
6191 }
6192 
6193 /*
6194  * Discard all recorded dependencies.  We are returning to the default pm state.
6195  */
6196 void
6197 pm_discard_dependencies(void)
6198 {
6199 	pm_pdr_t *rp;
6200 	int i;
6201 	size_t length;
6202 
6203 #ifdef DEBUG
6204 	if (pm_debug & PMD_DEP)
6205 		prdeps("Before discard\n");
6206 #endif
6207 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6208 
6209 #ifdef DEBUG
6210 	if (pm_debug & PMD_DEP)
6211 		prdeps("After discard\n");
6212 #endif
6213 	while (pm_dep_head) {
6214 		rp = pm_dep_head;
6215 		if (!rp->pdr_isprop) {
6216 			ASSERT(rp->pdr_satisfied == 0);
6217 			ASSERT(pm_unresolved_deps);
6218 			pm_unresolved_deps--;
6219 		} else {
6220 			ASSERT(pm_prop_deps);
6221 			pm_prop_deps--;
6222 		}
6223 		pm_dep_head = rp->pdr_next;
6224 		if (rp->pdr_kept_count)  {
6225 			for (i = 0; i < rp->pdr_kept_count; i++) {
6226 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6227 				kmem_free(rp->pdr_kept_paths[i], length);
6228 			}
6229 			kmem_free(rp->pdr_kept_paths,
6230 			    rp->pdr_kept_count * sizeof (char **));
6231 		}
6232 		kmem_free(rp, rp->pdr_size);
6233 	}
6234 }
6235 
6236 
6237 static int
6238 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6239 {
6240 	_NOTE(ARGUNUSED(arg))
6241 	char *pathbuf;
6242 
6243 	if (PM_GET_PM_INFO(dip) == NULL)
6244 		return (DDI_WALK_CONTINUE);
6245 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6246 	(void) ddi_pathname(dip, pathbuf);
6247 	pm_free_keeper(pathbuf, 0);
6248 	kmem_free(pathbuf, MAXPATHLEN);
6249 	return (DDI_WALK_CONTINUE);
6250 }
6251 
6252 static int
6253 pm_kept_walk(dev_info_t *dip, void *arg)
6254 {
6255 	_NOTE(ARGUNUSED(arg))
6256 	char *pathbuf;
6257 
6258 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6259 	(void) ddi_pathname(dip, pathbuf);
6260 	(void) pm_kept(pathbuf);
6261 	kmem_free(pathbuf, MAXPATHLEN);
6262 
6263 	return (DDI_WALK_CONTINUE);
6264 }
6265 
6266 static int
6267 pm_keeper_walk(dev_info_t *dip, void *arg)
6268 {
6269 	_NOTE(ARGUNUSED(arg))
6270 	char *pathbuf;
6271 
6272 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6273 	(void) ddi_pathname(dip, pathbuf);
6274 	(void) pm_keeper(pathbuf);
6275 	kmem_free(pathbuf, MAXPATHLEN);
6276 
6277 	return (DDI_WALK_CONTINUE);
6278 }
6279 
6280 static char *
6281 pdw_type_decode(int type)
6282 {
6283 	switch (type) {
6284 	case PM_DEP_WK_POWER_ON:
6285 		return ("power on");
6286 	case PM_DEP_WK_POWER_OFF:
6287 		return ("power off");
6288 	case PM_DEP_WK_DETACH:
6289 		return ("detach");
6290 	case PM_DEP_WK_REMOVE_DEP:
6291 		return ("remove dep");
6292 	case PM_DEP_WK_BRINGUP_SELF:
6293 		return ("bringup self");
6294 	case PM_DEP_WK_RECORD_KEEPER:
6295 		return ("add dependent");
6296 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6297 		return ("add dependent property");
6298 	case PM_DEP_WK_KEPT:
6299 		return ("kept");
6300 	case PM_DEP_WK_KEEPER:
6301 		return ("keeper");
6302 	case PM_DEP_WK_ATTACH:
6303 		return ("attach");
6304 	case PM_DEP_WK_CHECK_KEPT:
6305 		return ("check kept");
6306 	case PM_DEP_WK_CPR_SUSPEND:
6307 		return ("suspend");
6308 	case PM_DEP_WK_CPR_RESUME:
6309 		return ("resume");
6310 	default:
6311 		return ("unknown");
6312 	}
6313 
6314 }
6315 
6316 static void
6317 pm_rele_dep(char *keeper)
6318 {
6319 	PMD_FUNC(pmf, "rele_dep")
6320 	pm_pdr_t *dp;
6321 	char *kept_path = NULL;
6322 	dev_info_t *kept = NULL;
6323 	int count = 0;
6324 
6325 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6326 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6327 			continue;
6328 		for (count = 0; count < dp->pdr_kept_count; count++) {
6329 			kept_path = dp->pdr_kept_paths[count];
6330 			if (kept_path == NULL)
6331 				continue;
6332 			kept = pm_name_to_dip(kept_path, 1);
6333 			if (kept) {
6334 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6335 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6336 				    keeper))
6337 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6338 				pm_rele_power(kept);
6339 				ddi_release_devi(kept);
6340 			}
6341 		}
6342 	}
6343 }
6344 
6345 /*
6346  * Called when we are just released from direct PM.  Bring ourself up
6347  * if our keeper is up since dependency is not honored while a kept
6348  * device is under direct PM.
6349  */
6350 static void
6351 pm_bring_self_up(char *keptpath)
6352 {
6353 	PMD_FUNC(pmf, "bring_self_up")
6354 	dev_info_t *kept;
6355 	dev_info_t *keeper;
6356 	pm_pdr_t *dp;
6357 	int i, j;
6358 	int up = 0, circ;
6359 
6360 	kept = pm_name_to_dip(keptpath, 1);
6361 	if (kept == NULL)
6362 		return;
6363 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6364 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6365 		if (dp->pdr_kept_count == 0)
6366 			continue;
6367 		for (i = 0; i < dp->pdr_kept_count; i++) {
6368 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6369 				continue;
6370 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6371 			if (keeper) {
6372 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6373 				    pmf, PM_DEVICE(keeper)))
6374 				PM_LOCK_POWER(keeper, &circ);
6375 				for (j = 0; j < PM_NUMCMPTS(keeper);
6376 				    j++) {
6377 					if (PM_CURPOWER(keeper, j)) {
6378 						PMD(PMD_KEEPS, ("%s: comp="
6379 						    "%d is up\n", pmf, j))
6380 						up++;
6381 					}
6382 				}
6383 				if (up) {
6384 					if (PM_SKBU(kept))
6385 						DEVI(kept)->devi_pm_flags &=
6386 						    ~PMC_SKIP_BRINGUP;
6387 					bring_pmdep_up(kept, 1);
6388 				}
6389 				PM_UNLOCK_POWER(keeper, circ);
6390 				ddi_release_devi(keeper);
6391 			}
6392 		}
6393 	}
6394 	ddi_release_devi(kept);
6395 }
6396 
6397 static void
6398 pm_process_dep_request(pm_dep_wk_t *work)
6399 {
6400 	PMD_FUNC(pmf, "dep_req")
6401 	int ret;
6402 
6403 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6404 	    pdw_type_decode(work->pdw_type)))
6405 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6406 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6407 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6408 
6409 	switch (work->pdw_type) {
6410 	case PM_DEP_WK_POWER_ON:
6411 		/* Bring up the kept devices and put a hold on them */
6412 		bring_wekeeps_up(work->pdw_keeper);
6413 		break;
6414 	case PM_DEP_WK_POWER_OFF:
6415 		/* Release the kept devices */
6416 		pm_rele_dep(work->pdw_keeper);
6417 		break;
6418 	case PM_DEP_WK_DETACH:
6419 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6420 		break;
6421 	case PM_DEP_WK_REMOVE_DEP:
6422 		pm_discard_dependencies();
6423 		break;
6424 	case PM_DEP_WK_BRINGUP_SELF:
6425 		/*
6426 		 * We deferred satisfying our dependency till now, so satisfy
6427 		 * it again and bring ourselves up.
6428 		 */
6429 		pm_bring_self_up(work->pdw_kept);
6430 		break;
6431 	case PM_DEP_WK_RECORD_KEEPER:
6432 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6433 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6434 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6435 		break;
6436 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6437 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6438 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6439 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6440 		break;
6441 	case PM_DEP_WK_KEPT:
6442 		ret = pm_kept(work->pdw_kept);
6443 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6444 		    ret))
6445 		break;
6446 	case PM_DEP_WK_KEEPER:
6447 		ret = pm_keeper(work->pdw_keeper);
6448 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6449 		    pmf, ret))
6450 		break;
6451 	case PM_DEP_WK_ATTACH:
6452 		ret = pm_keeper(work->pdw_keeper);
6453 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6454 		    pmf, ret))
6455 		ret = pm_kept(work->pdw_kept);
6456 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6457 		    pmf, ret))
6458 		break;
6459 	case PM_DEP_WK_CHECK_KEPT:
6460 		ret = pm_is_kept(work->pdw_kept);
6461 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6462 		    pmf, work->pdw_kept, ret))
6463 		break;
6464 	case PM_DEP_WK_CPR_SUSPEND:
6465 		pm_discard_dependencies();
6466 		break;
6467 	case PM_DEP_WK_CPR_RESUME:
6468 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6469 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6470 		break;
6471 	default:
6472 		ASSERT(0);
6473 		break;
6474 	}
6475 	/*
6476 	 * Free the work structure if the requester is not waiting
6477 	 * Otherwise it is the requester's responsiblity to free it.
6478 	 */
6479 	if (!work->pdw_wait) {
6480 		if (work->pdw_keeper)
6481 			kmem_free(work->pdw_keeper,
6482 			    strlen(work->pdw_keeper) + 1);
6483 		if (work->pdw_kept)
6484 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6485 		kmem_free(work, sizeof (pm_dep_wk_t));
6486 	} else {
6487 		/*
6488 		 * Notify requester if it is waiting for it.
6489 		 */
6490 		work->pdw_ret = ret;
6491 		work->pdw_done = 1;
6492 		cv_signal(&work->pdw_cv);
6493 	}
6494 }
6495 
6496 /*
6497  * Process PM dependency requests.
6498  */
6499 static void
6500 pm_dep_thread(void)
6501 {
6502 	pm_dep_wk_t *work;
6503 	callb_cpr_t cprinfo;
6504 
6505 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6506 	    "pm_dep_thread");
6507 	for (;;) {
6508 		mutex_enter(&pm_dep_thread_lock);
6509 		if (pm_dep_thread_workq == NULL) {
6510 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6511 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6512 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6513 		}
6514 		work = pm_dep_thread_workq;
6515 		pm_dep_thread_workq = work->pdw_next;
6516 		if (pm_dep_thread_tail == work)
6517 			pm_dep_thread_tail = work->pdw_next;
6518 		mutex_exit(&pm_dep_thread_lock);
6519 		pm_process_dep_request(work);
6520 
6521 	}
6522 	/*NOTREACHED*/
6523 }
6524 
6525 /*
6526  * Set the power level of the indicated device to unknown (if it is not a
6527  * backwards compatible device), as it has just been resumed, and it won't
6528  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6529  */
6530 void
6531 pm_forget_power_level(dev_info_t *dip)
6532 {
6533 	dev_info_t *pdip = ddi_get_parent(dip);
6534 	int i, count = 0;
6535 
6536 	if (!PM_ISBC(dip)) {
6537 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6538 			count += (PM_CURPOWER(dip, i) == 0);
6539 
6540 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6541 			e_pm_hold_rele_power(pdip, count);
6542 
6543 		/*
6544 		 * Count this as a power cycle if we care
6545 		 */
6546 		if (DEVI(dip)->devi_pm_volpmd &&
6547 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6548 			DEVI(dip)->devi_pm_volpmd = 0;
6549 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6550 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6551 	}
6552 }
6553 
6554 /*
6555  * This function advises the caller whether it should make a power-off
6556  * transition at this time or not.  If the transition is not advised
6557  * at this time, the time that the next power-off transition can
6558  * be made from now is returned through "intervalp" pointer.
6559  * This function returns:
6560  *
6561  *  1  power-off advised
6562  *  0  power-off not advised, intervalp will point to seconds from
6563  *	  now that a power-off is advised.  If it is passed the number
6564  *	  of years that policy specifies the device should last,
6565  *	  a large number is returned as the time interval.
6566  *  -1  error
6567  */
6568 int
6569 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6570 {
6571 	PMD_FUNC(pmf, "pm_trans_check")
6572 	char dbuf[DC_SCSI_MFR_LEN];
6573 	struct pm_scsi_cycles *scp;
6574 	int service_years, service_weeks, full_years;
6575 	time_t now, service_seconds, tdiff;
6576 	time_t within_year, when_allowed;
6577 	char *ptr;
6578 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6579 	int cycles_diff, cycles_over;
6580 
6581 	if (datap == NULL) {
6582 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6583 		return (-1);
6584 	}
6585 
6586 	if (datap->format == DC_SCSI_FORMAT) {
6587 		/*
6588 		 * Power cycles of the scsi drives are distributed
6589 		 * over 5 years with the following percentage ratio:
6590 		 *
6591 		 *	30%, 25%, 20%, 15%, and 10%
6592 		 *
6593 		 * The power cycle quota for each year is distributed
6594 		 * linearly through out the year.  The equation for
6595 		 * determining the expected cycles is:
6596 		 *
6597 		 *	e = a * (n / y)
6598 		 *
6599 		 * e = expected cycles
6600 		 * a = allocated cycles for this year
6601 		 * n = number of seconds since beginning of this year
6602 		 * y = number of seconds in a year
6603 		 *
6604 		 * Note that beginning of the year starts the day that
6605 		 * the drive has been put on service.
6606 		 *
6607 		 * If the drive has passed its expected cycles, we
6608 		 * can determine when it can start to power cycle
6609 		 * again to keep it on track to meet the 5-year
6610 		 * life expectancy.  The equation for determining
6611 		 * when to power cycle is:
6612 		 *
6613 		 *	w = y * (c / a)
6614 		 *
6615 		 * w = when it can power cycle again
6616 		 * y = number of seconds in a year
6617 		 * c = current number of cycles
6618 		 * a = allocated cycles for the year
6619 		 *
6620 		 */
6621 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6622 
6623 		scp = &datap->un.scsi_cycles;
6624 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6625 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6626 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6627 		if (scp->ncycles < 0 || scp->flag != 0) {
6628 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6629 			return (-1);
6630 		}
6631 
6632 		if (scp->ncycles > scp->lifemax) {
6633 			*intervalp = (LONG_MAX / hz);
6634 			return (0);
6635 		}
6636 
6637 		/*
6638 		 * convert service date to time_t
6639 		 */
6640 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6641 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6642 		ptr = dbuf;
6643 		service_years = stoi(&ptr) - EPOCH_YEAR;
6644 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6645 		    DC_SCSI_WEEK_LEN);
6646 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6647 
6648 		/*
6649 		 * scsi standard does not specify WW data,
6650 		 * could be (00-51) or (01-52)
6651 		 */
6652 		ptr = dbuf;
6653 		service_weeks = stoi(&ptr);
6654 		if (service_years < 0 ||
6655 		    service_weeks < 0 || service_weeks > 52) {
6656 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6657 			    pmf, service_years, service_weeks))
6658 			return (-1);
6659 		}
6660 
6661 		/*
6662 		 * calculate service date in seconds-since-epoch,
6663 		 * adding one day for each leap-year.
6664 		 *
6665 		 * (years-since-epoch + 2) fixes integer truncation,
6666 		 * example: (8) leap-years during [1972, 2000]
6667 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6668 		 */
6669 		service_seconds = (service_years * DC_SPY) +
6670 		    (service_weeks * DC_SPW) +
6671 		    (((service_years + 2) / 4) * DC_SPD);
6672 
6673 		now = gethrestime_sec();
6674 		/*
6675 		 * since the granularity of 'svc_date' is day not second,
6676 		 * 'now' should be rounded up to full day.
6677 		 */
6678 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6679 		if (service_seconds > now) {
6680 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6681 			    "than now (%ld)!\n", pmf, service_seconds, now))
6682 			return (-1);
6683 		}
6684 
6685 		tdiff = now - service_seconds;
6686 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6687 
6688 		/*
6689 		 * NOTE - Leap years are not considered in the calculations
6690 		 * below.
6691 		 */
6692 		full_years = (tdiff / DC_SPY);
6693 		if ((full_years >= DC_SCSI_NPY) &&
6694 		    (scp->ncycles <= scp->lifemax))
6695 			return (1);
6696 
6697 		/*
6698 		 * Determine what is the normal cycle usage for the
6699 		 * device at the beginning and the end of this year.
6700 		 */
6701 		lower_bound_cycles = (!full_years) ? 0 :
6702 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6703 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6704 
6705 		if (scp->ncycles <= lower_bound_cycles)
6706 			return (1);
6707 
6708 		/*
6709 		 * The linear slope that determines how many cycles
6710 		 * are allowed this year is number of seconds
6711 		 * passed this year over total number of seconds in a year.
6712 		 */
6713 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6714 		within_year = (tdiff % DC_SPY);
6715 		cycles_allowed = lower_bound_cycles +
6716 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6717 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6718 		    full_years, within_year))
6719 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6720 		    cycles_allowed))
6721 
6722 		if (scp->ncycles <= cycles_allowed)
6723 			return (1);
6724 
6725 		/*
6726 		 * The transition is not advised now but we can
6727 		 * determine when the next transition can be made.
6728 		 *
6729 		 * Depending on how many cycles the device has been
6730 		 * over-used, we may need to skip years with
6731 		 * different percentage quota in order to determine
6732 		 * when the next transition can be made.
6733 		 */
6734 		cycles_over = (scp->ncycles - lower_bound_cycles);
6735 		while (cycles_over > cycles_diff) {
6736 			full_years++;
6737 			if (full_years >= DC_SCSI_NPY) {
6738 				*intervalp = (LONG_MAX / hz);
6739 				return (0);
6740 			}
6741 			cycles_over -= cycles_diff;
6742 			lower_bound_cycles = upper_bound_cycles;
6743 			upper_bound_cycles =
6744 			    (scp->lifemax * pcnt[full_years]) / 100;
6745 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6746 		}
6747 
6748 		/*
6749 		 * The linear slope that determines when the next transition
6750 		 * can be made is the relative position of used cycles within a
6751 		 * year over total number of cycles within that year.
6752 		 */
6753 		when_allowed = service_seconds + (full_years * DC_SPY) +
6754 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6755 		*intervalp = (when_allowed - now);
6756 		if (*intervalp > (LONG_MAX / hz))
6757 			*intervalp = (LONG_MAX / hz);
6758 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6759 		    *intervalp))
6760 		return (0);
6761 	}
6762 
6763 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6764 	return (-1);
6765 }
6766 
6767 /*
6768  * Nexus drivers call into pm framework to indicate which child driver is about
6769  * to be installed.  In some platforms, ppm may need to configure the hardware
6770  * for successful installation of a driver.
6771  */
6772 int
6773 pm_init_child(dev_info_t *dip)
6774 {
6775 	power_req_t power_req;
6776 
6777 	ASSERT(ddi_binding_name(dip));
6778 	ASSERT(ddi_get_name_addr(dip));
6779 	pm_ppm_claim(dip);
6780 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6781 		power_req.request_type = PMR_PPM_INIT_CHILD;
6782 		power_req.req.ppm_config_req.who = dip;
6783 		ASSERT(PPM(dip) != NULL);
6784 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6785 		    NULL));
6786 	} else {
6787 #ifdef DEBUG
6788 		/* pass it to the default handler so we can debug things */
6789 		power_req.request_type = PMR_PPM_INIT_CHILD;
6790 		power_req.req.ppm_config_req.who = dip;
6791 		(void) pm_ctlops(NULL, dip,
6792 		    DDI_CTLOPS_POWER, &power_req, NULL);
6793 #endif
6794 	}
6795 	return (DDI_SUCCESS);
6796 }
6797 
6798 /*
6799  * Bring parent of a node that is about to be probed up to full power, and
6800  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6801  * it is time to let it go down again
6802  */
6803 void
6804 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6805 {
6806 	int result;
6807 	power_req_t power_req;
6808 
6809 	bzero(cp, sizeof (*cp));
6810 	cp->ppc_dip = dip;
6811 
6812 	pm_ppm_claim(dip);
6813 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6814 		power_req.request_type = PMR_PPM_PRE_PROBE;
6815 		power_req.req.ppm_config_req.who = dip;
6816 		ASSERT(PPM(dip) != NULL);
6817 		(void) pm_ctlops(PPM(dip), dip,
6818 		    DDI_CTLOPS_POWER, &power_req, &result);
6819 		cp->ppc_ppm = PPM(dip);
6820 	} else {
6821 #ifdef DEBUG
6822 		/* pass it to the default handler so we can debug things */
6823 		power_req.request_type = PMR_PPM_PRE_PROBE;
6824 		power_req.req.ppm_config_req.who = dip;
6825 		(void) pm_ctlops(NULL, dip,
6826 		    DDI_CTLOPS_POWER, &power_req, &result);
6827 #endif
6828 		cp->ppc_ppm = NULL;
6829 	}
6830 }
6831 
6832 int
6833 pm_pre_config(dev_info_t *dip, char *devnm)
6834 {
6835 	PMD_FUNC(pmf, "pre_config")
6836 	int ret;
6837 
6838 	if (MDI_VHCI(dip)) {
6839 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6840 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6841 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6842 	} else if (!PM_GET_PM_INFO(dip))
6843 		return (DDI_SUCCESS);
6844 
6845 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6846 	pm_hold_power(dip);
6847 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6848 	if (ret != DDI_SUCCESS)
6849 		pm_rele_power(dip);
6850 	return (ret);
6851 }
6852 
6853 /*
6854  * This routine is called by devfs during its walk to unconfigue a node.
6855  * If the call is due to auto mod_unloads and the dip is not at its
6856  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6857  * return DDI_SUCCESS.
6858  */
6859 int
6860 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6861 {
6862 	PMD_FUNC(pmf, "pre_unconfig")
6863 	int ret;
6864 
6865 	if (MDI_VHCI(dip)) {
6866 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6867 		    PM_DEVICE(dip), flags))
6868 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6869 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6870 	} else if (!PM_GET_PM_INFO(dip))
6871 		return (DDI_SUCCESS);
6872 
6873 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6874 	    flags))
6875 	*held = 0;
6876 
6877 	/*
6878 	 * If the dip is a leaf node, don't power it up.
6879 	 */
6880 	if (!ddi_get_child(dip))
6881 		return (DDI_SUCCESS);
6882 
6883 	/*
6884 	 * Do not power up the node if it is called due to auto-modunload.
6885 	 */
6886 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6887 		return (DDI_FAILURE);
6888 
6889 	pm_hold_power(dip);
6890 	*held = 1;
6891 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6892 	if (ret != DDI_SUCCESS) {
6893 		pm_rele_power(dip);
6894 		*held = 0;
6895 	}
6896 	return (ret);
6897 }
6898 
6899 /*
6900  * Notify ppm of attach action.  Parent is already held at full power by
6901  * probe action.
6902  */
6903 void
6904 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6905 {
6906 	static char *me = "pm_pre_attach";
6907 	power_req_t power_req;
6908 	int result;
6909 
6910 	/*
6911 	 * Initialize and fill in the PPM cookie
6912 	 */
6913 	bzero(cp, sizeof (*cp));
6914 	cp->ppc_cmd = (int)cmd;
6915 	cp->ppc_ppm = PPM(dip);
6916 	cp->ppc_dip = dip;
6917 
6918 	/*
6919 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6920 	 * Power Management stuff. DDI_RESUME also has to purge it's
6921 	 * powerlevel information.
6922 	 */
6923 	switch (cmd) {
6924 	case DDI_ATTACH:
6925 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6926 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6927 			power_req.req.ppm_config_req.who = dip;
6928 			ASSERT(PPM(dip));
6929 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6930 			    &power_req, &result);
6931 		}
6932 #ifdef DEBUG
6933 		else {
6934 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6935 			power_req.req.ppm_config_req.who = dip;
6936 			(void) pm_ctlops(NULL, dip,
6937 			    DDI_CTLOPS_POWER, &power_req, &result);
6938 		}
6939 #endif
6940 		break;
6941 	case DDI_RESUME:
6942 		pm_forget_power_level(dip);
6943 
6944 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6945 			power_req.request_type = PMR_PPM_PRE_RESUME;
6946 			power_req.req.resume_req.who = cp->ppc_dip;
6947 			power_req.req.resume_req.cmd =
6948 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6949 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6950 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6951 			    DDI_CTLOPS_POWER, &power_req, &result);
6952 		}
6953 #ifdef DEBUG
6954 		else {
6955 			power_req.request_type = PMR_PPM_PRE_RESUME;
6956 			power_req.req.resume_req.who = cp->ppc_dip;
6957 			power_req.req.resume_req.cmd =
6958 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6959 			(void) pm_ctlops(NULL, cp->ppc_dip,
6960 			    DDI_CTLOPS_POWER, &power_req, &result);
6961 		}
6962 #endif
6963 		break;
6964 
6965 	case DDI_PM_RESUME:
6966 		break;
6967 
6968 	default:
6969 		panic(me);
6970 	}
6971 }
6972 
6973 /*
6974  * Nexus drivers call into pm framework to indicate which child driver is
6975  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6976  * hardware since the device driver is no longer installed.
6977  */
6978 int
6979 pm_uninit_child(dev_info_t *dip)
6980 {
6981 	power_req_t power_req;
6982 
6983 	ASSERT(ddi_binding_name(dip));
6984 	ASSERT(ddi_get_name_addr(dip));
6985 	pm_ppm_claim(dip);
6986 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6987 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6988 		power_req.req.ppm_config_req.who = dip;
6989 		ASSERT(PPM(dip));
6990 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6991 		    NULL));
6992 	} else {
6993 #ifdef DEBUG
6994 		/* pass it to the default handler so we can debug things */
6995 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6996 		power_req.req.ppm_config_req.who = dip;
6997 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6998 #endif
6999 	}
7000 	return (DDI_SUCCESS);
7001 }
7002 /*
7003  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
7004  * Also notify ppm of result of probe if there is a ppm that cares
7005  */
7006 void
7007 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
7008 {
7009 	_NOTE(ARGUNUSED(probe_failed))
7010 	int result;
7011 	power_req_t power_req;
7012 
7013 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7014 		power_req.request_type = PMR_PPM_POST_PROBE;
7015 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7016 		power_req.req.ppm_config_req.result = ret;
7017 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7018 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
7019 		    &power_req, &result);
7020 	}
7021 #ifdef DEBUG
7022 	else {
7023 		power_req.request_type = PMR_PPM_POST_PROBE;
7024 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7025 		power_req.req.ppm_config_req.result = ret;
7026 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
7027 		    &power_req, &result);
7028 	}
7029 #endif
7030 }
7031 
7032 void
7033 pm_post_config(dev_info_t *dip, char *devnm)
7034 {
7035 	PMD_FUNC(pmf, "post_config")
7036 
7037 	if (MDI_VHCI(dip)) {
7038 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7039 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
7040 		return;
7041 	} else if (!PM_GET_PM_INFO(dip))
7042 		return;
7043 
7044 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7045 	pm_rele_power(dip);
7046 }
7047 
7048 void
7049 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
7050 {
7051 	PMD_FUNC(pmf, "post_unconfig")
7052 
7053 	if (MDI_VHCI(dip)) {
7054 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
7055 		    PM_DEVICE(dip), held))
7056 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
7057 		return;
7058 	} else if (!PM_GET_PM_INFO(dip))
7059 		return;
7060 
7061 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
7062 	    held))
7063 	if (!held)
7064 		return;
7065 	/*
7066 	 * We have held power in pre_unconfig, release it here.
7067 	 */
7068 	pm_rele_power(dip);
7069 }
7070 
7071 /*
7072  * Notify ppm of result of attach if there is a ppm that cares
7073  */
7074 void
7075 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7076 {
7077 	int result;
7078 	power_req_t power_req;
7079 	dev_info_t	*dip;
7080 
7081 	if (cp->ppc_cmd != DDI_ATTACH)
7082 		return;
7083 
7084 	dip = cp->ppc_dip;
7085 
7086 	if (ret == DDI_SUCCESS) {
7087 		/*
7088 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7089 		 */
7090 		if (PM_GET_PM_INFO(dip) == NULL)
7091 			(void) pm_start(dip);
7092 	} else {
7093 		/*
7094 		 * Attach may have got pm started before failing
7095 		 */
7096 		pm_stop(dip);
7097 	}
7098 
7099 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7100 		power_req.request_type = PMR_PPM_POST_ATTACH;
7101 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7102 		power_req.req.ppm_config_req.result = ret;
7103 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7104 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7105 		    DDI_CTLOPS_POWER, &power_req, &result);
7106 	}
7107 #ifdef DEBUG
7108 	else {
7109 		power_req.request_type = PMR_PPM_POST_ATTACH;
7110 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7111 		power_req.req.ppm_config_req.result = ret;
7112 		(void) pm_ctlops(NULL, cp->ppc_dip,
7113 		    DDI_CTLOPS_POWER, &power_req, &result);
7114 	}
7115 #endif
7116 }
7117 
7118 /*
7119  * Notify ppm of attach action.  Parent is already held at full power by
7120  * probe action.
7121  */
7122 void
7123 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7124 {
7125 	int result;
7126 	power_req_t power_req;
7127 
7128 	bzero(cp, sizeof (*cp));
7129 	cp->ppc_dip = dip;
7130 	cp->ppc_cmd = (int)cmd;
7131 
7132 	switch (cmd) {
7133 	case DDI_DETACH:
7134 		pm_detaching(dip);		/* suspend pm while detaching */
7135 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7136 			power_req.request_type = PMR_PPM_PRE_DETACH;
7137 			power_req.req.ppm_config_req.who = dip;
7138 			ASSERT(PPM(dip));
7139 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7140 			    &power_req, &result);
7141 			cp->ppc_ppm = PPM(dip);
7142 		} else {
7143 #ifdef DEBUG
7144 			/* pass to the default handler so we can debug things */
7145 			power_req.request_type = PMR_PPM_PRE_DETACH;
7146 			power_req.req.ppm_config_req.who = dip;
7147 			(void) pm_ctlops(NULL, dip,
7148 			    DDI_CTLOPS_POWER, &power_req, &result);
7149 #endif
7150 			cp->ppc_ppm = NULL;
7151 		}
7152 		break;
7153 
7154 	default:
7155 		break;
7156 	}
7157 }
7158 
/*
 * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
 * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
 * make an entry to record the details, which includes certain flag settings.
 *
 * A new pm_noinvol_t holding a private copy of path and the supplied
 * counters/flags is linked into the pm_noinvol_head list, which is kept in
 * strcmp() order of ni_path.  Recording a path that is already on the list
 * is a panic.  If major is DDI_MAJOR_T_NONE the major number is derived from
 * the path instead.
 */
static void
pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
    int wasvolpmd, major_t major)
{
	PMD_FUNC(pmf, "record_invol_path")
	major_t pm_path_to_major(char *);
	size_t plen;
	pm_noinvol_t *ip, *np, *pp;
	pp = NULL;

	/* build the new entry before taking the list lock */
	plen = strlen(path) + 1;
	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
	np->ni_size = plen;
	np->ni_path = kmem_alloc(plen, KM_SLEEP);
	np->ni_noinvolpm = noinvolpm;
	np->ni_volpmd = volpmd;
	np->ni_wasvolpmd = wasvolpmd;
	np->ni_flags = flags;
	(void) strcpy(np->ni_path, path);
	/*
	 * If we haven't actually seen the node attached, it is hard to figure
	 * out its major.  If we could hold the node by path, we would be much
	 * happier here.
	 */
	if (major == DDI_MAJOR_T_NONE) {
		np->ni_major = pm_path_to_major(path);
	} else {
		np->ni_major = major;
	}
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	/* pp trails ip so the new entry can be linked in after it */
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		int comp = strcmp(path, ip->ni_path);
		if (comp < 0) {
			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
			    pmf, path, ip->ni_path))
			/* insert before current entry */
			np->ni_next = ip;
			if (pp) {
				pp->ni_next = np;
			} else {
				pm_noinvol_head = np;
			}
			rw_exit(&pm_noinvol_rwlock);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("record_invol_path exit0");
#endif
			return;
		} else if (comp == 0) {
			panic("%s already in pm_noinvol list", path);
		}
	}
	/*
	 * If we did not find an entry in the list that this should go before,
	 * then it must go at the end
	 */
	if (pp) {
		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
		    pp->ni_path))
		ASSERT(pp->ni_next == 0);
		pp->ni_next = np;
	} else {
		/* no previous entry at all: the list was empty */
		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
		ASSERT(!pm_noinvol_head);
		pm_noinvol_head = np;
	}
	rw_exit(&pm_noinvol_rwlock);
#ifdef DEBUG
	if (pm_debug & PMD_NOINVOL)
		pr_noinvol("record_invol_path exit");
#endif
}
7236 
7237 void
7238 pm_record_invol(dev_info_t *dip)
7239 {
7240 	char *pathbuf;
7241 	int pm_all_components_off(dev_info_t *);
7242 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7243 
7244 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7245 	(void) ddi_pathname(dip, pathbuf);
7246 
7247 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7248 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7249 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7250 
7251 	/*
7252 	 * If this child's detach will be holding up its ancestors, then we
7253 	 * allow for an exception to that if all children of this type have
7254 	 * gone down voluntarily.
7255 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7256 	 */
7257 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7258 	    dip);
7259 	kmem_free(pathbuf, MAXPATHLEN);
7260 }
7261 
/*
 * Finish the pm side of a detach and report the result to ppm.
 *
 * cp	cookie filled in by pm_pre_detach()
 * ret	result of the driver's detach
 *
 * For DDI_DETACH the ppm recorded in the cookie (if any) is sent
 * PMR_PPM_POST_DETACH.  On success the node is recorded on the
 * no-involuntary-power-cycles list when its flags/counts call for it and
 * power management of the node is stopped; on failure power management is
 * resumed.  DDI_PM_SUSPEND and DDI_SUSPEND need no work; any other command
 * panics on DEBUG kernels and is ignored otherwise.
 */
void
pm_post_detach(pm_ppm_cookie_t *cp, int ret)
{
	dev_info_t *dip = cp->ppc_dip;
	int result;
	power_req_t power_req;

	switch (cp->ppc_cmd) {
	case DDI_DETACH:
		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
			power_req.request_type = PMR_PPM_POST_DETACH;
			power_req.req.ppm_config_req.who = cp->ppc_dip;
			power_req.req.ppm_config_req.result = ret;
			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
			    DDI_CTLOPS_POWER, &power_req, &result);
		}
#ifdef DEBUG
		else {
			/* pass to the default handler so we can debug things */
			power_req.request_type = PMR_PPM_POST_DETACH;
			power_req.req.ppm_config_req.who = cp->ppc_dip;
			power_req.req.ppm_config_req.result = ret;
			(void) pm_ctlops(NULL, cp->ppc_dip,
			    DDI_CTLOPS_POWER, &power_req, &result);
		}
#endif
		if (ret == DDI_SUCCESS) {
			/*
			 * For hotplug detach we assume it is *really* gone
			 */
			/* (the ppc_cmd test is always true in this case arm) */
			if (cp->ppc_cmd == DDI_DETACH &&
			    ((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm))
				pm_record_invol(dip);
			DEVI(dip)->devi_pm_flags &=
			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);

			/*
			 * If console fb is detaching, then we don't need to
			 * worry any more about it going off (pm_detaching has
			 * brought up all components)
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				ASSERT(cfb_dip_detaching);
				ASSERT(cfb_dip == NULL);
				ASSERT(pm_cfb_comps_off == 0);
				cfb_dip_detaching = NULL;
				mutex_exit(&pm_cfb_lock);
			}
			pm_stop(dip);	/* make it permanent */
		} else {
			/* detach failed: put the console fb dip back in place */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				ASSERT(cfb_dip_detaching);
				ASSERT(cfb_dip == NULL);
				ASSERT(pm_cfb_comps_off == 0);
				cfb_dip = cfb_dip_detaching;
				cfb_dip_detaching = NULL;
				mutex_exit(&pm_cfb_lock);
			}
			pm_detach_failed(dip);	/* resume power management */
		}
		break;
	case DDI_PM_SUSPEND:
		break;
	case DDI_SUSPEND:
		break;				/* legal, but nothing to do */
	default:
#ifdef DEBUG
		panic("pm_post_detach: unrecognized cmd %d for detach",
		    cp->ppc_cmd);
		/*NOTREACHED*/
#else
		break;
#endif
	}
}
7341 
7342 /*
7343  * Called after vfs_mountroot has got the clock started to fix up timestamps
7344  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7345  * devices look busy but have a 0 busycnt
7346  */
7347 int
7348 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7349 {
7350 	_NOTE(ARGUNUSED(arg))
7351 
7352 	pm_info_t *info = PM_GET_PM_INFO(dip);
7353 	struct pm_component *cp;
7354 	int i;
7355 
7356 	if (!info)
7357 		return (DDI_WALK_CONTINUE);
7358 	PM_LOCK_BUSY(dip);
7359 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7360 		cp = PM_CP(dip, i);
7361 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7362 			cp->pmc_timestamp = gethrestime_sec();
7363 	}
7364 	PM_UNLOCK_BUSY(dip);
7365 	return (DDI_WALK_CONTINUE);
7366 }
7367 
/*
 * Called at attach time to see if the device being attached has a record in
 * the no involuntary power cycles list.  If so, we do some bookkeeping on the
 * parents and set a flag in the dip
 *
 * The list is first searched under the reader lock.  Only when a matching
 * path is found is it searched again under the writer lock, where the entry
 * is unlinked, its flags and counts are copied into the dip, and the entry
 * is freed.  PMC_NOINVOL_DONE marks the dip so the work is done only once.
 */
void
pm_noinvol_specd(dev_info_t *dip)
{
	PMD_FUNC(pmf, "noinvol_specd")
	char *pathbuf;
	pm_noinvol_t *ip, *pp = NULL;
	int wasvolpmd;
	int found = 0;

	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
		return;
	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);

	PM_LOCK_DIP(dip);
	DEVI(dip)->devi_pm_volpmd = 0;
	DEVI(dip)->devi_pm_noinvolpm = 0;
	/* first pass: read-only probe for a matching path */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			found++;
			break;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	if (!found) {
		PM_UNLOCK_DIP(dip);
		kmem_free(pathbuf, MAXPATHLEN);
		return;
	}
	/*
	 * second pass: the lock was dropped in between, so search again from
	 * the head, this time as writer so the entry can be unlinked
	 */
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	pp = NULL;
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
			/*
			 * Handle special case of console fb
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				cfb_dip = dip;
				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
				mutex_exit(&pm_cfb_lock);
			}
			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
			ASSERT((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm);
			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
			    ip->ni_noinvolpm, ip->ni_volpmd,
			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
			/*
			 * free the entry in hopes the list will now be empty
			 * and we won't have to search it any more until the
			 * device detaches
			 */
			if (pp) {
				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
				    pmf, ip->ni_path, pp->ni_path))
				pp->ni_next = ip->ni_next;
			} else {
				PMD(PMD_NOINVOL, ("%s: free %s head\n",
				    pmf, ip->ni_path))
				ASSERT(pm_noinvol_head == ip);
				pm_noinvol_head = ip->ni_next;
			}
			PM_UNLOCK_DIP(dip);
			/* save what is still needed before the entry is freed */
			wasvolpmd = ip->ni_wasvolpmd;
			rw_exit(&pm_noinvol_rwlock);
			kmem_free(ip->ni_path, ip->ni_size);
			kmem_free(ip, sizeof (*ip));
			/*
			 * Now walk up the tree decrementing devi_pm_noinvolpm
			 * (and volpmd if appropriate)
			 */
			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
			    wasvolpmd, pathbuf, dip);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("noinvol_specd exit");
#endif
			kmem_free(pathbuf, MAXPATHLEN);
			return;
		}
	}
	/* the entry went away while the lock was dropped; nothing to do */
	kmem_free(pathbuf, MAXPATHLEN);
	rw_exit(&pm_noinvol_rwlock);
	PM_UNLOCK_DIP(dip);
}
7471 
7472 int
7473 pm_all_components_off(dev_info_t *dip)
7474 {
7475 	int i;
7476 	pm_component_t *cp;
7477 
7478 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7479 		cp = PM_CP(dip, i);
7480 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7481 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7482 			return (0);
7483 	}
7484 	return (1);	/* all off */
7485 }
7486 
7487 /*
7488  * Make sure that all "no involuntary power cycles" devices are attached.
7489  * Called before doing a cpr suspend to make sure the driver has a say about
7490  * the power cycle
7491  */
7492 int
7493 pm_reattach_noinvol(void)
7494 {
7495 	PMD_FUNC(pmf, "reattach_noinvol")
7496 	pm_noinvol_t *ip;
7497 	char *path;
7498 	dev_info_t *dip;
7499 
7500 	/*
7501 	 * Prevent the modunload thread from unloading any modules until we
7502 	 * have completely stopped all kernel threads.
7503 	 */
7504 	modunload_disable();
7505 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7506 		/*
7507 		 * Forget we'v ever seen any entry
7508 		 */
7509 		ip->ni_persistent = 0;
7510 	}
7511 restart:
7512 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7513 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7514 #ifdef PMDDEBUG
7515 		major_t maj;
7516 		maj = ip->ni_major;
7517 #endif
7518 		path = ip->ni_path;
7519 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7520 			if (ip->ni_persistent) {
7521 				/*
7522 				 * If we weren't able to make this entry
7523 				 * go away, then we give up, as
7524 				 * holding/attaching the driver ought to have
7525 				 * resulted in this entry being deleted
7526 				 */
7527 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7528 				    "(%s|%d)\n", pmf, ip->ni_path,
7529 				    ddi_major_to_name(maj), (int)maj))
7530 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7531 				    ip->ni_path);
7532 				modunload_enable();
7533 				rw_exit(&pm_noinvol_rwlock);
7534 				return (0);
7535 			}
7536 			ip->ni_persistent++;
7537 			rw_exit(&pm_noinvol_rwlock);
7538 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7539 			dip = e_ddi_hold_devi_by_path(path, 0);
7540 			if (dip == NULL) {
7541 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7542 				    pmf, path, (int)maj))
7543 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7544 				    "driver", path);
7545 				modunload_enable();
7546 				return (0);
7547 			} else {
7548 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7549 				/*
7550 				 * Since the modunload thread is stopped, we
7551 				 * don't have to keep the driver held, which
7552 				 * saves a ton of bookkeeping
7553 				 */
7554 				ddi_release_devi(dip);
7555 				goto restart;
7556 			}
7557 		} else {
7558 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7559 			    pmf, ip->ni_path))
7560 			continue;
7561 		}
7562 	}
7563 	rw_exit(&pm_noinvol_rwlock);
7564 	return (1);
7565 }
7566 
/*
 * Re-enable module unloading, undoing the modunload_disable() that
 * pm_reattach_noinvol() leaves in effect when it returns 1.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7572 
7573 /*
7574  * Display pm support code
7575  */
7576 
7577 
7578 /*
7579  * console frame-buffer power-mgmt gets enabled when debugging
7580  * services are not present or console fbpm override is set
7581  */
7582 void
7583 pm_cfb_setup(const char *stdout_path)
7584 {
7585 	PMD_FUNC(pmf, "cfb_setup")
7586 	extern int obpdebug;
7587 	char *devname;
7588 	dev_info_t *dip;
7589 	int devname_len;
7590 	extern dev_info_t *fbdip;
7591 
7592 	/*
7593 	 * By virtue of this function being called (from consconfig),
7594 	 * we know stdout is a framebuffer.
7595 	 */
7596 	stdout_is_framebuffer = 1;
7597 
7598 	if (obpdebug || (boothowto & RB_DEBUG)) {
7599 		if (pm_cfb_override == 0) {
7600 			/*
7601 			 * Console is frame buffer, but we want to suppress
7602 			 * pm on it because of debugging setup
7603 			 */
7604 			pm_cfb_enabled = 0;
7605 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7606 			    "console power management.");
7607 			/*
7608 			 * however, we still need to know which is the console
7609 			 * fb in order to suppress pm on it
7610 			 */
7611 		} else {
7612 			cmn_err(CE_WARN, "Kernel debugger present: see "
7613 			    "kmdb(1M) for interaction with power management.");
7614 		}
7615 	}
7616 #ifdef DEBUG
7617 	/*
7618 	 * IF console is fb and is power managed, don't do prom_printfs from
7619 	 * pm debug macro
7620 	 */
7621 	if (pm_cfb_enabled && !pm_debug_to_console) {
7622 		if (pm_debug)
7623 			prom_printf("pm debug output will be to log only\n");
7624 		pm_divertdebug++;
7625 	}
7626 #endif
7627 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7628 	devname_len = strlen(devname) + 1;
7629 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7630 	/* if the driver is attached */
7631 	if ((dip = fbdip) != NULL) {
7632 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7633 		    PM_DEVICE(dip)))
7634 		/*
7635 		 * We set up here as if the driver were power manageable in case
7636 		 * we get a later attach of a pm'able driver (which would result
7637 		 * in a panic later)
7638 		 */
7639 		cfb_dip = dip;
7640 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7641 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7642 		    PM_DEVICE(dip)))
7643 #ifdef DEBUG
7644 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7645 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7646 			    pmf, PM_DEVICE(dip)))
7647 		}
7648 #endif
7649 	} else {
7650 		char *ep;
7651 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7652 		pm_record_invol_path(devname,
7653 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7654 		    DDI_MAJOR_T_NONE);
7655 		for (ep = strrchr(devname, '/'); ep != devname;
7656 		    ep = strrchr(devname, '/')) {
7657 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7658 			*ep = '\0';
7659 			dip = pm_name_to_dip(devname, 0);
7660 			if (dip != NULL) {
7661 				/*
7662 				 * Walk up the tree incrementing
7663 				 * devi_pm_noinvolpm
7664 				 */
7665 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7666 				    0, 0, devname, dip);
7667 				break;
7668 			} else {
7669 				pm_record_invol_path(devname,
7670 				    PMC_NO_INVOL, 1, 0, 0, DDI_MAJOR_T_NONE);
7671 			}
7672 		}
7673 	}
7674 	kmem_free(devname, devname_len);
7675 }
7676 
/*
 * Drop one console frame buffer "in use" hold by decrementing cfb_inuse
 * under pm_cfb_lock.  pm_cfb_check_and_hold() is the routine that
 * increments the count.
 */
void
pm_cfb_rele(void)
{
	mutex_enter(&pm_cfb_lock);
	/*
	 * this call isn't using the console any  more, it is ok to take it
	 * down if the count goes to 0
	 */
	cfb_inuse--;
	mutex_exit(&pm_cfb_lock);
}
7688 
7689 /*
7690  * software interrupt handler for fbpm; this function exists because we can't
7691  * bring up the frame buffer power from above lock level.  So if we need to,
7692  * we instead schedule a softint that runs this routine and takes us into
7693  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7694  */
7695 static uint_t
7696 pm_cfb_softint(caddr_t int_handler_arg)
7697 {
7698 	_NOTE(ARGUNUSED(int_handler_arg))
7699 	int rval = DDI_INTR_UNCLAIMED;
7700 
7701 	mutex_enter(&pm_cfb_lock);
7702 	if (pm_soft_pending) {
7703 		mutex_exit(&pm_cfb_lock);
7704 		debug_enter((char *)NULL);
7705 		/* acquired in debug_enter before calling pm_cfb_trigger */
7706 		pm_cfb_rele();
7707 		mutex_enter(&pm_cfb_lock);
7708 		pm_soft_pending = 0;
7709 		mutex_exit(&pm_cfb_lock);
7710 		rval = DDI_INTR_CLAIMED;
7711 	} else
7712 		mutex_exit(&pm_cfb_lock);
7713 
7714 	return (rval);
7715 }
7716 
/*
 * Initialize the console frame buffer pm lock (and, under PMDDEBUG, the pm
 * debug lock) as spin mutexes and, when stdout is a frame buffer, register
 * the pm_cfb_softint soft interrupt handler and install the prom output
 * hooks.  Failure to register the soft interrupt is a panic.
 */
void
pm_cfb_setup_intr(void)
{
	PMD_FUNC(pmf, "cfb_setup_intr")
	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
	void pm_cfb_check_and_powerup(void);

	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
#ifdef PMDDEBUG
	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
#endif

	/* the locks are needed either way; the rest is frame buffer only */
	if (!stdout_is_framebuffer) {
		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
		return;
	}

	/*
	 * setup software interrupt handler
	 */
	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
		panic("pm: unable to register soft intr.");

	/*
	 * pm_cfb_check_and_powerup and pm_cfb_rele are handed to the prom
	 * layer as its output hooks.
	 */
	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
}
7743 
7744 /*
7745  * Checks to see if it is safe to write to the console wrt power management
7746  * (i.e. if the console is a framebuffer, then it must be at full power)
7747  * returns 1 when power is off (power-up is needed)
7748  * returns 0 when power is on (power-up not needed)
7749  */
7750 int
7751 pm_cfb_check_and_hold(void)
7752 {
7753 	/*
7754 	 * cfb_dip is set iff console is a power manageable frame buffer
7755 	 * device
7756 	 */
7757 	extern int modrootloaded;
7758 
7759 	mutex_enter(&pm_cfb_lock);
7760 	cfb_inuse++;
7761 	ASSERT(cfb_inuse);	/* wrap? */
7762 	if (modrootloaded && cfb_dip) {
7763 		/*
7764 		 * don't power down the frame buffer, the prom is using it
7765 		 */
7766 		if (pm_cfb_comps_off) {
7767 			mutex_exit(&pm_cfb_lock);
7768 			return (1);
7769 		}
7770 	}
7771 	mutex_exit(&pm_cfb_lock);
7772 	return (0);
7773 }
7774 
7775 /*
7776  * turn on cfb power (which is known to be off).
7777  * Must be called below lock level!
7778  */
7779 void
7780 pm_cfb_powerup(void)
7781 {
7782 	pm_info_t *info;
7783 	int norm;
7784 	int ccount, ci;
7785 	int unused;
7786 #ifdef DEBUG
7787 	/*
7788 	 * Can't reenter prom_prekern, so suppress pm debug messages
7789 	 * (still go to circular buffer).
7790 	 */
7791 	mutex_enter(&pm_debug_lock);
7792 	pm_divertdebug++;
7793 	mutex_exit(&pm_debug_lock);
7794 #endif
7795 	info = PM_GET_PM_INFO(cfb_dip);
7796 	ASSERT(info);
7797 
7798 	ccount = PM_NUMCMPTS(cfb_dip);
7799 	for (ci = 0; ci < ccount; ci++) {
7800 		norm = pm_get_normal_power(cfb_dip, ci);
7801 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7802 		    PM_CANBLOCK_BYPASS, 0, &unused);
7803 	}
7804 #ifdef DEBUG
7805 	mutex_enter(&pm_debug_lock);
7806 	pm_divertdebug--;
7807 	mutex_exit(&pm_debug_lock);
7808 #endif
7809 }
7810 
/*
 * Check if the console framebuffer is powered up.  If not power it up.
 * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
 * must be released by calling pm_cfb_rele when the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	/* a return of 1 means the frame buffer is off and must be raised */
	if (pm_cfb_check_and_hold())
		pm_cfb_powerup();
}
7823 
/*
 * Trigger a low level interrupt to power up console frame buffer.
 *
 * Does nothing when there is no console frame buffer dip.  Otherwise the
 * current lbolt is stored in pm_soft_pending and the soft interrupt
 * registered as pm_soft_id is triggered.
 */
void
pm_cfb_trigger(void)
{
	if (cfb_dip == NULL)
		return;

	mutex_enter(&pm_cfb_lock);
	/*
	 * If machine appears to be hung, pulling the keyboard connector of
	 * the console will cause a high level interrupt and go to debug_enter.
	 * But, if the fb is powered down, this routine will be called to bring
	 * it up (by generating a softint to do the work).  If soft interrupts
	 * are not running, and the keyboard connector is pulled again, the
	 * following code detects this condition and calls panic which allows
	 * the fb to be brought up from high level.
	 *
	 * If two nearly simultaneous calls to debug_enter occur (both from
	 * high level) the code described above will cause a panic.
	 */
	if (lbolt <= pm_soft_pending) {
		/* pm_cfb_lock is still held here; panic() does not return */
		panicstr = "pm_cfb_trigger: lbolt not advancing";
		panic(panicstr);	/* does a power up at any intr level */
		/* NOTREACHED */
	}
	/* non-zero value doubles as the "request pending" flag */
	pm_soft_pending = lbolt;
	mutex_exit(&pm_cfb_lock);
	ddi_trigger_softintr(pm_soft_id);
}
7855 
7856 major_t
7857 pm_path_to_major(char *path)
7858 {
7859 	PMD_FUNC(pmf, "path_to_major")
7860 	char *np, *ap, *bp;
7861 	major_t ret;
7862 	size_t len;
7863 	static major_t i_path_to_major(char *, char *);
7864 
7865 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7866 
7867 	np = strrchr(path, '/');
7868 	if (np != NULL)
7869 		np++;
7870 	else
7871 		np = path;
7872 	len = strlen(np) + 1;
7873 	bp = kmem_alloc(len, KM_SLEEP);
7874 	(void) strcpy(bp, np);
7875 	if ((ap = strchr(bp, '@')) != NULL) {
7876 		*ap = '\0';
7877 	}
7878 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7879 	ret = i_path_to_major(path, np);
7880 	kmem_free(bp, len);
7881 	return (ret);
7882 }
7883 
#ifdef DEBUG
#ifndef sparc
clock_t pt_sleep = 1;
#endif

/*
 * In-memory message log used by pm_log():
 *	pm_msgbuf	start of the buffer (allocated on first use)
 *	pm_bufend	last byte of the buffer
 *	pm_msgp		where the next message will be written
 *	pm_logpages	buffer size in pages
 */
char	*pm_msgp;
char	*pm_bufend;
char	*pm_msgbuf = NULL;
int	pm_logpages = 0x100;
#include <sys/sunldi.h>
#include <sys/uio.h>
clock_t	pm_log_sleep = 1000;	/* passed to drv_usecwait() per message */
int	pm_extra_cr = 1;	/* x86: follow each message with "\r" */
volatile int pm_tty = 1;	/* x86: also send messages to pm_printf() */

#define	PMLOGPGS	pm_logpages

#if defined(__x86)
void pm_printf(char *s);
#endif

/*
 * Format a message into the in-memory log and, unless output is diverted
 * (pm_divertdebug), print it with prom_printf() as well.  On x86 the message
 * additionally goes to pm_printf() when pm_tty is set.
 *
 * Each message is stored with its terminating NUL.  When a message does not
 * fit between the current position and the end of the buffer, the unused
 * tail is cleared and the message is written at the start of the buffer.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		/*
		 * NOTE(review): pm_debug_lock is initialized as a spin mutex
		 * in pm_cfb_setup_intr(); confirm that a KM_SLEEP allocation
		 * is acceptable while it is held.
		 */
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}
	/* first pass only measures the formatted length (plus the NUL) */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);
	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		/*
		 * Move the write position to the start of the buffer before
		 * formatting, so that the text printed below is the message
		 * just written and not the zeroed tail.
		 */
		pm_msgp = pm_msgbuf;
		/* never write past the buffer, truncate instead */
		if (size > (pm_bufend - pm_msgbuf))
			size = pm_bufend - pm_msgbuf;
	}
	va_start(adx, fmt);
	(void) vsnprintf(pm_msgp, size, fmt, adx);
	va_end(adx);
#if defined(__x86)
	if (pm_tty) {
		pm_printf(pm_msgp);
		if (pm_extra_cr)
			pm_printf("\r");
	}
#endif
	if (!pm_divertdebug)
		prom_printf("%s", pm_msgp);
	pm_msgp += size;
	mutex_exit(&pm_debug_lock);
	drv_usecwait((clock_t)pm_log_sleep);
}
#endif	/* DEBUG */
7953 
/*
 * We want to save the state of any directly pm'd devices over the suspend/
 * resume process so that we can put them back the way the controlling
 * process left them.
 *
 * Sets pm_processes_stopped and then walks the device tree from the root
 * with pm_save_direct_lvl_walk(), which records the current levels.
 * pm_restore_direct_levels() tests pm_processes_stopped to learn whether
 * this function ran.
 */
void
pm_save_direct_levels(void)
{
	/*
	 * Set the flag before walking: the walk callback releases drivers
	 * that are waiting on a directly managed device, on the basis that
	 * this flag is already set.
	 */
	pm_processes_stopped = 1;
	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
}
7965 
7966 static int
7967 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7968 {
7969 	_NOTE(ARGUNUSED(arg))
7970 	int i;
7971 	int *ip;
7972 	pm_info_t *info = PM_GET_PM_INFO(dip);
7973 
7974 	if (!info)
7975 		return (DDI_WALK_CONTINUE);
7976 
7977 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7978 		if (PM_NUMCMPTS(dip) > 2) {
7979 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7980 			    sizeof (int), KM_SLEEP);
7981 			ip = info->pmi_lp;
7982 		} else {
7983 			ip = info->pmi_levels;
7984 		}
7985 		/* autopm and processes are stopped, ok not to lock power */
7986 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7987 			*ip++ = PM_CURPOWER(dip, i);
7988 		/*
7989 		 * There is a small window between stopping the
7990 		 * processes and setting pm_processes_stopped where
7991 		 * a driver could get hung up in a pm_raise_power()
7992 		 * call.  Free any such driver now.
7993 		 */
7994 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7995 	}
7996 
7997 	return (DDI_WALK_CONTINUE);
7998 }
7999 
8000 void
8001 pm_restore_direct_levels(void)
8002 {
8003 	/*
8004 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
8005 	 * threads failed) then we don't want to try to restore them
8006 	 */
8007 	if (!pm_processes_stopped)
8008 		return;
8009 
8010 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
8011 	pm_processes_stopped = 0;
8012 }
8013 
8014 static int
8015 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
8016 {
8017 	_NOTE(ARGUNUSED(arg))
8018 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
8019 	int i, nc, result;
8020 	int *ip;
8021 
8022 	pm_info_t *info = PM_GET_PM_INFO(dip);
8023 	if (!info)
8024 		return (DDI_WALK_CONTINUE);
8025 
8026 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8027 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
8028 			ip = &info->pmi_lp[nc - 1];
8029 		} else {
8030 			ip = &info->pmi_levels[nc - 1];
8031 		}
8032 		/*
8033 		 * Because fb drivers fail attempts to turn off the
8034 		 * fb when the monitor is on, but treat a request to
8035 		 * turn on the monitor as a request to turn on the
8036 		 * fb too, we process components in descending order
8037 		 * Because autopm is disabled and processes aren't
8038 		 * running, it is ok to examine current power outside
8039 		 * of the power lock
8040 		 */
8041 		for (i = nc - 1; i >= 0; i--, ip--) {
8042 			if (PM_CURPOWER(dip, i) == *ip)
8043 				continue;
8044 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
8045 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
8046 				cmn_err(CE_WARN, "cpr: unable "
8047 				    "to restore power level of "
8048 				    "component %d of directly "
8049 				    "power manged device %s@%s"
8050 				    " to %d",
8051 				    i, PM_NAME(dip),
8052 				    PM_ADDR(dip), *ip);
8053 				PMD(PMD_FAIL, ("%s: failed to restore "
8054 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
8055 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
8056 				    PM_CURPOWER(dip, i), *ip, result))
8057 			}
8058 		}
8059 		if (nc > 2) {
8060 			kmem_free(info->pmi_lp, nc * sizeof (int));
8061 			info->pmi_lp = NULL;
8062 		}
8063 	}
8064 	return (DDI_WALK_CONTINUE);
8065 }
8066 
8067 /*
8068  * Stolen from the bootdev module
8069  * attempt to convert a path to a major number
8070  */
8071 static major_t
8072 i_path_to_major(char *path, char *leaf_name)
8073 {
8074 	extern major_t path_to_major(char *pathname);
8075 	major_t maj;
8076 
8077 	if ((maj = path_to_major(path)) == DDI_MAJOR_T_NONE) {
8078 		maj = ddi_name_to_major(leaf_name);
8079 	}
8080 
8081 	return (maj);
8082 }
8083 
/*
 * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
 * An entry in the list means that the device is detached, so we need to
 * adjust its ancestors as if they had just seen this attach, and any detached
 * ancestors need to have their list entries adjusted.
 *
 * major is the major number of the driver being removed.  The work is done
 * by i_pm_driver_removed(); this wrapper only adds serialization.
 */
void
pm_driver_removed(major_t major)
{
	static void i_pm_driver_removed(major_t major);

	/*
	 * Serialize removal of drivers. This is to keep ancestors of
	 * a node that is being deleted from getting deleted and added back
	 * with different counters.
	 */
	mutex_enter(&pm_remdrv_lock);
	i_pm_driver_removed(major);
	mutex_exit(&pm_remdrv_lock);
}
8104 
8105 /*
8106  * This routine is called recursively by pm_noinvol_process_ancestors()
8107  */
8108 static void
8109 i_pm_driver_removed(major_t major)
8110 {
8111 	PMD_FUNC(pmf, "driver_removed")
8112 	static void adjust_ancestors(char *, int);
8113 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8114 	static void pm_noinvol_process_ancestors(char *);
8115 	pm_noinvol_t *ip, *pp = NULL;
8116 	int wasvolpmd;
8117 	ASSERT(major != DDI_MAJOR_T_NONE);
8118 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8119 again:
8120 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8121 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8122 		if (major != ip->ni_major)
8123 			continue;
8124 		/*
8125 		 * If it is an ancestor of no-invol node, which is
8126 		 * not removed, skip it. This is to cover the case of
8127 		 * ancestor removed without removing its descendants.
8128 		 */
8129 		if (pm_is_noinvol_ancestor(ip)) {
8130 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8131 			continue;
8132 		}
8133 		wasvolpmd = ip->ni_wasvolpmd;
8134 		/*
8135 		 * remove the entry from the list
8136 		 */
8137 		if (pp) {
8138 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8139 			    pmf, ip->ni_path, pp->ni_path))
8140 			pp->ni_next = ip->ni_next;
8141 		} else {
8142 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8143 			    ip->ni_path))
8144 			ASSERT(pm_noinvol_head == ip);
8145 			pm_noinvol_head = ip->ni_next;
8146 		}
8147 		rw_exit(&pm_noinvol_rwlock);
8148 		adjust_ancestors(ip->ni_path, wasvolpmd);
8149 		/*
8150 		 * Had an ancestor been removed before this node, it would have
8151 		 * been skipped. Adjust the no-invol counters for such skipped
8152 		 * ancestors.
8153 		 */
8154 		pm_noinvol_process_ancestors(ip->ni_path);
8155 		kmem_free(ip->ni_path, ip->ni_size);
8156 		kmem_free(ip, sizeof (*ip));
8157 		goto again;
8158 	}
8159 	rw_exit(&pm_noinvol_rwlock);
8160 }
8161 
8162 /*
8163  * returns 1, if *aip is a ancestor of a no-invol node
8164  *	   0, otherwise
8165  */
8166 static int
8167 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8168 {
8169 	pm_noinvol_t *ip;
8170 
8171 	ASSERT(strlen(aip->ni_path) != 0);
8172 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8173 		if (ip == aip)
8174 			continue;
8175 		/*
8176 		 * To be an ancestor, the path must be an initial substring of
8177 		 * the descendent, and end just before a '/' in the
8178 		 * descendent's path.
8179 		 */
8180 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8181 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8182 			return (1);
8183 	}
8184 	return (0);
8185 }
8186 
/*
 * Adjust the no-involuntary-power-cycles accounting of the parent of the
 * node named by path, and recursively of its ancestors.
 *
 * path		device path of the node whose parent is to be adjusted.  The
 *		code below works on private copies (pathbuf/apath) and does
 *		not write through path.
 * wasvolpmd	passed on to pm_noinvol_update(); when non-zero the parent's
 *		ni_volpmd count is decremented along with ni_noinvolpm.
 *
 * If the parent has a dev_info node, pm_noinvol_update() does the work.
 * Otherwise the pm_noinvol list is searched for an entry with the parent's
 * path; its counters are decremented and the entry is unlinked and freed
 * once it has no no-invol descendants left and is not itself PMC_NO_INVOL.
 * The function then recurses with the parent's path.
 */
static void
adjust_ancestors(char *path, int wasvolpmd)
{
	PMD_FUNC(pmf, "adjust_ancestors")
	char *cp;
	pm_noinvol_t *lp;
	pm_noinvol_t *pp = NULL;
	major_t locked = DDI_MAJOR_T_NONE;
	dev_info_t *dip;
	char	*pathbuf;
	size_t pathbuflen = strlen(path) + 1;

	/*
	 * First we look up the ancestor's dip.  If we find it, then we
	 * adjust counts up the tree
	 */
	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
	(void) strcpy(pathbuf, path);
	cp = strrchr(pathbuf, '/');
	if (cp == NULL)	{
		/* if no ancestors, then nothing to do */
		kmem_free(pathbuf, pathbuflen);
		return;
	}
	/* cut off the leaf: pathbuf now names the parent */
	*cp = '\0';
	dip = pm_name_to_dip(pathbuf, 1);
	if (dip != NULL) {
		locked = PM_MAJOR(dip);

		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
		    path, dip);

		/*
		 * NOTE(review): presumably pm_name_to_dip(..., 1) returned
		 * the dip held, hence the release - confirm.
		 */
		if (locked != DDI_MAJOR_T_NONE)
			ddi_release_devi(dip);
	} else {
		char *apath;
		size_t len = strlen(pathbuf) + 1;
		int  lock_held = 1;

		/*
		 * Now check for ancestors that exist only in the list
		 */
		apath = kmem_alloc(len, KM_SLEEP);
		(void) strcpy(apath, pathbuf);
		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
			/*
			 * This can only happen once.  Since we have to drop
			 * the lock, we need to extract the relevant info.
			 */
			if (strcmp(pathbuf, lp->ni_path) == 0) {
				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
				    lp->ni_path, lp->ni_noinvolpm,
				    lp->ni_noinvolpm - 1))
				lp->ni_noinvolpm--;
				if (wasvolpmd && lp->ni_volpmd) {
					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
					    "%d\n", pmf, lp->ni_path,
					    lp->ni_volpmd, lp->ni_volpmd - 1))
					lp->ni_volpmd--;
				}
				/*
				 * remove the entry from the list, if there
				 * are no more no-invol descendants and node
				 * itself is not a no-invol node.
				 */
				if (!(lp->ni_noinvolpm ||
				    (lp->ni_flags & PMC_NO_INVOL))) {
					ASSERT(lp->ni_volpmd == 0);
					if (pp) {
						PMD(PMD_NOINVOL, ("%s: freeing "
						    "%s, prev is %s\n", pmf,
						    lp->ni_path, pp->ni_path))
						pp->ni_next = lp->ni_next;
					} else {
						PMD(PMD_NOINVOL, ("%s: free %s "
						    "head\n", pmf, lp->ni_path))
						ASSERT(pm_noinvol_head == lp);
						pm_noinvol_head = lp->ni_next;
					}
					/*
					 * lp is unlinked, so the lock can be
					 * dropped before recursing.
					 */
					lock_held = 0;
					rw_exit(&pm_noinvol_rwlock);
					adjust_ancestors(apath, wasvolpmd);
					/* restore apath */
					(void) strcpy(apath, pathbuf);
					kmem_free(lp->ni_path, lp->ni_size);
					kmem_free(lp, sizeof (*lp));
				}
				break;
			}
		}
		if (lock_held)
			rw_exit(&pm_noinvol_rwlock);
		/*
		 * NOTE(review): when the entry above was removed this is the
		 * second adjust_ancestors(apath, ...) call for the same
		 * parent path - confirm the double adjustment is intended.
		 */
		adjust_ancestors(apath, wasvolpmd);
		kmem_free(apath, len);
	}
	kmem_free(pathbuf, pathbuflen);
}
8290 
8291 /*
8292  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8293  * which were skipped even though their drivers were removed.
8294  */
8295 static void
8296 pm_noinvol_process_ancestors(char *path)
8297 {
8298 	pm_noinvol_t *lp;
8299 
8300 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8301 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8302 		if (strstr(path, lp->ni_path) &&
8303 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8304 			rw_exit(&pm_noinvol_rwlock);
8305 			i_pm_driver_removed(lp->ni_major);
8306 			return;
8307 		}
8308 	}
8309 	rw_exit(&pm_noinvol_rwlock);
8310 }
8311 
8312 /*
8313  * Returns true if (detached) device needs to be kept up because it exported the
8314  * "no-involuntary-power-cycles" property or we're pretending it did (console
8315  * fb case) or it is an ancestor of such a device and has used up the "one
8316  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8317  * upon detach.  In any event, we need an exact hit on the path or we return
8318  * false.
8319  */
8320 int
8321 pm_noinvol_detached(char *path)
8322 {
8323 	PMD_FUNC(pmf, "noinvol_detached")
8324 	pm_noinvol_t *ip;
8325 	int ret = 0;
8326 
8327 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8328 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8329 		if (strcmp(path, ip->ni_path) == 0) {
8330 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8331 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8332 				    "%s\n", pmf, path))
8333 				ret = 1;
8334 				break;
8335 			}
8336 #ifdef	DEBUG
8337 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8338 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8339 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8340 				    path))
8341 #endif
8342 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8343 			break;
8344 		}
8345 	}
8346 	rw_exit(&pm_noinvol_rwlock);
8347 	return (ret);
8348 }
8349 
/*
 * Returns non-zero if dip is the device recorded in cfb_dip, zero otherwise.
 */
int
pm_is_cfb(dev_info_t *dip)
{
	return (dip == cfb_dip);
}
8355 
#ifdef	DEBUG
/*
 * Return true if all components of the console frame buffer are at
 * "normal" power, i.e., fully on.  For the case where the console is not
 * a framebuffer, we also return true
 *
 * The test is simply that the pm_cfb_comps_off counter is zero.
 */
int
pm_cfb_is_up(void)
{
	return (pm_cfb_comps_off == 0);
}
#endif
8368 
/*
 * Preventing scan from powering down the node by incrementing the
 * kidsupcnt.
 *
 * Implemented as e_pm_hold_rele_power(dip, 1); undo with pm_rele_power().
 */
void
pm_hold_power(dev_info_t *dip)
{
	e_pm_hold_rele_power(dip, 1);
}
8378 
/*
 * Releasing the hold by decrementing the kidsupcnt allowing scan
 * to power down the node if all conditions are met.
 *
 * Implemented as e_pm_hold_rele_power(dip, -1); the counterpart of
 * pm_hold_power().
 */
void
pm_rele_power(dev_info_t *dip)
{
	e_pm_hold_rele_power(dip, -1);
}
8388 
8389 /*
8390  * A wrapper of pm_all_to_normal() to power up a dip
8391  * to its normal level
8392  */
8393 int
8394 pm_powerup(dev_info_t *dip)
8395 {
8396 	PMD_FUNC(pmf, "pm_powerup")
8397 
8398 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8399 	ASSERT(!(servicing_interrupt()));
8400 
8401 	/*
8402 	 * in case this node is not already participating pm
8403 	 */
8404 	if (!PM_GET_PM_INFO(dip)) {
8405 		if (!DEVI_IS_ATTACHING(dip))
8406 			return (DDI_SUCCESS);
8407 		if (pm_start(dip) != DDI_SUCCESS)
8408 			return (DDI_FAILURE);
8409 		if (!PM_GET_PM_INFO(dip))
8410 			return (DDI_SUCCESS);
8411 	}
8412 
8413 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8414 }
8415 
8416 int
8417 pm_rescan_walk(dev_info_t *dip, void *arg)
8418 {
8419 	_NOTE(ARGUNUSED(arg))
8420 
8421 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8422 		return (DDI_WALK_CONTINUE);
8423 
8424 	/*
8425 	 * Currently pm_cpr_callb/resume code is the only caller
8426 	 * and it needs to make sure that stopped scan get
8427 	 * reactivated. Otherwise, rescan walk needn't reactive
8428 	 * stopped scan.
8429 	 */
8430 	pm_scan_init(dip);
8431 
8432 	(void) pm_rescan(dip);
8433 	return (DDI_WALK_CONTINUE);
8434 }
8435 
8436 static dev_info_t *
8437 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8438 {
8439 	dev_info_t *wdip, *pdip;
8440 
8441 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8442 		pdip = ddi_get_parent(wdip);
8443 		if (pdip == dip)
8444 			return (wdip);
8445 	}
8446 	return (NULL);
8447 }
8448 
/*
 * Framework implementation of the bus_power operation for nexus `dip'.
 *
 * op selects the operation and the type of arg:
 *
 * BUS_POWER_CHILD_PWRCHG (arg: pm_bp_child_pwrchg_t *)
 *	Route a power change request for bpc_dip down the tree.  cdip is the
 *	child of dip on the way to the target.  If cdip is the target,
 *	pm_busop_set_power() performs the change.  Otherwise, if cdip wants
 *	notification its own bus_power function takes over; if not, cdip is
 *	held, brought to normal power when it has pm info, and this function
 *	recurses one level down before releasing the hold.
 *
 * BUS_POWER_NEXUS_PWRUP (arg: pm_bp_nexus_pwrup_t *)
 *	Power up a component of dip itself.  After validation the request is
 *	rewritten as an UPONLY child power change on dip and handed to
 *	pm_busop_set_power().  Invalid requests fail with EIO stored through
 *	pspm_errnop and DDI_FAILURE stored through result.
 *
 * BUS_POWER_NOINVOL (arg: pm_bp_noinvol_t *)
 *	Propagate no-involuntary-power-cycles bookkeeping towards bpni_dip,
 *	calling pm_noinvol_update_node() on nodes with pm info on the way
 *	back up.
 *
 * Any other op returns DDI_FAILURE.  Otherwise the return value is that of
 * the routine the request was handed to, or DDI_SUCCESS.
 */
int
pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	PMD_FUNC(pmf, "bp_bus_power")
	dev_info_t	*cdip;
	pm_info_t	*cinfo;
	pm_bp_child_pwrchg_t	*bpc;
	pm_sp_misc_t		*pspm;
	pm_bp_nexus_pwrup_t *bpn;
	pm_bp_child_pwrchg_t new_bpc;
	pm_bp_noinvol_t *bpi;
	dev_info_t *tdip;
	char *pathbuf;
	int		ret = DDI_SUCCESS;
	int		errno = 0;
	pm_component_t *cp;

	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
	    pm_decode_op(op)))
	switch (op) {
	case BUS_POWER_CHILD_PWRCHG:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		pspm = (pm_sp_misc_t *)bpc->bpc_private;
		tdip = bpc->bpc_dip;
		cdip = pm_get_next_descendent(dip, tdip);
		/*
		 * NOTE(review): unlike the BUS_POWER_NOINVOL case below,
		 * cdip is not checked for NULL before use here - confirm
		 * callers never pass bpc_dip == dip.
		 */
		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			/*
			 * If the node is an involved parent, it needs to
			 * power up the node as it is needed.  There is nothing
			 * else the framework can do here.
			 */
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_SET, ("%s: call bus_power for "
				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
				    impl_arg, op, arg, result));
			}
			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
			    pspm->pspm_direction == PM_LEVEL_EXACT);
			/*
			 * we presume that the parent needs to be up in
			 * order for the child to change state (either
			 * because it must already be on if the child is on
			 * (and the pm_all_to_normal_nexus() will be a nop)
			 * or because it will need to be on for the child
			 * to come on; so we make the call regardless
			 */
			pm_hold_power(cdip);
			if (cinfo) {
				pm_canblock_t canblock = pspm->pspm_canblock;
				ret = pm_all_to_normal_nexus(cdip, canblock);
				if (ret != DDI_SUCCESS) {
					/* drop the hold taken just above */
					pm_rele_power(cdip);
					return (ret);
				}
			}
			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
			    PM_DEVICE(cdip)))
			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
			    result);
			pm_rele_power(cdip);
		} else {
			/* cdip is the target itself: do the power change */
			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
			    result);
		}
		return (ret);

	case BUS_POWER_NEXUS_PWRUP:
		bpn = (pm_bp_nexus_pwrup_t *)arg;
		pspm = (pm_sp_misc_t *)bpn->bpn_private;

		/* reject nodes, components or levels that do not validate */
		if (!e_pm_valid_info(dip, NULL) ||
		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
			    pmf, PM_DEVICE(dip)))
			*pspm->pspm_errnop = EIO;
			*(int *)result = DDI_FAILURE;
			return (DDI_FAILURE);
		}

		ASSERT(bpn->bpn_dip == dip);
		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
		    PM_DEVICE(dip)))
		/* recast the request as a child power change on dip itself */
		new_bpc.bpc_dip = dip;
		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
		new_bpc.bpc_comp = bpn->bpn_comp;
		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
		new_bpc.bpc_nlevel = bpn->bpn_level;
		new_bpc.bpc_private = bpn->bpn_private;
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
		    PM_LEVEL_UPONLY;
		/*
		 * NOTE(review): bpc_private is the caller's pm_sp_misc_t, so
		 * this leaves the caller's pspm_errnop pointing at a local
		 * of this function after return - confirm callers do not
		 * read through it afterwards.
		 */
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
		    &errno;
		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
		    (void *)&new_bpc, result);
		kmem_free(pathbuf, MAXPATHLEN);
		return (ret);

	case BUS_POWER_NOINVOL:
		bpi = (pm_bp_noinvol_t *)arg;
		tdip = bpi->bpni_dip;
		cdip = pm_get_next_descendent(dip, tdip);

		/* In case of rem_drv, the leaf node has been removed */
		if (cdip == NULL)
			return (DDI_SUCCESS);

		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_NOINVOL,
				    ("%s: call bus_power for %s@%s(%s#%d)\n",
				    pmf, PM_DEVICE(cdip)))
				ret = (*PM_BUS_POWER_FUNC(cdip))
				    (cdip, NULL, op, arg, result);
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			} else {
				PMD(PMD_NOINVOL,
				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
				    PM_DEVICE(cdip)))
				ret = pm_busop_bus_power(cdip, NULL, op,
				    arg, result);
				/*
				 * Update the current node.
				 */
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			}
		} else {
			/*
			 * For attach, detach, power up:
			 * Do nothing for leaf node since its
			 * counts are already updated.
			 * For CFB and driver removal, since the
			 * path and the target dip passed in is up to and incl.
			 * the immediate ancestor, need to do the update.
			 */
			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
			    "reached\n", pmf, PM_DEVICE(cdip)))
			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
				(void) pm_noinvol_update_node(cdip, bpi);
			return (DDI_SUCCESS);
		}

	default:
		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
		return (DDI_FAILURE);
	}
}
8609 
8610 static int
8611 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8612     void *arg, void *resultp)
8613 {
8614 	_NOTE(ARGUNUSED(impl_arg))
8615 	PMD_FUNC(pmf, "bp_set_power")
8616 	pm_ppm_devlist_t *devl = NULL;
8617 	int clevel, circ;
8618 #ifdef	DEBUG
8619 	int circ_db, ccirc_db;
8620 #endif
8621 	int ret = DDI_SUCCESS;
8622 	dev_info_t *cdip;
8623 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8624 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8625 	pm_canblock_t canblock = pspm->pspm_canblock;
8626 	int scan = pspm->pspm_scan;
8627 	int comp = bpc->bpc_comp;
8628 	int olevel = bpc->bpc_olevel;
8629 	int nlevel = bpc->bpc_nlevel;
8630 	int comps_off_incr = 0;
8631 	dev_info_t *pdip = ddi_get_parent(dip);
8632 	int dodeps;
8633 	int direction = pspm->pspm_direction;
8634 	int *errnop = pspm->pspm_errnop;
8635 #ifdef PMDDEBUG
8636 	char *dir = pm_decode_direction(direction);
8637 #endif
8638 	int *iresp = (int *)resultp;
8639 	time_t	idletime, thresh;
8640 	pm_component_t *cp = PM_CP(dip, comp);
8641 	int work_type;
8642 
8643 	*iresp = DDI_SUCCESS;
8644 	*errnop = 0;
8645 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8646 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8647 	    pm_decode_op(op)))
8648 
8649 	/*
8650 	 * The following set of conditions indicate we are here to handle a
8651 	 * driver's pm_[raise|lower]_power request, but the device is being
8652 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8653 	 * we want to pm_block and pass a status back to the caller based
8654 	 * on whether the controlling process's next activity on the device
8655 	 * matches the current request or not.  This distinction tells
8656 	 * downstream functions to avoid calling into a driver or changing
8657 	 * the framework's power state.  To actually block, we need:
8658 	 *
8659 	 * PM_ISDIRECT(dip)
8660 	 *	no reason to block unless a process is directly controlling dev
8661 	 * direction != PM_LEVEL_EXACT
8662 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8663 	 * !pm_processes_stopped
8664 	 *	don't block if controlling proc already be stopped for cpr
8665 	 * canblock != PM_CANBLOCK_BYPASS
8666 	 *	our caller must not have explicitly prevented blocking
8667 	 */
8668 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8669 		PM_LOCK_DIP(dip);
8670 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8671 			/* releases dip lock */
8672 			ret = pm_busop_match_request(dip, bpc);
8673 			if (ret == EAGAIN) {
8674 				PM_LOCK_DIP(dip);
8675 				continue;
8676 			}
8677 			return (*iresp = ret);
8678 		}
8679 		PM_UNLOCK_DIP(dip);
8680 	}
8681 	/* BC device is never scanned, so power will stick until we are done */
8682 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8683 	    direction != PM_LEVEL_DOWNONLY) {
8684 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8685 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8686 		    canblock, 0, resultp) != DDI_SUCCESS) {
8687 			/* *resultp set by pm_set_power */
8688 			return (DDI_FAILURE);
8689 		}
8690 	}
8691 	if (PM_WANTS_NOTIFICATION(pdip)) {
8692 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8693 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8694 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8695 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8696 		if (ret != DDI_SUCCESS) {
8697 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8698 			    pmf, PM_DEVICE(pdip)))
8699 			return (DDI_FAILURE);
8700 		}
8701 	} else {
8702 		/*
8703 		 * Since we don't know what the actual power level is,
8704 		 * we place a power hold on the parent no matter what
8705 		 * component and level is changing.
8706 		 */
8707 		pm_hold_power(pdip);
8708 	}
8709 	PM_LOCK_POWER(dip, &circ);
8710 	clevel = PM_CURPOWER(dip, comp);
8711 	/*
8712 	 * It's possible that a call was made to pm_update_maxpower()
8713 	 * on another thread before we took the lock above. So, we need to
8714 	 * make sure that this request isn't processed after the
8715 	 * change of power executed on behalf of pm_update_maxpower().
8716 	 */
8717 	if (nlevel > pm_get_normal_power(dip, comp)) {
8718 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8719 		    pmf))
8720 		ret = DDI_FAILURE;
8721 		*iresp = DDI_FAILURE;
8722 		goto post_notify;
8723 	}
8724 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8725 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8726 	    clevel, dir))
8727 	switch (direction) {
8728 	case PM_LEVEL_UPONLY:
8729 		/* Powering up */
8730 		if (clevel >= nlevel) {
8731 			PMD(PMD_SET, ("%s: current level is already "
8732 			    "at or above the requested level.\n", pmf))
8733 			*iresp = DDI_SUCCESS;
8734 			ret = DDI_SUCCESS;
8735 			goto post_notify;
8736 		}
8737 		break;
8738 	case PM_LEVEL_EXACT:
8739 		/* specific level request */
8740 		if (clevel == nlevel && !PM_ISBC(dip)) {
8741 			PMD(PMD_SET, ("%s: current level is already "
8742 			    "at the requested level.\n", pmf))
8743 			*iresp = DDI_SUCCESS;
8744 			ret = DDI_SUCCESS;
8745 			goto post_notify;
8746 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8747 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8748 			if (!pm_cfb_enabled) {
8749 				PMD(PMD_ERROR | PMD_CFB,
8750 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8751 				*errnop = EINVAL;
8752 				*iresp = DDI_FAILURE;
8753 				ret = DDI_FAILURE;
8754 				goto post_notify;
8755 			}
8756 			mutex_enter(&pm_cfb_lock);
8757 			while (cfb_inuse) {
8758 				mutex_exit(&pm_cfb_lock);
8759 				if (delay_sig(1) == EINTR) {
8760 					ret = DDI_FAILURE;
8761 					*iresp = DDI_FAILURE;
8762 					*errnop = EINTR;
8763 					goto post_notify;
8764 				}
8765 				mutex_enter(&pm_cfb_lock);
8766 			}
8767 			mutex_exit(&pm_cfb_lock);
8768 		}
8769 		break;
8770 	case PM_LEVEL_DOWNONLY:
8771 		/* Powering down */
8772 		thresh = cur_threshold(dip, comp);
8773 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8774 		if (scan && ((PM_KUC(dip) != 0) ||
8775 		    (cp->pmc_busycount > 0) ||
8776 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8777 #ifdef	DEBUG
8778 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8779 				PMD(PMD_SET, ("%s: scan failed: "
8780 				    "kidsupcnt != 0\n", pmf))
8781 			if (cp->pmc_busycount > 0)
8782 				PMD(PMD_SET, ("%s: scan failed: "
8783 				    "device become busy\n", pmf))
8784 			if (idletime < thresh)
8785 				PMD(PMD_SET, ("%s: scan failed: device "
8786 				    "hasn't been idle long enough\n", pmf))
8787 #endif
8788 			*iresp = DDI_FAILURE;
8789 			*errnop = EBUSY;
8790 			ret = DDI_FAILURE;
8791 			goto post_notify;
8792 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8793 			PMD(PMD_SET, ("%s: current level is already at "
8794 			    "or below the requested level.\n", pmf))
8795 			*iresp = DDI_SUCCESS;
8796 			ret = DDI_SUCCESS;
8797 			goto post_notify;
8798 		}
8799 		break;
8800 	}
8801 
8802 	if (PM_IS_CFB(dip) && (comps_off_incr =
8803 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8804 		/*
8805 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8806 		 * component from full power.  Remember that we tried to
8807 		 * lower power in case it fails and we need to back out
8808 		 * the adjustment.
8809 		 */
8810 		update_comps_off(comps_off_incr, dip);
8811 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8812 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8813 		    pm_cfb_comps_off))
8814 	}
8815 
8816 	if ((*iresp = power_dev(dip,
8817 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8818 #ifdef DEBUG
8819 		/*
8820 		 * All descendents of this node should already be powered off.
8821 		 */
8822 		if (PM_CURPOWER(dip, comp) == 0) {
8823 			pm_desc_pwrchk_t pdpchk;
8824 			pdpchk.pdpc_dip = dip;
8825 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8826 			ndi_devi_enter(dip, &circ_db);
8827 			for (cdip = ddi_get_child(dip); cdip != NULL;
8828 			    cdip = ddi_get_next_sibling(cdip)) {
8829 				ndi_devi_enter(cdip, &ccirc_db);
8830 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8831 				    (void *)&pdpchk);
8832 				ndi_devi_exit(cdip, ccirc_db);
8833 			}
8834 			ndi_devi_exit(dip, circ_db);
8835 		}
8836 #endif
8837 		/*
8838 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8839 		 * back up to full power.
8840 		 */
8841 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8842 			update_comps_off(comps_off_incr, dip);
8843 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8844 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8845 			    comp, clevel, nlevel, pm_cfb_comps_off))
8846 		}
8847 		dodeps = 0;
8848 		if (POWERING_OFF(clevel, nlevel)) {
8849 			if (PM_ISBC(dip)) {
8850 				dodeps = (comp == 0);
8851 			} else {
8852 				int i;
8853 				dodeps = 1;
8854 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8855 					/* if some component still on */
8856 					if (PM_CURPOWER(dip, i)) {
8857 						dodeps = 0;
8858 						break;
8859 					}
8860 				}
8861 			}
8862 			if (dodeps)
8863 				work_type = PM_DEP_WK_POWER_OFF;
8864 		} else if (POWERING_ON(clevel, nlevel)) {
8865 			if (PM_ISBC(dip)) {
8866 				dodeps = (comp == 0);
8867 			} else {
8868 				int i;
8869 				dodeps = 1;
8870 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8871 					if (i == comp)
8872 						continue;
8873 					if (PM_CURPOWER(dip, i) > 0) {
8874 						dodeps = 0;
8875 						break;
8876 					}
8877 				}
8878 			}
8879 			if (dodeps)
8880 				work_type = PM_DEP_WK_POWER_ON;
8881 		}
8882 
8883 		if (dodeps) {
8884 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8885 
8886 			(void) ddi_pathname(dip, pathbuf);
8887 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8888 			    PM_DEP_NOWAIT, NULL, 0);
8889 			kmem_free(pathbuf, MAXPATHLEN);
8890 		}
8891 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8892 			int old;
8893 
8894 			/* If old power cached during deadlock, use it. */
8895 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8896 			    cp->pmc_phc_pwr : olevel);
8897 			mutex_enter(&pm_rsvp_lock);
8898 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8899 			    old, canblock);
8900 			pm_enqueue_notify_others(&devl, canblock);
8901 			mutex_exit(&pm_rsvp_lock);
8902 		} else {
8903 			pm_ppm_devlist_t *p;
8904 			pm_ppm_devlist_t *next;
8905 			for (p = devl; p != NULL; p = next) {
8906 				next = p->ppd_next;
8907 				kmem_free(p, sizeof (pm_ppm_devlist_t));
8908 			}
8909 			devl = NULL;
8910 		}
8911 
8912 		/*
8913 		 * If we are coming from a scan, don't do it again,
8914 		 * else we can have infinite loops.
8915 		 */
8916 		if (!scan)
8917 			pm_rescan(dip);
8918 	} else {
8919 		/* if we incremented pm_comps_off_count, but failed */
8920 		if (comps_off_incr > 0) {
8921 			update_comps_off(-comps_off_incr, dip);
8922 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8923 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8924 			    comp, clevel, nlevel, pm_cfb_comps_off))
8925 		}
8926 		*errnop = EIO;
8927 	}
8928 
8929 post_notify:
8930 	/*
8931 	 * This thread may have been in deadlock with pm_power_has_changed.
8932 	 * Before releasing power lock, clear the flag which marks this
8933 	 * condition.
8934 	 */
8935 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8936 
8937 	/*
8938 	 * Update the old power level in the bus power structure with the
8939 	 * actual power level before the transition was made to the new level.
8940 	 * Some involved parents depend on this information to keep track of
8941 	 * their children's power transition.
8942 	 */
8943 	if (*iresp != DDI_FAILURE)
8944 		bpc->bpc_olevel = clevel;
8945 
8946 	if (PM_WANTS_NOTIFICATION(pdip)) {
8947 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8948 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8949 		PM_UNLOCK_POWER(dip, circ);
8950 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8951 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8952 		    PM_DEVICE(dip), ret))
8953 	} else {
8954 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8955 		PM_UNLOCK_POWER(dip, circ);
8956 		/*
8957 		 * Now that we know what power transition has occurred
8958 		 * (if any), release the power hold.  Leave the hold
8959 		 * in effect in the case of OFF->ON transition.
8960 		 */
8961 		if (!(clevel == 0 && nlevel > 0 &&
8962 		    (!PM_ISBC(dip) || comp == 0)))
8963 			pm_rele_power(pdip);
8964 		/*
8965 		 * If the power transition was an ON->OFF transition,
8966 		 * remove the power hold from the parent.
8967 		 */
8968 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8969 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8970 			pm_rele_power(pdip);
8971 	}
8972 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8973 		return (DDI_FAILURE);
8974 	else
8975 		return (DDI_SUCCESS);
8976 }
8977 
8978 /*
8979  * If an app (SunVTS or Xsun) has taken control, then block until it
8980  * gives it up or makes the requested power level change, unless
8981  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8982  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8983  */
8984 static int
8985 pm_busop_match_request(dev_info_t *dip, void *arg)
8986 {
8987 	PMD_FUNC(pmf, "bp_match_request")
8988 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8989 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8990 	int comp = bpc->bpc_comp;
8991 	int nlevel = bpc->bpc_nlevel;
8992 	pm_canblock_t canblock = pspm->pspm_canblock;
8993 	int direction = pspm->pspm_direction;
8994 	int clevel, circ;
8995 
8996 	ASSERT(PM_IAM_LOCKING_DIP(dip));
8997 	PM_LOCK_POWER(dip, &circ);
8998 	clevel = PM_CURPOWER(dip, comp);
8999 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
9000 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
9001 	if (direction == PM_LEVEL_UPONLY) {
9002 		if (clevel >= nlevel) {
9003 			PM_UNLOCK_POWER(dip, circ);
9004 			PM_UNLOCK_DIP(dip);
9005 			return (DDI_SUCCESS);
9006 		}
9007 	} else if (clevel == nlevel) {
9008 		PM_UNLOCK_POWER(dip, circ);
9009 		PM_UNLOCK_DIP(dip);
9010 		return (DDI_SUCCESS);
9011 	}
9012 	if (canblock == PM_CANBLOCK_FAIL) {
9013 		PM_UNLOCK_POWER(dip, circ);
9014 		PM_UNLOCK_DIP(dip);
9015 		return (DDI_FAILURE);
9016 	}
9017 	if (canblock == PM_CANBLOCK_BLOCK) {
9018 		/*
9019 		 * To avoid a deadlock, we must not hold the
9020 		 * power lock when we pm_block.
9021 		 */
9022 		PM_UNLOCK_POWER(dip, circ);
9023 		PMD(PMD_SET, ("%s: blocking\n", pmf))
9024 		/* pm_block releases dip lock */
9025 		switch (pm_block(dip, comp, nlevel, clevel)) {
9026 		case PMP_RELEASE:
9027 			return (EAGAIN);
9028 		case PMP_SUCCEED:
9029 			return (DDI_SUCCESS);
9030 		case PMP_FAIL:
9031 			return (DDI_FAILURE);
9032 		}
9033 	} else {
9034 		ASSERT(0);
9035 	}
9036 	_NOTE(NOTREACHED);
9037 	return (DDI_FAILURE);	/* keep gcc happy */
9038 }
9039 
9040 static int
9041 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
9042 {
9043 	PMD_FUNC(pmf, "all_to_normal_nexus")
9044 	int		*normal;
9045 	int		i, ncomps;
9046 	size_t		size;
9047 	int		changefailed = 0;
9048 	int		ret, result = DDI_SUCCESS;
9049 	pm_bp_nexus_pwrup_t	bpn;
9050 	pm_sp_misc_t	pspm;
9051 
9052 	ASSERT(PM_GET_PM_INFO(dip));
9053 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9054 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
9055 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
9056 		return (DDI_FAILURE);
9057 	}
9058 	ncomps = PM_NUMCMPTS(dip);
9059 	for (i = 0; i < ncomps; i++) {
9060 		bpn.bpn_dip = dip;
9061 		bpn.bpn_comp = i;
9062 		bpn.bpn_level = normal[i];
9063 		pspm.pspm_canblock = canblock;
9064 		pspm.pspm_scan = 0;
9065 		bpn.bpn_private = &pspm;
9066 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
9067 		    (void *)&bpn, (void *)&result);
9068 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
9069 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
9070 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
9071 			    i, normal[i], result))
9072 			changefailed++;
9073 		}
9074 	}
9075 	kmem_free(normal, size);
9076 	if (changefailed) {
9077 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
9078 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
9079 		return (DDI_FAILURE);
9080 	}
9081 	return (DDI_SUCCESS);
9082 }
9083 
9084 int
9085 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
9086     dev_info_t *tdip)
9087 {
9088 	PMD_FUNC(pmf, "noinvol_update")
9089 	pm_bp_noinvol_t args;
9090 	int ret;
9091 	int result = DDI_SUCCESS;
9092 
9093 	args.bpni_path = path;
9094 	args.bpni_dip = tdip;
9095 	args.bpni_cmd = subcmd;
9096 	args.bpni_wasvolpmd = wasvolpmd;
9097 	args.bpni_volpmd = volpmd;
9098 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
9099 	    "volpmd %d wasvolpmd %d\n", pmf,
9100 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
9101 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
9102 	    &args, &result);
9103 	return (ret);
9104 }
9105 
9106 void
9107 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9108 {
9109 	PMD_FUNC(pmf, "noinvol_update_node")
9110 
9111 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9112 	switch (req->bpni_cmd) {
9113 	case PM_BP_NOINVOL_ATTACH:
9114 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
9115 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9116 		    DEVI(dip)->devi_pm_noinvolpm,
9117 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9118 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9119 		PM_LOCK_DIP(dip);
9120 		DEVI(dip)->devi_pm_noinvolpm--;
9121 		if (req->bpni_wasvolpmd) {
9122 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9123 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9124 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9125 			    DEVI(dip)->devi_pm_volpmd - 1))
9126 			if (DEVI(dip)->devi_pm_volpmd)
9127 				DEVI(dip)->devi_pm_volpmd--;
9128 		}
9129 		PM_UNLOCK_DIP(dip);
9130 		break;
9131 
9132 	case PM_BP_NOINVOL_DETACH:
9133 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9134 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9135 		    DEVI(dip)->devi_pm_noinvolpm,
9136 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9137 		PM_LOCK_DIP(dip);
9138 		DEVI(dip)->devi_pm_noinvolpm++;
9139 		if (req->bpni_wasvolpmd) {
9140 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9141 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9142 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9143 			    DEVI(dip)->devi_pm_volpmd + 1))
9144 			DEVI(dip)->devi_pm_volpmd++;
9145 		}
9146 		PM_UNLOCK_DIP(dip);
9147 		break;
9148 
9149 	case PM_BP_NOINVOL_REMDRV:
9150 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9151 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9152 		    DEVI(dip)->devi_pm_noinvolpm,
9153 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9154 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9155 		PM_LOCK_DIP(dip);
9156 		DEVI(dip)->devi_pm_noinvolpm--;
9157 		if (req->bpni_wasvolpmd) {
9158 			PMD(PMD_NOINVOL,
9159 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9160 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9161 			    DEVI(dip)->devi_pm_volpmd,
9162 			    DEVI(dip)->devi_pm_volpmd - 1))
9163 			/*
9164 			 * A power up could come in between and
9165 			 * clear the volpmd, if that's the case,
9166 			 * volpmd would be clear.
9167 			 */
9168 			if (DEVI(dip)->devi_pm_volpmd)
9169 				DEVI(dip)->devi_pm_volpmd--;
9170 		}
9171 		PM_UNLOCK_DIP(dip);
9172 		break;
9173 
9174 	case PM_BP_NOINVOL_CFB:
9175 		PMD(PMD_NOINVOL,
9176 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9177 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9178 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9179 		PM_LOCK_DIP(dip);
9180 		DEVI(dip)->devi_pm_noinvolpm++;
9181 		PM_UNLOCK_DIP(dip);
9182 		break;
9183 
9184 	case PM_BP_NOINVOL_POWER:
9185 		PMD(PMD_NOINVOL,
9186 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9187 		    pmf, PM_DEVICE(dip),
9188 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9189 		    req->bpni_volpmd))
9190 		PM_LOCK_DIP(dip);
9191 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9192 		PM_UNLOCK_DIP(dip);
9193 		break;
9194 
9195 	default:
9196 		break;
9197 	}
9198 
9199 }
9200 
#ifdef DEBUG
/*
 * Debug-only tree-walk callback.  arg is a pm_desc_pwrchk_t naming the
 * ancestor (pdpc_dip) that is powering off.  For each component of dip
 * whose current power level is non-zero a message is issued through
 * cmn_err(): CE_PANIC when pdpc_par_involved is zero, CE_WARN otherwise.
 * Nodes without pm info are skipped.
 *
 * Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	int comp;
	/* LINTED */
	int level, severity;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		/* LINTED */
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			/* E_FUNC_SET_NOT_USED */
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off "
			    "while desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip),
			    comp, level))
			cmn_err(severity, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9233 
9234 /*
9235  * Record the fact that one thread is borrowing the lock on a device node.
9236  * Use is restricted to the case where the lending thread will block until
9237  * the borrowing thread (always curthread) completes.
9238  */
9239 void
9240 pm_borrow_lock(kthread_t *lender)
9241 {
9242 	lock_loan_t *prev = &lock_loan_head;
9243 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9244 
9245 	cur->pmlk_borrower = curthread;
9246 	cur->pmlk_lender = lender;
9247 	mutex_enter(&pm_loan_lock);
9248 	cur->pmlk_next = prev->pmlk_next;
9249 	prev->pmlk_next = cur;
9250 	mutex_exit(&pm_loan_lock);
9251 }
9252 
9253 /*
9254  * Return the borrowed lock.  A thread can borrow only one.
9255  */
9256 void
9257 pm_return_lock(void)
9258 {
9259 	lock_loan_t *cur;
9260 	lock_loan_t *prev = &lock_loan_head;
9261 
9262 	mutex_enter(&pm_loan_lock);
9263 	ASSERT(prev->pmlk_next != NULL);
9264 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9265 		if (cur->pmlk_borrower == curthread)
9266 			break;
9267 
9268 	ASSERT(cur != NULL);
9269 	prev->pmlk_next = cur->pmlk_next;
9270 	mutex_exit(&pm_loan_lock);
9271 	kmem_free(cur, sizeof (*cur));
9272 }
9273 
9274 #if defined(__x86)
9275 
9276 #define	CPR_RXR	0x1
9277 #define	CPR_TXR	0x20
9278 #define	CPR_DATAREG	0x3f8
9279 #define	CPR_LSTAT	0x3fd
9280 #define	CPR_INTRCTL	0x3f9
9281 
9282 char
9283 pm_getchar(void)
9284 {
9285 	while ((inb(CPR_LSTAT) & CPR_RXR) != CPR_RXR)
9286 		drv_usecwait(10);
9287 
9288 	return (inb(CPR_DATAREG));
9289 
9290 }
9291 
9292 void
9293 pm_putchar(char c)
9294 {
9295 	while ((inb(CPR_LSTAT) & CPR_TXR) == 0)
9296 		drv_usecwait(10);
9297 
9298 	outb(CPR_DATAREG, c);
9299 }
9300 
/*
 * Send the NUL-terminated string s one character at a time through
 * pm_putchar().  The terminating NUL is not sent.
 */
void
pm_printf(char *s)
{
	char *p;

	for (p = s; *p != '\0'; p++)
		pm_putchar(*p);
}
9308 
9309 #endif
9310 
9311 int
9312 pm_ppm_searchlist(pm_searchargs_t *sp)
9313 {
9314 	power_req_t power_req;
9315 	int result = 0;
9316 	/* LINTED */
9317 	int ret;
9318 
9319 	power_req.request_type = PMR_PPM_SEARCH_LIST;
9320 	power_req.req.ppm_search_list_req.searchlist = sp;
9321 	ASSERT(DEVI(ddi_root_node())->devi_pm_ppm);
9322 	ret = pm_ctlops((dev_info_t *)DEVI(ddi_root_node())->devi_pm_ppm,
9323 	    ddi_root_node(), DDI_CTLOPS_POWER, &power_req, &result);
9324 	PMD(PMD_SX, ("pm_ppm_searchlist returns %d, result %d\n",
9325 	    ret, result))
9326 	return (result);
9327 }
9328