xref: /titanic_52/usr/src/uts/common/os/sunpm.c (revision db2bae3047e71d795bde12e3baa621f4b6cc8930)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * sunpm.c builds sunpm.o	"power management framework"
30  *	kernel-resident power management code.  Implements power management
31  *	policy
32  *	Assumes: all backwards compat. device components wake up on &
33  *		 the pm_info pointer in dev_info is initially NULL
34  *
35  * PM - (device) Power Management
36  *
37  * Each device may have 0 or more components.  If a device has no components,
38  * then it can't be power managed.  Each component has 2 or more
39  * power states.
40  *
41  * "Backwards Compatible" (bc) devices:
42  * There are two different types of devices from the point of view of this
43  * code.  The original type, left over from the original PM implementation on
44  * the voyager platform are known in this code as "backwards compatible"
45  * devices (PM_ISBC(dip) returns true).
46  * They are recognized by the pm code by the lack of a pm-components property
47  * and a call made by the driver to pm_create_components(9F).
48  * For these devices, component 0 is special, and represents the power state
49  * of the device.  If component 0 is to be set to power level 0 (off), then
50  * the framework must first call into the driver's detach(9E) routine with
51  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
52  * After setting component 0 from 0 to a non-zero power level, a call must be
53  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
54  *
55  * Currently, the only way to get a bc device power managed is via a set of
56  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
57  *
58  * For non-bc devices, the driver describes the components by exporting a
59  * pm-components(9P) property that tells how many components there are,
60  * tells what each component's power state values are, and provides human
61  * readable strings (currently unused) for each component name and power state.
62  * Devices which export pm-components(9P) are automatically power managed
63  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
64  * after parsing power.conf(4)). The exception to this rule is that power
65  * manageable CPU devices may be automatically managed independently of autopm
66  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
67  * ioctls) cpupm. If the CPU devices are not managed independently, then they
68  * are managed by autopm. In either case, for automatically power managed
69  * devices, all components are considered independent of each other, and it is
70  * up to the driver to decide when a transition requires saving or restoring
71  * hardware state.
72  *
73  * Each device component also has a threshold time associated with each power
74  * transition (see power.conf(4)), and a busy/idle state maintained by the
75  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
76  * Components are created idle.
77  *
78  * The PM framework provides several functions:
79  * -implement PM policy as described in power.conf(4)
80  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
81  *  Policies consist of:
82  *    -set threshold values (defaults if none provided by pmconfig)
83  *    -set dependencies among devices
84  *    -enable/disable autopm
85  *    -enable/disable cpupm
86  *    -turn down idle components based on thresholds (if autopm or cpupm is
87  *     enabled) (aka scanning)
88  *    -maintain power states based on dependencies among devices
89  *    -upon request, or when the frame buffer powers off, attempt to turn off
90  *     all components that are idle or become idle over the next (10 sec)
91  *     period in an attempt to get down to an EnergyStar compliant state
92  *    -prevent powering off of a device which exported the
93  *     pm-no-involuntary-power-cycles property without active involvement of
94  *     the device's driver (so no removing power when the device driver is
95  *     not attached)
96  * -provide a mechanism for a device driver to request that a device's component
97  *  be brought back to the power level necessary for the use of the device
98  * -allow a process to directly control the power levels of device components
99  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
100  * -ensure that the console frame buffer is powered up before being referenced
101  *  via prom_printf() or other prom calls that might generate console output
102  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
103  * -provide "backwards compatible" behavior for devices without pm-components
104  *  property
105  *
106  * Scanning:
107  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
108  * component of each managed device to its lowest power based on the threshold
109  * of idleness associated with each transition and the busy/idle state of the
110  * component.
111  *
112  * The actual work of this is done by pm_scan_dev(), which cycles through each
113  * component of a device, checking its idleness against its current threshold,
114  * and calling pm_set_power() as appropriate to change the power level.
115  * This function also indicates when it would next be profitable to scan the
116  * device again, and a new scan is scheduled after that time.
117  *
118  * Dependencies:
119  * It is possible to establish a dependency between the power states of two
120  * otherwise unrelated devices.  This is currently done to ensure that the
121  * cdrom is always up whenever the console framebuffer is up, so that the user
122  * can insert a cdrom and see a popup as a result.
123  *
124  * The dependency terminology used in power.conf(4) is not easy to understand,
125  * so we've adopted a different terminology in the implementation.  We write
126  * of a "keeps up" and a "kept up" device.  A relationship can be established
127  * where one device keeps up another.  That means that if the keepsup device
128  * has any component that is at a non-zero power level, all components of the
129  * "kept up" device must be brought to full power.  This relationship is
130  * asynchronous.  When the keeping device is powered up, a request is queued
131  * to a worker thread to bring up the kept device.  The caller does not wait.
132  * Scan will not turn down a kept up device.
133  *
134  * Direct PM:
135  * A device may be directly power managed by a process.  If a device is
136  * directly pm'd, then it will not be scanned, and dependencies will not be
 * enforced.  If a directly pm'd device's driver requests a power change (via
138  * pm_raise_power(9F)), then the request is blocked and notification is sent
139  * to the controlling process, which must issue the requested power change for
140  * the driver to proceed.
141  *
142  */
143 
144 #include <sys/types.h>
145 #include <sys/errno.h>
146 #include <sys/callb.h>		/* callback registration during CPR */
147 #include <sys/conf.h>		/* driver flags and functions */
148 #include <sys/open.h>		/* OTYP_CHR definition */
149 #include <sys/stat.h>		/* S_IFCHR definition */
150 #include <sys/pathname.h>	/* name -> dev_info xlation */
151 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
152 #include <sys/kmem.h>		/* memory alloc stuff */
153 #include <sys/debug.h>
154 #include <sys/archsystm.h>
155 #include <sys/pm.h>
156 #include <sys/ddi.h>
157 #include <sys/sunddi.h>
158 #include <sys/sunndi.h>
159 #include <sys/sunpm.h>
160 #include <sys/epm.h>
161 #include <sys/vfs.h>
162 #include <sys/mode.h>
163 #include <sys/mkdev.h>
164 #include <sys/promif.h>
165 #include <sys/consdev.h>
166 #include <sys/esunddi.h>
167 #include <sys/modctl.h>
168 #include <sys/fs/ufs_fs.h>
169 #include <sys/note.h>
170 #include <sys/taskq.h>
171 #include <sys/bootconf.h>
172 #include <sys/reboot.h>
173 #include <sys/spl.h>
174 #include <sys/disp.h>
175 #include <sys/sobject.h>
176 #include <sys/sunmdi.h>
177 #include <sys/systm.h>
178 #include <sys/cpuvar.h>
179 #include <sys/cyclic.h>
180 #include <sys/uadmin.h>
181 #include <sys/srn.h>
182 
183 
184 /*
185  * PM LOCKING
186  *	The list of locks:
187  * Global pm mutex locks.
188  *
189  * pm_scan_lock:
190  *		It protects the timeout id of the scan thread, and the value
191  *		of autopm_enabled and cpupm.  This lock is not held
192  *		concurrently with any other PM locks.
193  *
194  * pm_clone_lock:	Protects the clone list and count of poll events
195  *		pending for the pm driver.
196  *		Lock ordering:
197  *			pm_clone_lock -> pm_pscc_interest_rwlock,
198  *			pm_clone_lock -> pm_pscc_direct_rwlock.
199  *
200  * pm_rsvp_lock:
201  *		Used to synchronize the data structures used for processes
202  *		to rendezvous with state change information when doing
203  *		direct PM.
204  *		Lock ordering:
205  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
206  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
207  *			pm_rsvp_lock -> pm_clone_lock.
208  *
209  * ppm_lock:	protects the list of registered ppm drivers
210  *		Lock ordering:
211  *			ppm_lock -> ppm driver unit_lock
212  *
213  * pm_compcnt_lock:
214  *		Protects count of components that are not at their lowest
215  *		power level.
216  *		Lock ordering:
217  *			pm_compcnt_lock -> ppm_lock.
218  *
219  * pm_dep_thread_lock:
220  *		Protects work list for pm_dep_thread.  Not taken concurrently
221  *		with any other pm lock.
222  *
223  * pm_remdrv_lock:
224  *		Serializes the operation of removing noinvol data structure
225  *		entries for a branch of the tree when a driver has been
226  *		removed from the system (modctl_rem_major).
227  *		Lock ordering:
228  *			pm_remdrv_lock -> pm_noinvol_rwlock.
229  *
230  * pm_cfb_lock: (High level spin lock)
231  *		Protects the count of how many components of the console
232  *		frame buffer are off (so we know if we have to bring up the
 *		console as a result of a prom_printf, etc.).
234  *		No other locks are taken while holding this lock.
235  *
236  * pm_loan_lock:
237  *		Protects the lock_loan list.  List is used to record that one
238  *		thread has acquired a power lock but has launched another thread
239  *		to complete its processing.  An entry in the list indicates that
240  *		the worker thread can borrow the lock held by the other thread,
241  *		which must block on the completion of the worker.  Use is
242  *		specific to module loading.
243  *		No other locks are taken while holding this lock.
244  *
245  * Global PM rwlocks
246  *
247  * pm_thresh_rwlock:
248  *		Protects the list of thresholds recorded for future use (when
249  *		devices attach).
250  *		Lock ordering:
251  *			pm_thresh_rwlock -> devi_pm_lock
252  *
253  * pm_noinvol_rwlock:
254  *		Protects list of detached nodes that had noinvol registered.
255  *		No other PM locks are taken while holding pm_noinvol_rwlock.
256  *
257  * pm_pscc_direct_rwlock:
258  *		Protects the list that maps devices being directly power
259  *		managed to the processes that manage them.
260  *		Lock ordering:
261  *			pm_pscc_direct_rwlock -> psce_lock
262  *
 * pm_pscc_interest_rwlock:
264  *		Protects the list that maps state change events to processes
265  *		that want to know about them.
266  *		Lock ordering:
267  *			pm_pscc_interest_rwlock -> psce_lock
268  *
269  * per-dip locks:
270  *
271  * Each node has these per-dip locks, which are only used if the device is
272  * a candidate for power management (e.g. has pm components)
273  *
274  * devi_pm_lock:
275  *		Protects all power management state of the node except for
276  *		power level, which is protected by ndi_devi_enter().
277  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
278  *		Lock ordering:
279  *			devi_pm_lock -> pm_rsvp_lock,
280  *			devi_pm_lock -> pm_dep_thread_lock,
281  *			devi_pm_lock -> pm_noinvol_rwlock,
282  *			devi_pm_lock -> power lock
283  *
284  * power lock (ndi_devi_enter()):
285  *		Since changing power level is possibly a slow operation (30
286  *		seconds to spin up a disk drive), this is locked separately.
287  *		Since a call into the driver to change the power level of one
288  *		component may result in a call back into the framework to change
289  *		the power level of another, this lock allows re-entrancy by
290  *		the same thread (ndi_devi_enter is used for this because
291  *		the USB framework uses ndi_devi_enter in its power entry point,
 *		and use of any other lock would produce a deadlock).
293  *
294  * devi_pm_busy_lock:
295  *		This lock protects the integrity of the busy count.  It is
296  *		only taken by pm_busy_component() and pm_idle_component and
297  *		some code that adjust the busy time after the timer gets set
298  *		up or after a CPR operation.  It is per-dip to keep from
299  *		single-threading all the disk drivers on a system.
300  *		It could be per component instead, but most devices have
301  *		only one component.
302  *		No other PM locks are taken while holding this lock.
303  *
304  */
305 
306 static int stdout_is_framebuffer;
307 static kmutex_t	e_pm_power_lock;
308 static kmutex_t pm_loan_lock;
309 kmutex_t	pm_scan_lock;
310 callb_id_t	pm_cpr_cb_id;
311 callb_id_t	pm_panic_cb_id;
312 callb_id_t	pm_halt_cb_id;
313 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
314 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
315 
316 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
317 clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
318 clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
319 
320 #define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
321 				    pm_default_min_scan)
322 
323 static int pm_busop_set_power(dev_info_t *,
324     void *, pm_bus_power_op_t, void *, void *);
325 static int pm_busop_match_request(dev_info_t *, void *);
326 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
327 static void e_pm_set_max_power(dev_info_t *, int, int);
328 static int e_pm_get_max_power(dev_info_t *, int);
329 
330 /*
 * Dependency processing is done through a separate thread.
332  */
333 kmutex_t	pm_dep_thread_lock;
334 kcondvar_t	pm_dep_thread_cv;
335 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
336 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
337 
338 /*
339  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
340  * power managing things in single user mode that have been suppressed via
341  * power.conf entries.  Protected by pm_scan_lock.
342  */
343 int		autopm_enabled;
344 
345 /*
346  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
347  * to define the power management behavior of CPU devices separate from
348  * autopm. Protected by pm_scan_lock.
349  */
350 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
351 
352 /*
353  * AutoS3 depends on autopm being enabled, and must be enabled by
354  * PM_START_AUTOS3 command.
355  */
356 int		autoS3_enabled;
357 
358 #if !defined(__sparc)
359 /*
360  * on sparc these live in fillsysinfo.c
361  *
362  * If this variable is non-zero, cpr should return "not supported" when
363  * it is queried even though it would normally be supported on this platform.
364  */
365 int cpr_supported_override;
366 
367 /*
368  * Some platforms may need to support CPR even in the absence of
369  * having the correct platform id information.  If this
370  * variable is non-zero, cpr should proceed even in the absence
371  * of otherwise being qualified.
372  */
373 int cpr_platform_enable = 0;
374 
375 #endif
376 
377 /*
378  * pm_S3_enabled indicates that we believe the platform can support S3,
379  * which we get from pmconfig(1M)
380  */
381 int		pm_S3_enabled;
382 
383 /*
384  * This flag is true while processes are stopped for a checkpoint/resume.
385  * Controlling processes of direct pm'd devices are not available to
386  * participate in power level changes, so we bypass them when this is set.
387  */
388 static int	pm_processes_stopped;
389 
390 #ifdef	DEBUG
391 
392 /*
393  * see common/sys/epm.h for PMD_* values
394  */
395 
396 uint_t		pm_debug = 0;
397 
398 /*
399  * If pm_divertdebug is set, then no prom_printf calls will be made by
400  * PMD(), which will prevent debug output from bringing up the console
401  * frame buffer.  Clearing this variable before setting pm_debug will result
402  * in PMD output going to the console.
403  *
404  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
405  * deadlocks and decremented at the end of pm_set_power()
406  */
407 uint_t		pm_divertdebug = 1;
408 volatile uint_t pm_debug_to_console = 0;
409 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
410 
411 void prdeps(char *);
412 #endif
413 
414 /* Globals */
415 
416 /*
417  * List of recorded thresholds and dependencies
418  */
419 pm_thresh_rec_t *pm_thresh_head;
420 krwlock_t pm_thresh_rwlock;
421 
422 pm_pdr_t *pm_dep_head;
423 static int pm_unresolved_deps = 0;
424 static int pm_prop_deps = 0;
425 
426 /*
427  * List of devices that exported no-involuntary-power-cycles property
428  */
429 pm_noinvol_t *pm_noinvol_head;
430 
431 /*
432  * Locks used in noinvol processing
433  */
434 krwlock_t pm_noinvol_rwlock;
435 kmutex_t pm_remdrv_lock;
436 
437 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
438 int pm_system_idle_threshold;
439 int pm_cpu_idle_threshold;
440 
441 /*
442  * By default nexus has 0 threshold, and depends on its children to keep it up
443  */
444 int pm_default_nexus_threshold = 0;
445 
446 /*
447  * Data structures shared with common/io/pm.c
448  */
449 kmutex_t	pm_clone_lock;
450 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
451 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
452 unsigned char	pm_interest[PM_MAX_CLONE];
453 struct pollhead	pm_pollhead;
454 
455 /*
456  * Data structures shared with common/io/srn.c
457  */
458 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
459 void (*srn_signal)(int type, int event);
460 int srn_inuse;				/* stop srn detach */
461 
462 extern int	hz;
463 extern char	*platform_module_list[];
464 
465 /*
466  * Wrappers for use in ddi_walk_devs
467  */
468 
469 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
470 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
471 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
472 static int		pm_discard_dep_walk(dev_info_t *, void *);
473 #ifdef DEBUG
474 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
475 #endif
476 
477 /*
478  * Routines for managing noinvol devices
479  */
480 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
481 void			pm_noinvol_update_node(dev_info_t *,
482 			    pm_bp_noinvol_t *req);
483 
484 kmutex_t pm_rsvp_lock;
485 kmutex_t pm_compcnt_lock;
486 krwlock_t pm_pscc_direct_rwlock;
487 krwlock_t pm_pscc_interest_rwlock;
488 
489 #define	PSC_INTEREST	0	/* belongs to interest psc list */
490 #define	PSC_DIRECT	1	/* belongs to direct psc list */
491 
492 pscc_t *pm_pscc_interest;
493 pscc_t *pm_pscc_direct;
494 
/* Major number obtained from dip's binding name via ddi_name_to_major(). */
#define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
/* Applies NEXUS_DRV() to the devopsp[] entry selected by dip's major number. */
#define	PM_IS_NEXUS(dip) NEXUS_DRV(devopsp[PM_MAJOR(dip)])
/* True when a level change goes from zero to a non-zero level. */
#define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
/* True when a level change goes from a non-zero level to zero. */
#define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
499 
/*
 * Account for one more component that is not at its lowest power level.
 *
 * Under pm_compcnt_lock, and only if dip is not a nexus or has
 * PMC_DEV_THRESH or PMC_COMP_THRESH set in devi_pm_flags:
 *   - if pm_comps_notlowest is currently 0, pm_ppm_notify_all_lowest() is
 *     called with PM_NOT_ALL_LOWEST before the count changes;
 *   - pm_comps_notlowest is then incremented.
 *
 * The expansion is a bare { } block (not do/while), and dip is evaluated
 * more than once.  The PMD() arguments name `pmf', which is not a macro
 * parameter; NOTE(review): presumably the expansion site supplies it through
 * PMD_FUNC() whenever PMD() generates code -- confirm against epm.h.
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
/*
 * Account for one fewer component that is not at its lowest power level.
 *
 * Under pm_compcnt_lock, and only if dip is not a nexus or has
 * PMC_DEV_THRESH or PMC_COMP_THRESH set in devi_pm_flags:
 *   - asserts that pm_comps_notlowest is non-zero, then decrements it;
 *   - if the count reaches 0, pm_ppm_notify_all_lowest() is called with
 *     PM_ALL_LOWEST.
 *
 * The expansion is a bare { } block (not do/while), and dip is evaluated
 * more than once.  The PMD() arguments name `pmf', which is not a macro
 * parameter; NOTE(review): presumably the expansion site supplies it through
 * PMD_FUNC() whenever PMD() generates code -- confirm against epm.h.
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
525 
526 /*
527  * console frame-buffer power-management is not enabled when
528  * debugging services are present.  to override, set pm_cfb_override
529  * to non-zero.
530  */
531 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
532 kmutex_t pm_cfb_lock;
533 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
534 #ifdef DEBUG
535 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
536 #else
537 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
538 #endif
539 
540 static dev_info_t *cfb_dip = 0;
541 static dev_info_t *cfb_dip_detaching = 0;
542 uint_t cfb_inuse = 0;
543 static ddi_softintr_t pm_soft_id;
544 static clock_t pm_soft_pending;
545 int	pm_scans_disabled = 0;
546 
/*
 * A structure to record the fact that one thread has borrowed a lock held
 * by another thread.  The context requires that the lender block on the
 * completion of the borrower.
 */
typedef struct lock_loan {
	struct lock_loan	*pmlk_next;	/* next entry in the list */
	kthread_t		*pmlk_borrower;	/* thread borrowing the lock */
	kthread_t		*pmlk_lender;	/* thread that holds the lock */
	/* NOTE(review): presumably the node whose power lock is on loan */
	dev_info_t		*pmlk_dip;
} lock_loan_t;
static lock_loan_t lock_loan_head;	/* list head is a dummy element */
559 
/*
 * PMD_FUNC(func, name) declares a local `char *func' initialized to the
 * string `name', but only when both DEBUG and PMDDEBUG are defined; in every
 * other configuration it expands to nothing.  The expansion carries its own
 * terminating semicolon, so uses are written without one.
 */
#ifdef	DEBUG
#ifdef	PMDDEBUG
#define	PMD_FUNC(func, name)	char *(func) = (name);
#else	/* !PMDDEBUG */
#define	PMD_FUNC(func, name)
#endif	/* PMDDEBUG */
#else	/* !DEBUG */
#define	PMD_FUNC(func, name)
#endif	/* DEBUG */
569 
570 
571 /*
572  * Must be called before first device (including pseudo) attach
573  */
574 void
575 pm_init_locks(void)
576 {
577 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
578 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
579 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
580 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
581 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
582 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
583 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
584 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
585 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
586 }
587 
588 static boolean_t
589 pm_cpr_callb(void *arg, int code)
590 {
591 	_NOTE(ARGUNUSED(arg))
592 	static int auto_save;
593 	static pm_cpupm_t cpupm_save;
594 	static int pm_reset_timestamps(dev_info_t *, void *);
595 
596 	switch (code) {
597 	case CB_CODE_CPR_CHKPT:
598 		/*
599 		 * Cancel scan or wait for scan in progress to finish
600 		 * Other threads may be trying to restart the scan, so we
601 		 * have to keep at it unil it sticks
602 		 */
603 		mutex_enter(&pm_scan_lock);
604 		ASSERT(!pm_scans_disabled);
605 		pm_scans_disabled = 1;
606 		auto_save = autopm_enabled;
607 		autopm_enabled = 0;
608 		cpupm_save = cpupm;
609 		cpupm = PM_CPUPM_NOTSET;
610 		mutex_exit(&pm_scan_lock);
611 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
612 		break;
613 
614 	case CB_CODE_CPR_RESUME:
615 		ASSERT(!autopm_enabled);
616 		ASSERT(cpupm == PM_CPUPM_NOTSET);
617 		ASSERT(pm_scans_disabled);
618 		pm_scans_disabled = 0;
619 		/*
620 		 * Call pm_reset_timestamps to reset timestamps of each
621 		 * device to the time when the system is resumed so that their
622 		 * idleness can be re-calculated. That's to avoid devices from
623 		 * being powered down right after resume if the system was in
624 		 * suspended mode long enough.
625 		 */
626 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
627 
628 		autopm_enabled = auto_save;
629 		cpupm = cpupm_save;
630 		/*
631 		 * If there is any auto-pm device, get the scanning
632 		 * going. Otherwise don't bother.
633 		 */
634 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
635 		break;
636 	}
637 	return (B_TRUE);
638 }
639 
640 /*
641  * This callback routine is called when there is a system panic.  This function
642  * exists for prototype matching.
643  */
644 static boolean_t
645 pm_panic_callb(void *arg, int code)
646 {
647 	_NOTE(ARGUNUSED(arg, code))
648 	void pm_cfb_check_and_powerup(void);
649 	PMD(PMD_CFB, ("pm_panic_callb\n"))
650 	pm_cfb_check_and_powerup();
651 	return (B_TRUE);
652 }
653 
654 static boolean_t
655 pm_halt_callb(void *arg, int code)
656 {
657 	_NOTE(ARGUNUSED(arg, code))
658 	return (B_TRUE);
659 }
660 
661 /*
662  * This needs to be called after the root and platform drivers are loaded
663  * and be single-threaded with respect to driver attach/detach
664  */
665 void
666 pm_init(void)
667 {
668 	PMD_FUNC(pmf, "pm_init")
669 	char **mod;
670 	extern pri_t minclsyspri;
671 	static void pm_dep_thread(void);
672 
673 	pm_comps_notlowest = 0;
674 	pm_system_idle_threshold = pm_default_idle_threshold;
675 	pm_cpu_idle_threshold = 0;
676 
677 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
678 	    CB_CL_CPR_PM, "pm_cpr");
679 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
680 	    CB_CL_PANIC, "pm_panic");
681 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
682 	    CB_CL_HALT, "pm_halt");
683 
684 	/*
685 	 * Create a thread to do dependency processing.
686 	 */
687 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
688 	    TS_RUN, minclsyspri);
689 
690 	/*
691 	 * loadrootmodules already loaded these ppm drivers, now get them
692 	 * attached so they can claim the root drivers as they attach
693 	 */
694 	for (mod = platform_module_list; *mod; mod++) {
695 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
696 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
697 			    *mod);
698 		} else {
699 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
700 			    ddi_major_to_name(ddi_name_to_major(*mod))))
701 		}
702 	}
703 }
704 
705 /*
706  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
707  * enabled) when device becomes power managed or after a failed detach and
708  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
709  * a CPR resume to get all the devices scanning again.
710  */
711 void
712 pm_scan_init(dev_info_t *dip)
713 {
714 	PMD_FUNC(pmf, "scan_init")
715 	pm_scan_t	*scanp;
716 
717 	ASSERT(!PM_ISBC(dip));
718 
719 	PM_LOCK_DIP(dip);
720 	scanp = PM_GET_PM_SCAN(dip);
721 	if (!scanp) {
722 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
723 		    pmf, PM_DEVICE(dip)))
724 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
725 		DEVI(dip)->devi_pm_scan = scanp;
726 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
727 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
728 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
729 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
730 	}
731 	PM_UNLOCK_DIP(dip);
732 }
733 
734 /*
735  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
736  */
737 void
738 pm_scan_fini(dev_info_t *dip)
739 {
740 	PMD_FUNC(pmf, "scan_fini")
741 	pm_scan_t	*scanp;
742 
743 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
744 	ASSERT(!PM_ISBC(dip));
745 	PM_LOCK_DIP(dip);
746 	scanp = PM_GET_PM_SCAN(dip);
747 	if (!scanp) {
748 		PM_UNLOCK_DIP(dip);
749 		return;
750 	}
751 
752 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
753 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
754 
755 	kmem_free(scanp, sizeof (pm_scan_t));
756 	DEVI(dip)->devi_pm_scan = NULL;
757 	PM_UNLOCK_DIP(dip);
758 }
759 
760 /*
761  * Given a pointer to a component struct, return the current power level
762  * (struct contains index unless it is a continuous level).
763  * Located here in hopes of getting both this and dev_is_needed into the
764  * cache together
765  */
766 static int
767 cur_power(pm_component_t *cp)
768 {
769 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
770 		return (cp->pmc_cur_pwr);
771 
772 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
773 }
774 
775 static char *
776 pm_decode_direction(int direction)
777 {
778 	switch (direction) {
779 	case PM_LEVEL_UPONLY:
780 		return ("up");
781 
782 	case PM_LEVEL_EXACT:
783 		return ("exact");
784 
785 	case PM_LEVEL_DOWNONLY:
786 		return ("down");
787 
788 	default:
789 		return ("INVALID DIRECTION");
790 	}
791 }
792 
793 char *
794 pm_decode_op(pm_bus_power_op_t op)
795 {
796 	switch (op) {
797 	case BUS_POWER_CHILD_PWRCHG:
798 		return ("CHILD_PWRCHG");
799 	case BUS_POWER_NEXUS_PWRUP:
800 		return ("NEXUS_PWRUP");
801 	case BUS_POWER_PRE_NOTIFICATION:
802 		return ("PRE_NOTIFICATION");
803 	case BUS_POWER_POST_NOTIFICATION:
804 		return ("POST_NOTIFICATION");
805 	case BUS_POWER_HAS_CHANGED:
806 		return ("HAS_CHANGED");
807 	case BUS_POWER_NOINVOL:
808 		return ("NOINVOL");
809 	default:
810 		return ("UNKNOWN OP");
811 	}
812 }
813 
814 /*
815  * Returns true if level is a possible (valid) power level for component
816  */
817 int
818 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
819 {
820 	PMD_FUNC(pmf, "e_pm_valid_power")
821 	pm_component_t *cp = PM_CP(dip, cmpt);
822 	int i;
823 	int *ip = cp->pmc_comp.pmc_lvals;
824 	int limit = cp->pmc_comp.pmc_numlevels;
825 
826 	if (level < 0)
827 		return (0);
828 	for (i = 0; i < limit; i++) {
829 		if (level == *ip++)
830 			return (1);
831 	}
832 #ifdef DEBUG
833 	if (pm_debug & PMD_FAIL) {
834 		ip = cp->pmc_comp.pmc_lvals;
835 
836 		for (i = 0; i < limit; i++)
837 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
838 			    pmf, i, *ip++))
839 	}
840 #endif
841 	return (0);
842 }
843 
844 /*
845  * Returns true if device is pm'd (after calling pm_start if need be)
846  */
847 int
848 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
849 {
850 	pm_info_t *info;
851 	static int pm_start(dev_info_t *dip);
852 
853 	/*
854 	 * Check if the device is power managed if not.
855 	 * To make the common case (device is power managed already)
856 	 * fast, we check without the lock.  If device is not already
857 	 * power managed, then we take the lock and the long route through
858 	 * go get it managed.  Devices never go unmanaged until they
859 	 * detach.
860 	 */
861 	info = PM_GET_PM_INFO(dip);
862 	if (!info) {
863 		if (!DEVI_IS_ATTACHING(dip)) {
864 			return (0);
865 		}
866 		if (pm_start(dip) != DDI_SUCCESS) {
867 			return (0);
868 		}
869 		info = PM_GET_PM_INFO(dip);
870 	}
871 	ASSERT(info);
872 	if (infop != NULL)
873 		*infop = info;
874 	return (1);
875 }
876 
877 int
878 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
879 {
880 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
881 		if (cpp != NULL)
882 			*cpp = PM_CP(dip, cmpt);
883 		return (1);
884 	} else {
885 		return (0);
886 	}
887 }
888 
889 /*
890  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
891  */
892 static int
893 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
894 {
895 	PMD_FUNC(pmf, "din")
896 	pm_component_t *cp;
897 	char *pathbuf;
898 	int result;
899 
900 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
901 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
902 	    !e_pm_valid_power(dip, cmpt, level))
903 		return (DDI_FAILURE);
904 
905 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
906 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
907 	    level, cur_power(cp)))
908 
909 	if (pm_set_power(dip, cmpt, level,  direction,
910 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
911 		if (direction == PM_LEVEL_UPONLY) {
912 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
913 			(void) ddi_pathname(dip, pathbuf);
914 			cmn_err(CE_WARN, "Device %s failed to power up.",
915 			    pathbuf);
916 			kmem_free(pathbuf, MAXPATHLEN);
917 		}
918 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
919 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
920 		    pm_decode_direction(direction), level, result))
921 		return (DDI_FAILURE);
922 	}
923 
924 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
925 	    PM_DEVICE(dip)))
926 	pm_rescan(dip);
927 	return (DDI_SUCCESS);
928 }
929 
930 /*
931  * We can get multiple pm_rescan() threads, if one of them discovers
932  * that no scan is running at the moment, it kicks it into action.
933  * Otherwise, it tells the current scanning thread to scan again when
934  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
935  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
936  * thread at a time runs the pm_scan_dev() code.
937  */
/*
 * arg is the dev_info_t of the device to rescan; it is passed as a void *
 * because this function is also armed as a timeout() handler (see the
 * timeout() call below).
 *
 * Outcomes, all decided with the dip lock held:
 *  - scans disabled or stopped, or the device has no pm info / scan state
 *    or is not scanable: nothing is done
 *  - a scan is already running: PM_SCAN_AGAIN is set and we return
 *  - otherwise any pending rescan timeout is cancelled and pm_scan() is
 *    dispatched on system_taskq; if it cannot be dispatched right now a
 *    new timeout is armed to call pm_rescan() again later
 */
938 void
939 pm_rescan(void *arg)
940 {
941 	PMD_FUNC(pmf, "rescan")
942 	dev_info_t	*dip = (dev_info_t *)arg;
943 	pm_info_t	*info;
944 	pm_scan_t	*scanp;
945 	timeout_id_t	scanid;
946 
947 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
948 	PM_LOCK_DIP(dip);
949 	info = PM_GET_PM_INFO(dip);
950 	scanp = PM_GET_PM_SCAN(dip);
	/* bail out if there is nothing to scan or scanning is not allowed */
951 	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
952 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
953 		PM_UNLOCK_DIP(dip);
954 		return;
955 	}
956 	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is in progress: ask it to go around once more */
957 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
958 		PM_UNLOCK_DIP(dip);
959 		return;
960 	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; take ownership of its id and
		 * cancel it.  The dip lock is dropped around untimeout()
		 * (presumably because the handler, pm_rescan(), takes the
		 * same lock -- confirm).
		 */
961 		scanid = scanp->ps_scan_id;
962 		scanp->ps_scan_id = 0;
963 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
964 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
965 		PM_UNLOCK_DIP(dip);
966 		(void) untimeout(scanid);
967 		PM_LOCK_DIP(dip);
968 	}
969 
970 	/*
971 	 * Dispatching pm_scan during attach time is risky due to the fact that
972 	 * attach might soon fail and dip dissolved, and panic may happen while
973 	 * attempting to stop scan. So schedule a pm_rescan instead.
974 	 * (Note that if either of the first two terms are true, taskq_dispatch
975 	 * will not be invoked).
976 	 *
977 	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
978 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
979 	 * to regulate the dispatching.
980 	 *
981 	 * Scan is stopped before the device is detached (in pm_detaching())
982 	 * but it may get re-started during the post_detach processing if the
983 	 * driver fails to detach.
984 	 */
985 	if (DEVI_IS_ATTACHING(dip) ||
986 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
987 	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
988 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
989 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		/*
		 * The lock was dropped above, so another thread may have armed
		 * a timeout in the meantime.  Cancel it; if yet another one
		 * shows up while the lock is dropped again, leave that one in
		 * place and let it do the rescan.
		 */
990 		if (scanp->ps_scan_id) {
991 			scanid = scanp->ps_scan_id;
992 			scanp->ps_scan_id = 0;
993 			PM_UNLOCK_DIP(dip);
994 			(void) untimeout(scanid);
995 			PM_LOCK_DIP(dip);
996 			if (scanp->ps_scan_id) {
997 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
998 				    "thread scheduled pm_rescan, scanid %lx\n",
999 				    pmf, PM_DEVICE(dip),
1000 				    (ulong_t)scanp->ps_scan_id))
1001 				PM_UNLOCK_DIP(dip);
1002 				return;
1003 			}
1004 		}
		/*
		 * Retry later: after pm_id_ticks when idle-down is set,
		 * otherwise after PM_MIN_SCAN(dip) * hz ticks.
		 */
1005 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1006 		    (scanp->ps_idle_down ? pm_id_ticks :
1007 		    (PM_MIN_SCAN(dip) * hz)));
1008 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
1009 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
1010 		    (ulong_t)scanp->ps_scan_id))
1011 	} else {
1012 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
1013 		    pmf, PM_DEVICE(dip)))
		/* pm_scan() clears this flag once it starts running */
1014 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
1015 	}
1016 	PM_UNLOCK_DIP(dip);
1017 }
1018 
/*
 * pm_scan: body of a scan, dispatched by pm_rescan() via taskq_dispatch().
 * arg is the dev_info_t of the device to scan.
 *
 * Clears PM_SCAN_DISPATCHED on every path.  If scanning is disabled or
 * stopped it returns at once; if another thread is already scanning it
 * sets PM_SCAN_AGAIN and returns.  Otherwise it sets PM_SCANNING and calls
 * pm_scan_dev() with the dip lock dropped, repeating for as long as
 * someone set PM_SCAN_AGAIN in the meantime.  Finally, unless
 * pm_scan_dev() returned LONG_MAX, it arms a timeout to call pm_rescan()
 * after the returned number of seconds.
 */
1019 void
1020 pm_scan(void *arg)
1021 {
1022 	PMD_FUNC(pmf, "scan")
1023 	dev_info_t	*dip = (dev_info_t *)arg;
1024 	pm_scan_t	*scanp;
1025 	time_t		nextscan;
1026 
1027 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
1028 
1029 	PM_LOCK_DIP(dip);
1030 	scanp = PM_GET_PM_SCAN(dip);
1031 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1032 
	/* not allowed to scan: drop our dispatch marker and any redo request */
1033 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1034 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
1035 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
1036 		PM_UNLOCK_DIP(dip);
1037 		return;
1038 	}
1039 
1040 	if (scanp->ps_idle_down) {
1041 		/*
1042 		 * make sure we remember idledown was in effect until
1043 		 * we've completed the scan
1044 		 */
1045 		PMID_SET_SCANS(scanp->ps_idle_down)
1046 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
1047 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1048 	}
1049 
1050 	/* possible having two threads running pm_scan() */
1051 	if (scanp->ps_scan_flags & PM_SCANNING) {
1052 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
1053 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
1054 		    pmf, PM_DEVICE(dip)))
1055 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1056 		PM_UNLOCK_DIP(dip);
1057 		return;
1058 	}
1059 
	/*
	 * We are the scanner now.  pm_scan_dev() is called with the dip lock
	 * dropped; PM_SCAN_AGAIN is cleared before each pass so that a
	 * request arriving during the pass triggers another one.
	 */
1060 	scanp->ps_scan_flags |= PM_SCANNING;
1061 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1062 	do {
1063 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
1064 		PM_UNLOCK_DIP(dip);
1065 		nextscan = pm_scan_dev(dip);
1066 		PM_LOCK_DIP(dip);
1067 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
1068 
1069 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
1070 	scanp->ps_scan_flags &= ~PM_SCANNING;
1071 
1072 	if (scanp->ps_idle_down) {
1073 		scanp->ps_idle_down &= ~PMID_SCANS;
1074 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1075 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1076 	}
1077 
1078 	/* schedule for next idle check */
1079 	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot exceed LONG_MAX */
1080 		if (nextscan > (LONG_MAX / hz))
1081 			nextscan = (LONG_MAX - 1) / hz;
1082 		if (scanp->ps_scan_id) {
			/* someone else already armed a rescan; keep theirs */
1083 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1084 			    "another rescan scheduled scanid(%lx)\n", pmf,
1085 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1086 			PM_UNLOCK_DIP(dip);
1087 			return;
1088 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			/* nextscan is in seconds; timeout() takes ticks */
1089 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1090 			    (clock_t)(nextscan * hz));
1091 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1092 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1093 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1094 		}
1095 	}
1096 	PM_UNLOCK_DIP(dip);
1097 }
1098 
1099 void
1100 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1101 {
1102 	int components = PM_NUMCMPTS(dip);
1103 	int i;
1104 
1105 	ASSERT(components > 0);
1106 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1107 	for (i = 0; i < components; i++) {
1108 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1109 	}
1110 	PM_UNLOCK_BUSY(dip);
1111 }
1112 
1113 /*
1114  * Returns true if device needs to be kept up because it exported the
1115  * "no-involuntary-power-cycles" property or we're pretending it did (console
1116  * fb case) or it is an ancestor of such a device and has used up the "one
1117  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1118  * upon detach
1119  */
1120 int
1121 pm_noinvol(dev_info_t *dip)
1122 {
1123 	PMD_FUNC(pmf, "noinvol")
1124 
1125 	/*
1126 	 * This doesn't change over the life of a driver, so no locking needed
1127 	 */
1128 	if (PM_IS_CFB(dip)) {
1129 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1130 		    pmf, PM_DEVICE(dip)))
1131 		return (1);
1132 	}
1133 	/*
1134 	 * Not an issue if no such kids
1135 	 */
1136 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1137 #ifdef DEBUG
1138 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1139 			dev_info_t *pdip = dip;
1140 			do {
1141 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1142 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1143 				    DEVI(pdip)->devi_pm_noinvolpm,
1144 				    DEVI(pdip)->devi_pm_volpmd))
1145 				pdip = ddi_get_parent(pdip);
1146 			} while (pdip);
1147 		}
1148 #endif
1149 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1150 		return (0);
1151 	}
1152 
1153 	/*
1154 	 * Since we now maintain the counts correct at every node, we no longer
1155 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1156 	 * without the children getting their counts adjusted.
1157 	 */
1158 
1159 #ifdef	DEBUG
1160 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1161 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1162 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1163 		    PM_DEVICE(dip)))
1164 #endif
1165 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1166 }
1167 
1168 /*
1169  * This function performs the actual scanning of the device.
1170  * It attempts to power off the indicated device's components if they have
1171  * been idle and other restrictions are met.
1172  * pm_scan_dev calculates and returns when the next scan should happen for
1173  * this device.
1174  */
/*
 * Return value (pm_scan() multiplies it by hz, i.e. treats it as seconds):
 *  - pm_default_min_scan if the device is still attaching
 *  - LONG_MAX if no scan is to be done (scans disabled or stopped, device
 *    kept up, direct pm'd or noinvol) or if no component needs another look
 *  - 1 if the parent could not be entered without blocking
 *  - otherwise the smallest wait computed over the components
 */
1175 time_t
1176 pm_scan_dev(dev_info_t *dip)
1177 {
1178 	PMD_FUNC(pmf, "scan_dev")
1179 	pm_scan_t	*scanp;
1180 	time_t		*timestamp, idletime, now, thresh;
1181 	time_t		timeleft = 0;
1182 #ifdef PMDDEBUG
1183 	int		curpwr;
1184 #endif
1185 	int		i, nxtpwr, pwrndx, unused;
1186 	size_t		size;
1187 	pm_component_t	 *cp;
1188 	dev_info_t	*pdip = ddi_get_parent(dip);
1189 	int		circ;
1190 	static int	cur_threshold(dev_info_t *, int);
1191 	static int	pm_next_lower_power(pm_component_t *, int);
1192 	clock_t		min_scan = pm_default_min_scan;
1193 
1194 	/*
1195 	 * skip attaching device
1196 	 */
1197 	if (DEVI_IS_ATTACHING(dip)) {
1198 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1199 		    pmf, PM_DEVICE(dip), min_scan))
1200 		return (min_scan);
1201 	}
1202 
1203 	PM_LOCK_DIP(dip);
1204 	scanp = PM_GET_PM_SCAN(dip);
1205 	min_scan = PM_MIN_SCAN(dip);
1206 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1207 
1208 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1209 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1210 	    PM_KUC(dip)))
1211 
1212 	/* no scan under the following conditions */
1213 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1214 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1215 	    (PM_KUC(dip) != 0) ||
1216 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1217 		PM_UNLOCK_DIP(dip);
1218 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1219 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1220 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1221 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1222 		    cpupm, PM_KUC(dip),
1223 		    PM_ISDIRECT(dip) ? "is" : "is not",
1224 		    pm_noinvol(dip) ? "is" : "is not"))
1225 		return (LONG_MAX);
1226 	}
1227 	PM_UNLOCK_DIP(dip);
1228 
	/* never block on the parent here; just ask to be called again soon */
1229 	if (!ndi_devi_tryenter(pdip, &circ)) {
1230 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1231 		    pmf, PM_DEVICE(pdip)))
1232 		return ((time_t)1);
1233 	}
	/* snapshot the per-component timestamps so idle times are consistent */
1234 	now = gethrestime_sec();
1235 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1236 	timestamp = kmem_alloc(size, KM_SLEEP);
1237 	pm_get_timestamps(dip, timestamp);
1238 
1239 	/*
1240 	 * Since we removed support for backwards compatible devices,
1241 	 * (see big comment at top of file)
1242 	 * it is no longer required to deal with component 0 last.
1243 	 */
1244 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1245 		/*
1246 		 * If already off (an optimization, perhaps)
1247 		 */
1248 		cp = PM_CP(dip, i);
1249 		pwrndx = cp->pmc_cur_pwr;
1250 #ifdef PMDDEBUG
1251 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1252 		    PM_LEVEL_UNKNOWN :
1253 		    cp->pmc_comp.pmc_lvals[pwrndx];
1254 #endif
1255 
1256 		if (pwrndx == 0) {
1257 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1258 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1259 			/* skip device if off or at its lowest */
1260 			continue;
1261 		}
1262 
1263 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1264 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1265 			/* were busy or newly became busy by another thread */
			/* look again after the larger of threshold and min_scan */
1266 			if (timeleft == 0)
1267 				timeleft = max(thresh, min_scan);
1268 			else
1269 				timeleft = min(
1270 				    timeleft, max(thresh, min_scan));
1271 			continue;
1272 		}
1273 
1274 		idletime = now - timestamp[i];		/* idle time */
1275 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1276 		    pmf, PM_DEVICE(dip), i, idletime))
1277 		if (idletime >= thresh || PM_IS_PID(dip)) {
1278 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1279 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1280 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
			/*
			 * Only treat this as a failure if the component also
			 * did not end up at nxtpwr by some other route.
			 */
1281 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1282 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1283 			    PM_CURPOWER(dip, i) != nxtpwr) {
1284 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1285 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1286 				    i, curpwr, nxtpwr))
				/*
				 * NOTE(review): this assignment is
				 * unconditional, so it replaces a smaller
				 * timeleft computed for an earlier component
				 * -- confirm that is intended.
				 */
1287 				timeleft = min_scan;
1288 				continue;
1289 			} else {
1290 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1291 				    "%d->%d, GOOD curpwr %d\n", pmf,
1292 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1293 				    cur_power(cp)))
1294 
1295 				if (nxtpwr == 0)	/* component went off */
1296 					continue;
1297 
1298 				/*
1299 				 * scan to next lower level
1300 				 */
1301 				if (timeleft == 0)
1302 					timeleft = max(
1303 					    1, cur_threshold(dip, i));
1304 				else
1305 					timeleft = min(timeleft,
1306 					    max(1, cur_threshold(dip, i)));
1307 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1308 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1309 				    i, timeleft))
1310 			}
1311 		} else {	/* comp not idle long enough */
			/* come back when the remainder of the threshold is up */
1312 			if (timeleft == 0)
1313 				timeleft = thresh - idletime;
1314 			else
1315 				timeleft = min(timeleft, (thresh - idletime));
1316 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1317 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1318 		}
1319 	}
1320 	ndi_devi_exit(pdip, circ);
1321 	kmem_free(timestamp, size);
1322 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1323 	    PM_DEVICE(dip), timeleft))
1324 
1325 	/*
1326 	 * if components are already at lowest level, timeleft is left 0
1327 	 */
1328 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1329 }
1330 
1331 /*
1332  * pm_scan_stop - cancel scheduled pm_rescan,
1333  *                wait for termination of dispatched pm_scan thread
1334  *                     and active pm_scan_dev thread.
1335  */
1336 void
1337 pm_scan_stop(dev_info_t *dip)
1338 {
1339 	PMD_FUNC(pmf, "scan_stop")
1340 	pm_scan_t	*scanp;
1341 	timeout_id_t	scanid;
1342 
1343 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1344 	PM_LOCK_DIP(dip);
1345 	scanp = PM_GET_PM_SCAN(dip);
1346 	if (!scanp) {
1347 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1348 		    pmf, PM_DEVICE(dip)))
1349 		PM_UNLOCK_DIP(dip);
1350 		return;
1351 	}
1352 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1353 
1354 	/* cancel scheduled scan taskq */
1355 	while (scanp->ps_scan_id) {
1356 		scanid = scanp->ps_scan_id;
1357 		scanp->ps_scan_id = 0;
1358 		PM_UNLOCK_DIP(dip);
1359 		(void) untimeout(scanid);
1360 		PM_LOCK_DIP(dip);
1361 	}
1362 
1363 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1364 		PM_UNLOCK_DIP(dip);
1365 		delay(1);
1366 		PM_LOCK_DIP(dip);
1367 	}
1368 	PM_UNLOCK_DIP(dip);
1369 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1370 }
1371 
1372 int
1373 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1374 {
1375 	_NOTE(ARGUNUSED(arg))
1376 
1377 	if (!PM_GET_PM_SCAN(dip))
1378 		return (DDI_WALK_CONTINUE);
1379 	ASSERT(!PM_ISBC(dip));
1380 	pm_scan_stop(dip);
1381 	return (DDI_WALK_CONTINUE);
1382 }
1383 
1384 /*
1385  * Converts a power level value to its index
1386  */
1387 static int
1388 power_val_to_index(pm_component_t *cp, int val)
1389 {
1390 	int limit, i, *ip;
1391 
1392 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1393 	    val != PM_LEVEL_EXACT);
1394 	/*  convert power value into index (i) */
1395 	limit = cp->pmc_comp.pmc_numlevels;
1396 	ip = cp->pmc_comp.pmc_lvals;
1397 	for (i = 0; i < limit; i++)
1398 		if (val == *ip++)
1399 			return (i);
1400 	return (-1);
1401 }
1402 
1403 /*
1404  * Converts a numeric power level to a printable string
1405  */
1406 static char *
1407 power_val_to_string(pm_component_t *cp, int val)
1408 {
1409 	int index;
1410 
1411 	if (val == PM_LEVEL_UPONLY)
1412 		return ("<UPONLY>");
1413 
1414 	if (val == PM_LEVEL_UNKNOWN ||
1415 	    (index = power_val_to_index(cp, val)) == -1)
1416 		return ("<LEVEL_UNKNOWN>");
1417 
1418 	return (cp->pmc_comp.pmc_lnames[index]);
1419 }
1420 
1421 /*
1422  * Return true if this node has been claimed by a ppm.
1423  */
1424 static int
1425 pm_ppm_claimed(dev_info_t *dip)
1426 {
1427 	return (PPM(dip) != NULL);
1428 }
1429 
1430 /*
1431  * A node which was voluntarily power managed has just used up its "free cycle"
1432  * and need is volpmd field cleared, and the same done to all its descendents
1433  */
1434 static void
1435 pm_clear_volpm_dip(dev_info_t *dip)
1436 {
1437 	PMD_FUNC(pmf, "clear_volpm_dip")
1438 
1439 	if (dip == NULL)
1440 		return;
1441 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1442 	    PM_DEVICE(dip)))
1443 	DEVI(dip)->devi_pm_volpmd = 0;
1444 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1445 		pm_clear_volpm_dip(dip);
1446 	}
1447 }
1448 
1449 /*
1450  * A node which was voluntarily power managed has used up the "free cycles"
1451  * for the subtree that it is the root of.  Scan through the list of detached
1452  * nodes and adjust the counts of any that are descendents of the node.
1453  */
1454 static void
1455 pm_clear_volpm_list(dev_info_t *dip)
1456 {
1457 	PMD_FUNC(pmf, "clear_volpm_list")
1458 	char	*pathbuf;
1459 	size_t	len;
1460 	pm_noinvol_t *ip;
1461 
1462 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1463 	(void) ddi_pathname(dip, pathbuf);
1464 	len = strlen(pathbuf);
1465 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1466 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1467 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1468 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1469 		    ip->ni_path))
1470 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1471 		    ip->ni_path[len] == '/') {
1472 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1473 			    ip->ni_path))
1474 			ip->ni_volpmd = 0;
1475 			ip->ni_wasvolpmd = 0;
1476 		}
1477 	}
1478 	kmem_free(pathbuf, MAXPATHLEN);
1479 	rw_exit(&pm_noinvol_rwlock);
1480 }
1481 
1482 /*
1483  * Powers a device, suspending or resuming the driver if it is a backward
1484  * compatible device, calling into ppm to change power level.
1485  * Called with the component's power lock held.
1486  */
1487 static int
1488 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1489     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1490 {
1491 	PMD_FUNC(pmf, "power_dev")
1492 	power_req_t power_req;
1493 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1494 	int		resume_needed = 0;
1495 	int		suspended = 0;
1496 	int		result;
1497 #ifdef PMDDEBUG
1498 	struct pm_component *cp = PM_CP(dip, comp);
1499 #endif
1500 	int		bc = PM_ISBC(dip);
1501 	int pm_all_components_off(dev_info_t *);
1502 	int		clearvolpmd = 0;
1503 	char		pathbuf[MAXNAMELEN];
1504 #ifdef PMDDEBUG
1505 	char *ppmname, *ppmaddr;
1506 #endif
1507 	/*
1508 	 * If this is comp 0 of a backwards compat device and we are
1509 	 * going to take the power away, we need to detach it with
1510 	 * DDI_PM_SUSPEND command.
1511 	 */
1512 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1513 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1514 			/* We could not suspend before turning cmpt zero off */
1515 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1516 			    pmf, PM_DEVICE(dip)))
1517 			return (DDI_FAILURE);
1518 		} else {
1519 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1520 			suspended++;
1521 		}
1522 	}
1523 	power_req.request_type = PMR_PPM_SET_POWER;
1524 	power_req.req.ppm_set_power_req.who = dip;
1525 	power_req.req.ppm_set_power_req.cmpt = comp;
1526 	power_req.req.ppm_set_power_req.old_level = old_level;
1527 	power_req.req.ppm_set_power_req.new_level = level;
1528 	power_req.req.ppm_set_power_req.canblock = canblock;
1529 	power_req.req.ppm_set_power_req.cookie = NULL;
1530 #ifdef PMDDEBUG
1531 	if (pm_ppm_claimed(dip)) {
1532 		ppmname = PM_NAME(PPM(dip));
1533 		ppmaddr = PM_ADDR(PPM(dip));
1534 
1535 	} else {
1536 		ppmname = "noppm";
1537 		ppmaddr = "0";
1538 	}
1539 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1540 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1541 	    power_val_to_string(cp, old_level), old_level,
1542 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1543 #endif
1544 	/*
1545 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1546 	 * bc device comp 0 is powering on, then we count it as a power cycle
1547 	 * against its voluntary count.
1548 	 */
1549 	if (DEVI(dip)->devi_pm_volpmd &&
1550 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1551 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1552 		clearvolpmd = 1;
1553 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1554 	    &power_req, &result)) == DDI_SUCCESS) {
1555 		/*
1556 		 * Now do involuntary pm accounting;  If we've just cycled power
1557 		 * on a voluntarily pm'd node, and by inference on its entire
1558 		 * subtree, we need to set the subtree (including those nodes
1559 		 * already detached) volpmd counts to 0, and subtract out the
1560 		 * value of the current node's volpmd count from the ancestors
1561 		 */
1562 		if (clearvolpmd) {
1563 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1564 			pm_clear_volpm_dip(dip);
1565 			pm_clear_volpm_list(dip);
1566 			if (volpmd) {
1567 				(void) ddi_pathname(dip, pathbuf);
1568 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1569 				    volpmd, 0, pathbuf, dip);
1570 			}
1571 		}
1572 	} else {
1573 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1574 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1575 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1576 	}
1577 	/*
1578 	 * If some other devices were also powered up (e.g. other cpus in
1579 	 * the same domain) return a pointer to that list
1580 	 */
1581 	if (devlist) {
1582 		*devlist = (pm_ppm_devlist_t *)
1583 		    power_req.req.ppm_set_power_req.cookie;
1584 	}
1585 	/*
1586 	 * We will have to resume the device if the device is backwards compat
1587 	 * device and either of the following is true:
1588 	 * -This is comp 0 and we have successfully powered it up
1589 	 * -This is comp 0 and we have failed to power it down. Resume is
1590 	 *  needed because we have suspended it above
1591 	 */
1592 
1593 	if (bc && comp == 0) {
1594 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1595 		if (power_op_ret == DDI_SUCCESS) {
1596 			if (POWERING_ON(old_level, level)) {
1597 				/*
1598 				 * It must be either suspended or resumed
1599 				 * via pm_power_has_changed path
1600 				 */
1601 				ASSERT((DEVI(dip)->devi_pm_flags &
1602 				    PMC_SUSPENDED) ||
1603 				    (PM_CP(dip, comp)->pmc_flags &
1604 				    PM_PHC_WHILE_SET_POWER));
1605 
1606 					resume_needed = suspended;
1607 			}
1608 		} else {
1609 			if (POWERING_OFF(old_level, level)) {
1610 				/*
1611 				 * It must be either suspended or resumed
1612 				 * via pm_power_has_changed path
1613 				 */
1614 				ASSERT((DEVI(dip)->devi_pm_flags &
1615 				    PMC_SUSPENDED) ||
1616 				    (PM_CP(dip, comp)->pmc_flags &
1617 				    PM_PHC_WHILE_SET_POWER));
1618 
1619 					resume_needed = suspended;
1620 			}
1621 		}
1622 	}
1623 	if (resume_needed) {
1624 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1625 		/* ppm is not interested in DDI_PM_RESUME */
1626 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1627 		    DDI_SUCCESS) {
1628 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1629 		} else
1630 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1631 			    PM_DEVICE(dip));
1632 	}
1633 	return (power_op_ret);
1634 }
1635 
1636 /*
1637  * Return true if we are the owner or a borrower of the devi lock.  See
1638  * pm_lock_power_single() about borrowing the lock.
1639  */
1640 static int
1641 pm_devi_lock_held(dev_info_t *dip)
1642 {
1643 	lock_loan_t *cur;
1644 
1645 	if (DEVI_BUSY_OWNED(dip))
1646 		return (1);
1647 
1648 	/* return false if no locks borrowed */
1649 	if (lock_loan_head.pmlk_next == NULL)
1650 		return (0);
1651 
1652 	mutex_enter(&pm_loan_lock);
1653 	/* see if our thread is registered as a lock borrower. */
1654 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1655 		if (cur->pmlk_borrower == curthread)
1656 			break;
1657 	mutex_exit(&pm_loan_lock);
1658 
1659 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1660 }
1661 
1662 /*
1663  * pm_set_power: adjusts power level of device.	 Assumes device is power
1664  * manageable & component exists.
1665  *
1666  * Cases which require us to bring up devices we keep up ("wekeepups") for
1667  * backwards compatible devices:
1668  *	component 0 is off and we're bringing it up from 0
1669  *		bring up wekeepup first
1670  *	and recursively when component 0 is off and we bring some other
1671  *	component up from 0
1672  * For devices which are not backward compatible, our dependency notion is much
1673  * simpler.  Unless all components are off, then wekeeps must be on.
1674  * We don't treat component 0 differently.
1675  * Canblock tells how to deal with a direct pm'd device.
1676  * Scan arg tells us if we were called from scan, in which case we don't need
1677  * to go back to the root node and walk down to change power.
1678  */
/*
 * direction is one of PM_LEVEL_UPONLY, PM_LEVEL_DOWNONLY or PM_LEVEL_EXACT.
 * retp is handed to the bus power code as the place to store an errno; it
 * must point to valid storage, since DEBUG builds read *retp after the
 * call.  The return value is whatever the bus power operation returned.
 */
1679 int
1680 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1681     pm_canblock_t canblock, int scan, int *retp)
1682 {
1683 	PMD_FUNC(pmf, "set_power")
1684 	char		*pathbuf;
1685 	pm_bp_child_pwrchg_t bpc;
1686 	pm_sp_misc_t	pspm;
1687 	int		ret = DDI_SUCCESS;
1688 	int		unused = DDI_SUCCESS;
1689 	dev_info_t	*pdip = ddi_get_parent(dip);
1690 
1691 #ifdef DEBUG
1692 	int		diverted = 0;
1693 
1694 	/*
1695 	 * This prevents operations on the console from calling prom_printf and
1696 	 * either deadlocking or bringing up the console because of debug
1697 	 * output
1698 	 */
1699 	if (dip == cfb_dip) {
1700 		diverted++;
1701 		mutex_enter(&pm_debug_lock);
1702 		pm_divertdebug++;
1703 		mutex_exit(&pm_debug_lock);
1704 	}
1705 #endif
1706 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1707 	    direction == PM_LEVEL_EXACT);
1708 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1709 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
	/*
	 * Describe the change for the bus power code: bpc carries the
	 * device, its path, the component and the old/new levels; pspm
	 * carries the pm_set_power()-specific extras and hangs off
	 * bpc_private.
	 */
1710 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1711 	(void) ddi_pathname(dip, pathbuf);
1712 	bpc.bpc_dip = dip;
1713 	bpc.bpc_path = pathbuf;
1714 	bpc.bpc_comp = comp;
1715 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1716 	bpc.bpc_nlevel = level;
1717 	pspm.pspm_direction = direction;
1718 	pspm.pspm_errnop = retp;
1719 	pspm.pspm_canblock = canblock;
1720 	pspm.pspm_scan = scan;
1721 	bpc.bpc_private = &pspm;
1722 
1723 	/*
1724 	 * If a config operation is being done (we've locked the parent) or
1725 	 * we already hold the power lock (we've locked the node)
1726 	 * then we can operate directly on the node because we have already
1727 	 * brought up all the ancestors, otherwise, we have to go back to the
1728 	 * top of the tree.
1729 	 */
1730 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1731 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1732 		    (void *)&bpc, (void *)&unused);
1733 	else
1734 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1735 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1736 #ifdef DEBUG
1737 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1738 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1739 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1740 	}
	/* undo the debug-output diversion set up for the console fb above */
1741 	if (diverted) {
1742 		mutex_enter(&pm_debug_lock);
1743 		pm_divertdebug--;
1744 		mutex_exit(&pm_debug_lock);
1745 	}
1746 #endif
1747 	kmem_free(pathbuf, MAXPATHLEN);
1748 	return (ret);
1749 }
1750 
1751 /*
1752  * If holddip is set, then if a dip is found we return with the node held.
1753  *
1754  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1755  * (resolve_pathname), but it does not drive attach.
1756  */
/*
 * Resolves an absolute device path to its dev_info node by walking down
 * from the root one component at a time.
 *
 * Returns NULL if pathname is NULL or does not start with '/', if
 * pn_get() fails, or if some component has no child node of that name or
 * the child is not attached.  A path with no components after the leading
 * slash resolves to the root node.
 */
1757 dev_info_t *
1758 pm_name_to_dip(char *pathname, int holddip)
1759 {
1760 	struct pathname pn;
1761 	char		*component;
1762 	dev_info_t	*parent, *child;
1763 	int		circ;
1764 
1765 	if ((pathname == NULL) || (*pathname != '/'))
1766 		return (NULL);
1767 
1768 	/* setup pathname and allocate component */
1769 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1770 		return (NULL);
1771 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1772 
1773 	/* start at top, process '/' component */
1774 	parent = child = ddi_root_node();
1775 	ndi_hold_devi(parent);
1776 	pn_skipslash(&pn);
1777 	ASSERT(i_ddi_devi_attached(parent));
1778 
1779 	/* process components of pathname */
1780 	while (pn_pathleft(&pn)) {
1781 		(void) pn_getcomponent(&pn, component);
1782 
1783 		/* enter parent and search for component child */
1784 		ndi_devi_enter(parent, &circ);
1785 		child = ndi_devi_findchild(parent, component);
1786 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
			/* lookup failed: give up the hold on parent, return NULL */
1787 			child = NULL;
1788 			ndi_devi_exit(parent, circ);
1789 			ndi_rele_devi(parent);
1790 			goto out;
1791 		}
1792 
1793 		/* attached child found, hold child and release parent */
1794 		ndi_hold_devi(child);
1795 		ndi_devi_exit(parent, circ);
1796 		ndi_rele_devi(parent);
1797 
1798 		/* child becomes parent, and process next component */
1799 		parent = child;
1800 		pn_skipslash(&pn);
1801 
1802 		/* loop with active ndi_devi_hold of child->parent */
1803 	}
1804 
1805 out:
1806 	pn_free(&pn);
1807 	kmem_free(component, MAXNAMELEN);
1808 
1809 	/* if we are not asked to return with hold, drop current hold */
1810 	if (child && !holddip)
1811 		ndi_rele_devi(child);
1812 	return (child);
1813 }
1814 
1815 /*
1816  * Search for a dependency and mark it unsatisfied
1817  */
1818 static void
1819 pm_unsatisfy(char *keeper, char *kept)
1820 {
1821 	PMD_FUNC(pmf, "unsatisfy")
1822 	pm_pdr_t *dp;
1823 
1824 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1825 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1826 		if (!dp->pdr_isprop) {
1827 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1828 			    (dp->pdr_kept_count > 0) &&
1829 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1830 				if (dp->pdr_satisfied) {
1831 					dp->pdr_satisfied = 0;
1832 					pm_unresolved_deps++;
1833 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1834 					    "pm_unresolved_deps now %d\n", pmf,
1835 					    pm_unresolved_deps))
1836 				}
1837 			}
1838 		}
1839 	}
1840 }
1841 
/*
 * Device "keeper" is being un power managed; it keeps up "count" other
 * devices.  We need to release any power hold we have on the kept devices,
 * and also mark each dependency no longer satisfied.
 *
 *	count		number of entries in keptpaths
 *	keeper		pathname of the keeper device
 *	keptpaths	pathnames of the devices kept up by keeper
 *	pwr		consulted only when the keeper cannot be looked up or
 *			has no pm info; non-zero then means the power holds
 *			on the kept devices are released
 */
static void
pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
{
	PMD_FUNC(pmf, "unkeeps")
	int i, j;
	dev_info_t *kept;
	dev_info_t *dip;
	struct pm_component *cp;
	int keeper_on = 0, circ;

	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
	    keeper, (void *)keptpaths))
	/*
	 * Try to grab keeper. Keeper may have gone away by now,
	 * in this case, used the passed in value pwr
	 */
	dip = pm_name_to_dip(keeper, 1);
	for (i = 0; i < count; i++) {
		/* Release power hold */
		kept = pm_name_to_dip(keptpaths[i], 1);
		if (kept) {
			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
			    PM_DEVICE(kept), i))
			/*
			 * We need to check if we skipped a bringup here
			 * because we could have failed the bringup
			 * (ie DIRECT PM device) and have
			 * not increment the count.
			 */
			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
				/*
				 * Keeper is still around: it counts as "on"
				 * if any of its components reports a non-zero
				 * cur_power().  Checked under the keeper's
				 * power lock.
				 */
				keeper_on = 0;
				PM_LOCK_POWER(dip, &circ);
				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
					cp = &DEVI(dip)->devi_pm_components[j];
					if (cur_power(cp)) {
						keeper_on++;
						break;
					}
				}
				if (keeper_on && (PM_SKBU(kept) == 0)) {
					pm_rele_power(kept);
					DEVI(kept)->devi_pm_flags
					    &= ~PMC_SKIP_BRINGUP;
				}
				PM_UNLOCK_POWER(dip, circ);
			} else if (pwr) {
				/*
				 * Keeper is gone (or no longer has pm info);
				 * fall back on the caller-supplied pwr.
				 */
				if (PM_SKBU(kept) == 0) {
					pm_rele_power(kept);
					DEVI(kept)->devi_pm_flags
					    &= ~PMC_SKIP_BRINGUP;
				}
			}
			/* drop the hold taken by pm_name_to_dip() above */
			ddi_release_devi(kept);
		}
		/*
		 * mark this dependency not satisfied
		 */
		pm_unsatisfy(keeper, keptpaths[i]);
	}
	/* drop the hold on the keeper, if we found it */
	if (dip)
		ddi_release_devi(dip);
}
1909 
1910 /*
1911  * Device kept is being un power managed, it is kept up by keeper.
1912  * We need to mark the dependency no longer satisfied.
1913  */
1914 static void
1915 pm_unkepts(char *kept, char *keeper)
1916 {
1917 	PMD_FUNC(pmf, "unkepts")
1918 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1919 	ASSERT(keeper != NULL);
1920 	/*
1921 	 * mark this dependency not satisfied
1922 	 */
1923 	pm_unsatisfy(keeper, kept);
1924 }
1925 
1926 /*
1927  * Removes dependency information and hold on the kepts, if the path is a
1928  * path of a keeper.
1929  */
1930 static void
1931 pm_free_keeper(char *path, int pwr)
1932 {
1933 	pm_pdr_t *dp;
1934 	int i;
1935 	size_t length;
1936 
1937 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1938 		if (strcmp(dp->pdr_keeper, path) != 0)
1939 			continue;
1940 		/*
1941 		 * Remove all our kept holds and the dependency records,
1942 		 * then free up the kept lists.
1943 		 */
1944 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1945 		if (dp->pdr_kept_count)  {
1946 			for (i = 0; i < dp->pdr_kept_count; i++) {
1947 				length = strlen(dp->pdr_kept_paths[i]);
1948 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1949 			}
1950 			kmem_free(dp->pdr_kept_paths,
1951 			    dp->pdr_kept_count * sizeof (char **));
1952 			dp->pdr_kept_paths = NULL;
1953 			dp->pdr_kept_count = 0;
1954 		}
1955 	}
1956 }
1957 
1958 /*
1959  * Removes the device represented by path from the list of kepts, if the
1960  * path is a path of a kept
1961  */
1962 static void
1963 pm_free_kept(char *path)
1964 {
1965 	pm_pdr_t *dp;
1966 	int i;
1967 	int j, count;
1968 	size_t length;
1969 	char **paths;
1970 
1971 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1972 		if (dp->pdr_kept_count == 0)
1973 			continue;
1974 		count = dp->pdr_kept_count;
1975 		/* Remove this device from the kept path lists */
1976 		for (i = 0; i < count; i++) {
1977 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1978 				pm_unkepts(path, dp->pdr_keeper);
1979 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1980 				kmem_free(dp->pdr_kept_paths[i], length);
1981 				dp->pdr_kept_paths[i] = NULL;
1982 				dp->pdr_kept_count--;
1983 			}
1984 		}
1985 		/* Compact the kept paths array */
1986 		if (dp->pdr_kept_count) {
1987 			length = dp->pdr_kept_count * sizeof (char **);
1988 			paths = kmem_zalloc(length, KM_SLEEP);
1989 			j = 0;
1990 			for (i = 0; i < count; i++) {
1991 				if (dp->pdr_kept_paths[i] != NULL) {
1992 					paths[j] = dp->pdr_kept_paths[i];
1993 					j++;
1994 				}
1995 			}
1996 			ASSERT(j == dp->pdr_kept_count);
1997 		}
1998 		/* Now free the old array and point to the new one */
1999 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
2000 		if (dp->pdr_kept_count)
2001 			dp->pdr_kept_paths = paths;
2002 		else
2003 			dp->pdr_kept_paths = NULL;
2004 	}
2005 }
2006 
2007 /*
2008  * Free the dependency information for a device.
2009  */
2010 void
2011 pm_free_keeps(char *path, int pwr)
2012 {
2013 	PMD_FUNC(pmf, "free_keeps")
2014 
2015 #ifdef DEBUG
2016 	int doprdeps = 0;
2017 	void prdeps(char *);
2018 
2019 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2020 	if (pm_debug & PMD_KEEPS) {
2021 		doprdeps = 1;
2022 		prdeps("pm_free_keeps before");
2023 	}
2024 #endif
2025 	/*
2026 	 * First assume we are a keeper and remove all our kepts.
2027 	 */
2028 	pm_free_keeper(path, pwr);
2029 	/*
2030 	 * Now assume we a kept device, and remove all our records.
2031 	 */
2032 	pm_free_kept(path);
2033 #ifdef	DEBUG
2034 	if (doprdeps) {
2035 		prdeps("pm_free_keeps after");
2036 	}
2037 #endif
2038 }
2039 
2040 static int
2041 pm_is_kept(char *path)
2042 {
2043 	pm_pdr_t *dp;
2044 	int i;
2045 
2046 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2047 		if (dp->pdr_kept_count == 0)
2048 			continue;
2049 		for (i = 0; i < dp->pdr_kept_count; i++) {
2050 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2051 				return (1);
2052 		}
2053 	}
2054 	return (0);
2055 }
2056 
/*
 * Adjust the PM_KUC() count ("kidsupcnt") of dip by cnt: positive to take
 * holds, negative to release them.  The adjustment is made under the
 * device's power lock.  Nothing is done for a NULL dip, a dip without pm
 * info, or a BC device.  When a release leaves the count at zero,
 * pm_rescan() is called on dip.
 */
static void
e_pm_hold_rele_power(dev_info_t *dip, int cnt)
{
	PMD_FUNC(pmf, "hold_rele_power")
	int circ;

	if ((dip == NULL) ||
	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
		return;

	PM_LOCK_POWER(dip, &circ);
	/* a release requires that at least one hold is outstanding */
	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))

	PM_KUC(dip) += cnt;

	ASSERT(PM_KUC(dip) >= 0);
	PM_UNLOCK_POWER(dip, circ);

	/* note: the count is re-read here after the power lock is dropped */
	if (cnt < 0 && PM_KUC(dip) == 0)
		pm_rescan(dip);
}
2080 
/* Number of usable slots in the ppm_callbacks table. */
#define	MAX_PPM_HANDLERS	4

kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */

/*
 * Table of registered ppm handlers.  pm_ppm_notify_all_lowest() walks it
 * under ppm_lock until it finds an entry whose ppmc_func is NULL; the extra
 * (+ 1) slot presumably guarantees such a terminating entry always exists.
 */
struct	ppm_callbacks {
	int (*ppmc_func)(dev_info_t *);	/* handler; NULL ends the table */
	dev_info_t	*ppmc_dip;	/* dip passed to pm_ctlops() for it */
} ppm_callbacks[MAX_PPM_HANDLERS + 1];
2089 
2090 
/*
 * This routine calls into all the registered ppms to notify them
 * that either all components of power-managed devices are at their
 * lowest levels or no longer all are at their lowest levels.
 *
 *	dip	device passed along to each ppm with the request
 *	mode	value placed in the PMR_PPM_ALL_LOWEST request; when it is
 *		PM_ALL_LOWEST and autoS3_enabled is set, srn_signal (if
 *		registered) is additionally invoked with SRN_TYPE_AUTOSX
 */
static void
pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
{
	struct ppm_callbacks *ppmcp;
	power_req_t power_req;
	int result = 0;

	power_req.request_type = PMR_PPM_ALL_LOWEST;
	power_req.req.ppm_all_lowest_req.mode = mode;
	/* walk the handler table up to its NULL-ppmc_func terminator */
	mutex_enter(&ppm_lock);
	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
		    DDI_CTLOPS_POWER, &power_req, &result);
	mutex_exit(&ppm_lock);
	if (mode == PM_ALL_LOWEST) {
		if (autoS3_enabled) {
			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
			    "autos3\n"))
			/*
			 * srn_signal is tested and called with srn_clone_lock
			 * held; srn_inuse is raised for the duration of the
			 * call.
			 */
			mutex_enter(&srn_clone_lock);
			if (srn_signal) {
				srn_inuse++;
				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
				srn_inuse--;
			} else {
				PMD(PMD_SX, ("srn_signal NULL\n"))
			}
			mutex_exit(&srn_clone_lock);
		} else {
			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
			    "disabled\n"));
		}
	}
}
2130 
/*
 * Store "value" as the devi_pm_info pointer of dip.  pm_rem_info() passes
 * NULL here to make the device look not power managed.
 */
static void
pm_set_pm_info(dev_info_t *dip, void *value)
{
	DEVI(dip)->devi_pm_info = value;
}
2136 
/*
 * Head of the pr_next-linked list of pm_rsvp_t entries that
 * pm_rsvp_lookup() and pm_proceed() search while holding pm_rsvp_lock.
 */
pm_rsvp_t *pm_blocked_list;
2138 
2139 /*
2140  * Look up an entry in the blocked list by dip and component
2141  */
2142 static pm_rsvp_t *
2143 pm_rsvp_lookup(dev_info_t *dip, int comp)
2144 {
2145 	pm_rsvp_t *p;
2146 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2147 	for (p = pm_blocked_list; p; p = p->pr_next)
2148 		if (p->pr_dip == dip && p->pr_comp == comp) {
2149 			return (p);
2150 		}
2151 	return (NULL);
2152 }
2153 
2154 /*
2155  * Called when a device which is direct power managed (or the parent or
2156  * dependent of such a device) changes power, or when a pm clone is closed
2157  * that was direct power managing a device.  This call results in pm_blocked()
2158  * (below) returning.
2159  */
2160 void
2161 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2162 {
2163 	PMD_FUNC(pmf, "proceed")
2164 	pm_rsvp_t *found = NULL;
2165 	pm_rsvp_t *p;
2166 
2167 	mutex_enter(&pm_rsvp_lock);
2168 	switch (cmd) {
2169 	/*
2170 	 * we're giving up control, let any pending op continue
2171 	 */
2172 	case PMP_RELEASE:
2173 		for (p = pm_blocked_list; p; p = p->pr_next) {
2174 			if (dip == p->pr_dip) {
2175 				p->pr_retval = PMP_RELEASE;
2176 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2177 				    pmf, PM_DEVICE(dip)))
2178 				cv_signal(&p->pr_cv);
2179 			}
2180 		}
2181 		break;
2182 
2183 	/*
2184 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2185 	 * succeed and a non-matching request for the same device fail
2186 	 */
2187 	case PMP_SETPOWER:
2188 		found = pm_rsvp_lookup(dip, comp);
2189 		if (!found)	/* if driver not waiting */
2190 			break;
2191 		/*
2192 		 * This cannot be pm_lower_power, since that can only happen
2193 		 * during detach or probe
2194 		 */
2195 		if (found->pr_newlevel <= newlevel) {
2196 			found->pr_retval = PMP_SUCCEED;
2197 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2198 			    PM_DEVICE(dip)))
2199 		} else {
2200 			found->pr_retval = PMP_FAIL;
2201 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2202 			    PM_DEVICE(dip)))
2203 		}
2204 		cv_signal(&found->pr_cv);
2205 		break;
2206 
2207 	default:
2208 		panic("pm_proceed unknown cmd %d", cmd);
2209 	}
2210 	mutex_exit(&pm_rsvp_lock);
2211 }
2212 
2213 /*
2214  * This routine dispatches new work to the dependency thread. Caller must
2215  * be prepared to block for memory if necessary.
2216  */
2217 void
2218 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2219     int *res, int cached_pwr)
2220 {
2221 	pm_dep_wk_t	*new_work;
2222 
2223 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2224 	new_work->pdw_type = cmd;
2225 	new_work->pdw_wait = wait;
2226 	new_work->pdw_done = 0;
2227 	new_work->pdw_ret = 0;
2228 	new_work->pdw_pwr = cached_pwr;
2229 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2230 	if (keeper != NULL) {
2231 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2232 		    KM_SLEEP);
2233 		(void) strcpy(new_work->pdw_keeper, keeper);
2234 	}
2235 	if (kept != NULL) {
2236 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2237 		(void) strcpy(new_work->pdw_kept, kept);
2238 	}
2239 	mutex_enter(&pm_dep_thread_lock);
2240 	if (pm_dep_thread_workq == NULL) {
2241 		pm_dep_thread_workq = new_work;
2242 		pm_dep_thread_tail = new_work;
2243 		new_work->pdw_next = NULL;
2244 	} else {
2245 		pm_dep_thread_tail->pdw_next = new_work;
2246 		pm_dep_thread_tail = new_work;
2247 		new_work->pdw_next = NULL;
2248 	}
2249 	cv_signal(&pm_dep_thread_cv);
2250 	/* If caller asked for it, wait till it is done. */
2251 	if (wait)  {
2252 		while (!new_work->pdw_done)
2253 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2254 		/*
2255 		 * Pass return status, if any, back.
2256 		 */
2257 		if (res != NULL)
2258 			*res = new_work->pdw_ret;
2259 		/*
2260 		 * If we asked to wait, it is our job to free the request
2261 		 * structure.
2262 		 */
2263 		if (new_work->pdw_keeper)
2264 			kmem_free(new_work->pdw_keeper,
2265 			    strlen(new_work->pdw_keeper) + 1);
2266 		if (new_work->pdw_kept)
2267 			kmem_free(new_work->pdw_kept,
2268 			    strlen(new_work->pdw_kept) + 1);
2269 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2270 	}
2271 	mutex_exit(&pm_dep_thread_lock);
2272 }
2273 
/*
 * Release the pm resource for this device.
 *
 * In order: a direct-pm device is released from direct control and its
 * waiters are woken; the parent's kidsupcnt is reduced by the number of
 * components that are not at power level 0; a PM_DEP_WK_DETACH request is
 * queued to the dependency thread to clean up dependency records; the
 * not-lowest component count is adjusted; finally the pm_info structure is
 * detached from the dip and freed.
 */
void
pm_rem_info(dev_info_t *dip)
{
	PMD_FUNC(pmf, "rem_info")
	int		i, count = 0;
	pm_info_t	*info = PM_GET_PM_INFO(dip);
	dev_info_t	*pdip = ddi_get_parent(dip);
	char		*pathbuf;
	int		work_type = PM_DEP_WK_DETACH;

	ASSERT(info);

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	if (PM_ISDIRECT(dip)) {
		/* give up direct control and let pending operations go on */
		info->pmi_dev_pm_state &= ~PM_DIRECT;
		ASSERT(info->pmi_clone);
		info->pmi_clone = 0;
		pm_proceed(dip, PMP_RELEASE, -1, -1);
	}
	ASSERT(!PM_GET_PM_SCAN(dip));

	/*
	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
	 * Others we check all components.  BC node that has already
	 * called pm_destroy_components() has zero component count.
	 * Parents that get notification are not adjusted because their
	 * kidsupcnt is always 0 (or 1 during configuration).
	 */
	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))

	/* node is detached, so we can examine power without locking */
	if (PM_ISBC(dip)) {
		count = (PM_CURPOWER(dip, 0) != 0);
	} else {
		for (i = 0; i < PM_NUMCMPTS(dip); i++)
			count += (PM_CURPOWER(dip, i) != 0);
	}

	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
		e_pm_hold_rele_power(pdip, -count);

	/*
	 * Schedule a request to clean up dependency records.  The device's
	 * own path is passed as both keeper and kept, and the dispatch does
	 * not wait; pm_dispatch_to_dep_thread() copies the strings, so the
	 * buffer can be freed right away.
	 */
	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);
	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
	    PM_DEP_NOWAIT, NULL, (count > 0));
	kmem_free(pathbuf, MAXPATHLEN);

	/*
	 * Adjust the pm_comps_notlowest count since this device is
	 * not being power-managed anymore.
	 */
	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
		if (PM_CURPOWER(dip, i) != 0)
			PM_DECR_NOTLOWEST(dip);
	}
	/*
	 * Once we clear the info pointer, it looks like it is not power
	 * managed to everybody else.
	 */
	pm_set_pm_info(dip, NULL);
	kmem_free(info, sizeof (pm_info_t));
}
2341 
2342 int
2343 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2344 {
2345 	int components = PM_NUMCMPTS(dip);
2346 	int *bufp;
2347 	size_t size;
2348 	int i;
2349 
2350 	if (components <= 0) {
2351 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2352 		    "can't get normal power values\n", PM_DEVICE(dip));
2353 		return (DDI_FAILURE);
2354 	} else {
2355 		size = components * sizeof (int);
2356 		bufp = kmem_alloc(size, KM_SLEEP);
2357 		for (i = 0; i < components; i++) {
2358 			bufp[i] = pm_get_normal_power(dip, i);
2359 		}
2360 	}
2361 	*length = size;
2362 	*valuep = bufp;
2363 	return (DDI_SUCCESS);
2364 }
2365 
2366 static int
2367 pm_reset_timestamps(dev_info_t *dip, void *arg)
2368 {
2369 	_NOTE(ARGUNUSED(arg))
2370 
2371 	int components;
2372 	int	i;
2373 
2374 	if (!PM_GET_PM_INFO(dip))
2375 		return (DDI_WALK_CONTINUE);
2376 	components = PM_NUMCMPTS(dip);
2377 	ASSERT(components > 0);
2378 	PM_LOCK_BUSY(dip);
2379 	for (i = 0; i < components; i++) {
2380 		struct pm_component *cp;
2381 		/*
2382 		 * If the component was not marked as busy,
2383 		 * reset its timestamp to now.
2384 		 */
2385 		cp = PM_CP(dip, i);
2386 		if (cp->pmc_timestamp)
2387 			cp->pmc_timestamp = gethrestime_sec();
2388 	}
2389 	PM_UNLOCK_BUSY(dip);
2390 	return (DDI_WALK_CONTINUE);
2391 }
2392 
2393 /*
2394  * Convert a power level to an index into the levels array (or
2395  * just PM_LEVEL_UNKNOWN in that special case).
2396  */
2397 static int
2398 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2399 {
2400 	PMD_FUNC(pmf, "level_to_index")
2401 	int i;
2402 	int limit = cp->pmc_comp.pmc_numlevels;
2403 	int *ip = cp->pmc_comp.pmc_lvals;
2404 
2405 	if (level == PM_LEVEL_UNKNOWN)
2406 		return (level);
2407 
2408 	for (i = 0; i < limit; i++) {
2409 		if (level == *ip++) {
2410 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2411 			    pmf, PM_DEVICE(dip),
2412 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2413 			return (i);
2414 		}
2415 	}
2416 	panic("pm_level_to_index: level %d not found for device "
2417 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2418 	/*NOTREACHED*/
2419 }
2420 
2421 /*
2422  * Internal function to set current power level
2423  */
2424 static void
2425 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2426 {
2427 	PMD_FUNC(pmf, "set_cur_pwr")
2428 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2429 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2430 
2431 	/*
2432 	 * Nothing to adjust if current & new levels are the same.
2433 	 */
2434 	if (curpwr != PM_LEVEL_UNKNOWN &&
2435 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2436 		return;
2437 
2438 	/*
2439 	 * Keep the count for comps doing transition to/from lowest
2440 	 * level.
2441 	 */
2442 	if (curpwr == 0) {
2443 		PM_INCR_NOTLOWEST(dip);
2444 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2445 		PM_DECR_NOTLOWEST(dip);
2446 	}
2447 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2448 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2449 }
2450 
2451 /*
2452  * This is the default method of setting the power of a device if no ppm
2453  * driver has claimed it.
2454  */
2455 int
2456 pm_power(dev_info_t *dip, int comp, int level)
2457 {
2458 	PMD_FUNC(pmf, "power")
2459 	struct dev_ops	*ops;
2460 	int		(*fn)(dev_info_t *, int, int);
2461 	struct pm_component *cp = PM_CP(dip, comp);
2462 	int retval;
2463 	pm_info_t *info = PM_GET_PM_INFO(dip);
2464 	static int pm_phc_impl(dev_info_t *, int, int, int);
2465 
2466 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2467 	    PM_DEVICE(dip), comp, level))
2468 	if (!(ops = ddi_get_driver(dip))) {
2469 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2470 		    PM_DEVICE(dip)))
2471 		return (DDI_FAILURE);
2472 	}
2473 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2474 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2475 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2476 		    (!fn ? " devo_power NULL" : "")))
2477 		return (DDI_FAILURE);
2478 	}
2479 	cp->pmc_flags |= PM_POWER_OP;
2480 	retval = (*fn)(dip, comp, level);
2481 	cp->pmc_flags &= ~PM_POWER_OP;
2482 	if (retval == DDI_SUCCESS) {
2483 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2484 		return (DDI_SUCCESS);
2485 	}
2486 
2487 	/*
2488 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2489 	 * updated only the power level of the component.  If our attempt to
2490 	 * set the device new to a power level above has failed we sync the
2491 	 * total power state via phc code now.
2492 	 */
2493 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2494 		int phc_lvl =
2495 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2496 
2497 		ASSERT(info);
2498 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2499 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2500 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2501 	}
2502 
2503 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2504 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2505 	    level, power_val_to_string(cp, level)));
2506 	return (DDI_FAILURE);
2507 }
2508 
/*
 * Stop power managing dip: send a PMR_PPM_UNMANAGE request to the device's
 * ppm and then release the device's pm resources with pm_rem_info().
 * Returns the result of the ppm request (0 if none was sent).
 */
int
pm_unmanage(dev_info_t *dip)
{
	PMD_FUNC(pmf, "unmanage")
	power_req_t power_req;
	int result, retval = 0;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
	    PM_DEVICE(dip)))
	power_req.request_type = PMR_PPM_UNMANAGE;
	power_req.req.ppm_config_req.who = dip;
	/*
	 * Non-DEBUG kernels send the request only when a ppm has claimed
	 * the device.  DEBUG kernels add an else arm that sends it
	 * unconditionally.
	 */
	if (pm_ppm_claimed(dip))
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#ifdef DEBUG
	else
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#endif
	ASSERT(retval == DDI_SUCCESS);
	pm_rem_info(dip);
	return (retval);
}
2533 
2534 int
2535 pm_raise_power(dev_info_t *dip, int comp, int level)
2536 {
2537 	if (level < 0)
2538 		return (DDI_FAILURE);
2539 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2540 	    !e_pm_valid_power(dip, comp, level))
2541 		return (DDI_FAILURE);
2542 
2543 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2544 }
2545 
2546 int
2547 pm_lower_power(dev_info_t *dip, int comp, int level)
2548 {
2549 	PMD_FUNC(pmf, "pm_lower_power")
2550 
2551 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2552 	    !e_pm_valid_power(dip, comp, level)) {
2553 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2554 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2555 		return (DDI_FAILURE);
2556 	}
2557 
2558 	if (!DEVI_IS_DETACHING(dip)) {
2559 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2560 		    pmf, PM_DEVICE(dip)))
2561 		return (DDI_FAILURE);
2562 	}
2563 
2564 	/*
2565 	 * If we don't care about saving power, or we're treating this node
2566 	 * specially, then this is a no-op
2567 	 */
2568 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2569 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2570 		    pmf, PM_DEVICE(dip),
2571 		    !autopm_enabled ? "!autopm_enabled " : "",
2572 		    !PM_CPUPM_ENABLED ? "!cpupm_enabled " : "",
2573 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2574 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2575 		return (DDI_SUCCESS);
2576 	}
2577 
2578 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2579 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2580 		    PM_DEVICE(dip)))
2581 		return (DDI_FAILURE);
2582 	}
2583 	return (DDI_SUCCESS);
2584 }
2585 
2586 /*
2587  * Find the entries struct for a given dip in the blocked list, return it locked
2588  */
2589 static psce_t *
2590 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2591 {
2592 	pscc_t *p;
2593 	psce_t *psce;
2594 
2595 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2596 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2597 		if (p->pscc_dip == dip) {
2598 			*psccp = p;
2599 			psce = p->pscc_entries;
2600 			mutex_enter(&psce->psce_lock);
2601 			ASSERT(psce);
2602 			rw_exit(&pm_pscc_direct_rwlock);
2603 			return (psce);
2604 		}
2605 	}
2606 	rw_exit(&pm_pscc_direct_rwlock);
2607 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2608 	/*NOTREACHED*/
2609 }
2610 
/*
 * Write an entry indicating a power level change (to be passed to a process
 * later) in the given psce.
 * If we were called in the path that brings up the console fb in the
 * case of entering the prom, we don't want to sleep.  If the alloc fails, then
 * we create a record that has a size of -1, a physaddr of NULL, and that
 * has the overflow flag set.
 *
 *	event		event code stored in the entry
 *	psce		entries struct to write into; its psce_lock must be
 *			held on entry and is released before returning
 *	dip		device whose pathname is recorded
 *	comp		component number
 *	new, old	new and old power levels
 *	which		PSC_INTEREST entries also get PSC_ALL_LOWEST set or
 *			cleared according to pm_comps_notlowest
 *	canblock	PM_CANBLOCK_BYPASS selects a non-sleeping allocation
 *
 * Returns non-zero if the entry written overwrote one that had not been
 * consumed yet (overrun), 0 otherwise.
 */
static int
psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
    int old, int which, pm_canblock_t canblock)
{
	/*
	 * NOTE(review): other ddi_pathname() callers in this file pass a
	 * MAXPATHLEN buffer; this one is only MAXNAMELEN bytes.  Confirm
	 * that device paths reaching here cannot exceed it.
	 */
	char	buf[MAXNAMELEN];
	pm_state_change_t *p;
	size_t	size;
	caddr_t physpath = NULL;
	int	overrun = 0;

	ASSERT(MUTEX_HELD(&psce->psce_lock));
	(void) ddi_pathname(dip, buf);
	size = strlen(buf) + 1;
	/* p is the slot the next entry goes into */
	p = psce->psce_in;
	if (canblock == PM_CANBLOCK_BYPASS) {
		physpath = kmem_alloc(size, KM_NOSLEEP);
		if (physpath == NULL) {
			/*
			 * mark current entry as overrun
			 */
			p->flags |= PSC_EVENT_LOST;
			size = (size_t)-1;
		}
	} else
		physpath = kmem_alloc(size, KM_SLEEP);
	if (p->size) {	/* overflow; mark the next entry */
		/*
		 * The slot still holds an entry.  Free its path (a size of
		 * -1 means it never had one), flag the following slot as
		 * having lost an event and advance psce_out to it, wrapping
		 * from psce_last to psce_first.
		 */
		if (p->size != (size_t)-1)
			kmem_free(p->physpath, p->size);
		ASSERT(psce->psce_out == p);
		if (p == psce->psce_last) {
			psce->psce_first->flags |= PSC_EVENT_LOST;
			psce->psce_out = psce->psce_first;
		} else {
			(p + 1)->flags |= PSC_EVENT_LOST;
			psce->psce_out = (p + 1);
		}
		overrun++;
	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
		p->flags |= PSC_EVENT_LOST;
		p->size = 0;
		p->physpath = NULL;
	}
	if (which == PSC_INTEREST) {
		mutex_enter(&pm_compcnt_lock);
		if (pm_comps_notlowest == 0)
			p->flags |= PSC_ALL_LOWEST;
		else
			p->flags &= ~PSC_ALL_LOWEST;
		mutex_exit(&pm_compcnt_lock);
	}
	/* fill in the entry; size is (size_t)-1 if the path alloc failed */
	p->event = event;
	p->timestamp = gethrestime_sec();
	p->component = comp;
	p->old_level = old;
	p->new_level = new;
	p->physpath = physpath;
	p->size = size;
	if (physpath != NULL)
		(void) strcpy(p->physpath, buf);
	/* advance the input pointer, wrapping at the end of the buffer */
	if (p == psce->psce_last)
		psce->psce_in = psce->psce_first;
	else
		psce->psce_in = ++p;
	mutex_exit(&psce->psce_lock);
	return (overrun);
}
2685 
2686 /*
2687  * Find the next entry on the interest list.  We keep a pointer to the item we
2688  * last returned in the user's cooke.  Returns a locked entries struct.
2689  */
2690 static psce_t *
2691 psc_interest(void **cookie, pscc_t **psccp)
2692 {
2693 	pscc_t *pscc;
2694 	pscc_t **cookiep = (pscc_t **)cookie;
2695 
2696 	if (*cookiep == NULL)
2697 		pscc = pm_pscc_interest;
2698 	else
2699 		pscc = (*cookiep)->pscc_next;
2700 	if (pscc) {
2701 		*cookiep = pscc;
2702 		*psccp = pscc;
2703 		mutex_enter(&pscc->pscc_entries->psce_lock);
2704 		return (pscc->pscc_entries);
2705 	} else {
2706 		return (NULL);
2707 	}
2708 }
2709 
/*
 * Create an entry for a process to pick up indicating a power level change.
 * The caller must hold pm_rsvp_lock.
 *
 *	cmd		PSC_PENDING_CHANGE: queue an entry only for the
 *			process directly controlling dip.
 *			PSC_HAS_CHANGED: queue an entry for the controlling
 *			process (if dip is direct and canblock is not
 *			PM_CANBLOCK_BYPASS) and for every entry on the
 *			interest list.
 *	dip, comp	device and component that changed
 *	newlevel,
 *	oldlevel	the levels involved
 *	canblock	passed through to psc_entry()
 */
static void
pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
    int newlevel, int oldlevel, pm_canblock_t canblock)
{
	PMD_FUNC(pmf, "enqueue_notify")
	pscc_t	*pscc;
	psce_t	*psce;
	void		*cookie = NULL;
	int	overrun;

	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
	switch (cmd) {
	case PSC_PENDING_CHANGE:	/* only for controlling process */
		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
		/* psce comes back locked; psc_entry() unlocks it */
		psce = pm_psc_dip_to_direct(dip, &pscc);
		ASSERT(psce);
		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
		    pm_poll_cnt[pscc->pscc_clone]))
		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
		    PSC_DIRECT, canblock);
		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
		mutex_enter(&pm_clone_lock);
		/* an overrun replaced an unread entry, so the count stays */
		if (!overrun)
			pm_poll_cnt[pscc->pscc_clone]++;
		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
		mutex_exit(&pm_clone_lock);
		break;
	case PSC_HAS_CHANGED:
		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
		/* first the controlling process, if there is one */
		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
			psce = pm_psc_dip_to_direct(dip, &pscc);
			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
			    pm_poll_cnt[pscc->pscc_clone]))
			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
			    oldlevel, PSC_DIRECT, canblock);
			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
			mutex_enter(&pm_clone_lock);
			if (!overrun)
				pm_poll_cnt[pscc->pscc_clone]++;
			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
			mutex_exit(&pm_clone_lock);
		}
		/* then everyone on the interest list */
		mutex_enter(&pm_clone_lock);
		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
			(void) psc_entry(cmd, psce, dip, comp, newlevel,
			    oldlevel, PSC_INTEREST, canblock);
			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
		}
		rw_exit(&pm_pscc_interest_rwlock);
		mutex_exit(&pm_clone_lock);
		break;
#ifdef DEBUG
	default:
		ASSERT(0);
#endif
	}
}
2777 
2778 static void
2779 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2780 {
2781 	if (listp) {
2782 		pm_ppm_devlist_t *p, *next = NULL;
2783 
2784 		for (p = *listp; p; p = next) {
2785 			next = p->ppd_next;
2786 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2787 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2788 			    canblock);
2789 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2790 		}
2791 		*listp = NULL;
2792 	}
2793 }
2794 
2795 /*
2796  * Try to get the power locks of the parent node and target (child)
2797  * node.  Return true if successful (with both locks held) or false
2798  * (with no locks held).
2799  */
2800 static int
2801 pm_try_parent_child_locks(dev_info_t *pdip,
2802     dev_info_t *dip, int *pcircp, int *circp)
2803 {
2804 	if (ndi_devi_tryenter(pdip, pcircp))
2805 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2806 			return (1);
2807 		} else {
2808 			ndi_devi_exit(pdip, *pcircp);
2809 		}
2810 	return (0);
2811 }
2812 
/*
 * Determine if the power lock owner is blocked by current thread.
 * returns :
 *	1 - If the thread owning the effective power lock (the first lock on
 *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
 *          a mutex held by the current thread.
 *
 *	0 - otherwise
 *
 * Note : This function is called by pm_power_has_changed to determine whether
 * it is executing in parallel with pm_set_power.
 *
 * The owner is obtained by sending a PMR_PPM_POWER_LOCK_OWNER request to the
 * device's ppm; failure of that request is fatal.
 */
static int
pm_blocked_by_us(dev_info_t *dip)
{
	power_req_t power_req;
	kthread_t *owner;
	int result;
	kmutex_t *mp;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;

	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
	power_req.req.ppm_power_lock_owner_req.who = dip;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
	    DDI_SUCCESS) {
		/*
		 * It is assumed that if the device is claimed by ppm, ppm
		 * will always implement this request type and it'll always
		 * return success. We panic here, if it fails.
		 */
		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
		    PM_DEVICE(dip));
		/*NOTREACHED*/
	}

	/*
	 * There must be an owner, it must be asleep on a mutex, and that
	 * mutex must be owned by the current thread.
	 */
	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
	    owner->t_state == TS_SLEEP &&
	    owner->t_sobj_ops &&
	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
	    (mp = (kmutex_t *)owner->t_wchan) &&
	    mutex_owner(mp) == curthread)
		return (1);

	return (0);
}
2858 
2859 /*
2860  * Notify parent which wants to hear about a child's power changes.
2861  */
2862 static void
2863 pm_notify_parent(dev_info_t *dip,
2864     dev_info_t *pdip, int comp, int old_level, int level)
2865 {
2866 	pm_bp_has_changed_t bphc;
2867 	pm_sp_misc_t pspm;
2868 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2869 	int result = DDI_SUCCESS;
2870 
2871 	bphc.bphc_dip = dip;
2872 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2873 	bphc.bphc_comp = comp;
2874 	bphc.bphc_olevel = old_level;
2875 	bphc.bphc_nlevel = level;
2876 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2877 	pspm.pspm_scan = 0;
2878 	bphc.bphc_private = &pspm;
2879 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2880 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2881 	kmem_free(pathbuf, MAXPATHLEN);
2882 }
2883 
2884 /*
2885  * Check if we need to resume a BC device, and make the attach call as required.
2886  */
2887 static int
2888 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2889 {
2890 	int ret = DDI_SUCCESS;
2891 
2892 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2893 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2894 		/* ppm is not interested in DDI_PM_RESUME */
2895 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2896 			/* XXX Should we mark it resumed, */
2897 			/* even though it failed? */
2898 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2899 			    PM_NAME(dip), PM_ADDR(dip));
2900 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2901 	}
2902 
2903 	return (ret);
2904 }
2905 
2906 /*
2907  * Tests outside the lock to see if we should bother to enqueue an entry
2908  * for any watching process.  If yes, then caller will take the lock and
2909  * do the full protocol
2910  */
2911 static int
2912 pm_watchers()
2913 {
2914 	if (pm_processes_stopped)
2915 		return (0);
2916 	return (pm_pscc_direct || pm_pscc_interest);
2917 }
2918 
/*
 * A driver is reporting that the power of one of its device's components
 * has changed.  Update the power state accordingly.
 *
 *	dip	- device whose component changed
 *	comp	- component number
 *	level	- new power level of the component (must not be negative)
 *
 * Returns DDI_FAILURE if level is negative or if dip, comp or level fail
 * validation.  Returns DDI_SUCCESS from the deadlock path described below.
 * In every other case the return value is that of pm_phc_impl().
 */
int
pm_power_has_changed(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_power_has_changed")
	int ret;
	dev_info_t *pdip = ddi_get_parent(dip);
	struct pm_component *cp;
	int blocked, circ, pcirc, old_level;
	static int pm_phc_impl(dev_info_t *, int, int, int);

	if (level < 0) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
		    PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
	    PM_DEVICE(dip), comp, level))

	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
	    !e_pm_valid_power(dip, comp, level))
		return (DDI_FAILURE);

	/*
	 * A driver thread calling pm_power_has_changed and another thread
	 * calling pm_set_power can deadlock.  The problem is not resolvable
	 * by changing lock order, so we use pm_blocked_by_us() to detect
	 * this specific deadlock.  If we can't get the lock immediately
	 * and we are deadlocked, just update the component's level, do
	 * notifications, and return.  We intend to update the total power
	 * state later (if the other thread fails to set power to the
	 * desired level).  If we were called because of a power change on a
	 * component that isn't involved in a set_power op, update all state
	 * immediately.
	 */
	cp = PM_CP(dip, comp);
	/*
	 * Poll for both the parent and child locks, backing off one tick
	 * between attempts, until we either get them or detect that the
	 * power lock owner is blocked on a mutex we hold.
	 */
	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
		    (cp->pmc_flags & PM_POWER_OP)) {
			/*
			 * Deadlock with a set_power operation on this very
			 * component: do the notifications without any locks
			 * and record the change for later reconciliation.
			 */
			if (pm_watchers()) {
				mutex_enter(&pm_rsvp_lock);
				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
				mutex_exit(&pm_rsvp_lock);
			}
			if (pdip && PM_WANTS_NOTIFICATION(pdip))
				pm_notify_parent(dip,
				    pdip, comp, cur_power(cp), level);
			(void) pm_check_and_resume(dip,
			    comp, cur_power(cp), level);

			/*
			 * Stash the old power index, update curpwr, and flag
			 * that the total power state needs to be synched.
			 */
			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
			/*
			 * Several pm_power_has_changed calls could arrive
			 * while the set power path remains blocked.  Keep the
			 * oldest old power and the newest new power of any
			 * sequence of phc calls which arrive during deadlock.
			 */
			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
			cp->pmc_cur_pwr =
			    pm_level_to_index(dip, cp, level);
			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
			return (DDI_SUCCESS);
		} else
			if (blocked) {	/* blocked, but different cmpt? */
				/*
				 * The child's power lock cannot be taken
				 * here.  Try for the parent lock alone; if
				 * even that fails, update the child's state
				 * without adjusting the parent's hold count.
				 */
				if (!ndi_devi_tryenter(pdip, &pcirc)) {
					cmn_err(CE_NOTE,
					    "!pm: parent kuc not updated due "
					    "to possible deadlock.\n");
					return (pm_phc_impl(dip,
					    comp, level, 1));
				}
				old_level = cur_power(cp);
				/* same hold/rele accounting as below */
				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
				    (!PM_ISBC(dip) || comp == 0) &&
				    POWERING_ON(old_level, level))
					pm_hold_power(pdip);
				ret = pm_phc_impl(dip, comp, level, 1);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
					if ((!PM_ISBC(dip) ||
					    comp == 0) && level == 0 &&
					    old_level != PM_LEVEL_UNKNOWN)
						pm_rele_power(pdip);
				}
				ndi_devi_exit(pdip, pcirc);
				/* child lock not held: deadlock */
				return (ret);
			}
		delay(1);
		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
	}

	/* non-deadlock case */
	old_level = cur_power(cp);
	/*
	 * For a parent that does not want notification, take a hold on it
	 * before the child powers on ...
	 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
		pm_hold_power(pdip);
	ret = pm_phc_impl(dip, comp, level, 1);
	/*
	 * ... and release it once the child has gone to level 0 from a
	 * known level.
	 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
		    old_level != PM_LEVEL_UNKNOWN)
			pm_rele_power(pdip);
	}
	PM_UNLOCK_POWER(dip, circ);
	ndi_devi_exit(pdip, pcirc);
	return (ret);
}
3036 
3037 /*
3038  * Account for power changes to a component of the the console frame buffer.
3039  * If lowering power from full (or "unkown", which is treatd as full)
3040  * we will increment the "components off" count of the fb device.
3041  * Subsequent lowering of the same component doesn't affect the count.  If
3042  * raising a component back to full power, we will decrement the count.
3043  *
3044  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3045  */
3046 static int
3047 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3048 {
3049 	struct pm_component *cp = PM_CP(dip, cmpt);
3050 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3051 	int want_normal = (new == cp->pmc_norm_pwr);
3052 	int incr = 0;
3053 
3054 	if (on && !want_normal)
3055 		incr = 1;
3056 	else if (!on && want_normal)
3057 		incr = -1;
3058 	return (incr);
3059 }
3060 
3061 /*
3062  * Adjust the count of console frame buffer components < full power.
3063  */
3064 static void
3065 update_comps_off(int incr, dev_info_t *dip)
3066 {
3067 		mutex_enter(&pm_cfb_lock);
3068 		pm_cfb_comps_off += incr;
3069 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3070 		mutex_exit(&pm_cfb_lock);
3071 }
3072 
/*
 * Update the power state in the framework (via the ppm).  The 'notify'
 * argument tells whether to notify watchers.  Power lock is already held.
 * NOTE(review): pm_power_has_changed() also calls this from its deadlock
 * fallback paths, where its own comment says the child lock is not held.
 *
 *	dip	- device whose component changed
 *	comp	- component number
 *	level	- new power level
 *	notify	- non-zero to notify the parent, resume a BC device if
 *		  needed, and enqueue a PSC_HAS_CHANGED notification
 *
 * Returns DDI_SUCCESS if the component is already at 'level' or once the
 * update is done, DDI_FAILURE if the ppm rejects the change notification.
 */
static int
pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
{
	PMD_FUNC(pmf, "phc_impl")
	power_req_t power_req;
	int i, dodeps = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int result;
	int old_level;
	struct pm_component *cp;
	int incr = 0;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
	int work_type = 0;
	char *pathbuf;

	/* Must use "official" power level for this test. */
	cp = PM_CP(dip, comp);
	/*
	 * If a power change was recorded while a set_power operation was in
	 * progress, the level stashed in pmc_phc_pwr is the official one.
	 * Both fields hold an index into pmc_lvals, not a level.
	 */
	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
	if (old_level != PM_LEVEL_UNKNOWN)
		old_level = cp->pmc_comp.pmc_lvals[old_level];

	if (level == old_level) {
		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_SUCCESS);
	}

	/*
	 * Tell ppm about this.
	 */
	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
	power_req.req.ppm_notify_level_req.who = dip;
	power_req.req.ppm_notify_level_req.cmpt = comp;
	power_req.req.ppm_notify_level_req.new_level = level;
	power_req.req.ppm_notify_level_req.old_level = old_level;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
	    &result) == DDI_FAILURE) {
		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
		    pmf, PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	/* Keep the console frame buffer "components off" count current. */
	if (PM_IS_CFB(dip)) {
		incr = calc_cfb_comps_incr(dip, comp, old_level, level);

		if (incr) {
			update_comps_off(incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, old_level, level, pm_cfb_comps_off))
		}
	}
	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	/* 'result' is not read again after this assignment. */
	result = DDI_SUCCESS;

	if (notify) {
		if (pdip && PM_WANTS_NOTIFICATION(pdip))
			pm_notify_parent(dip, pdip, comp, old_level, level);
		(void) pm_check_and_resume(dip, comp, old_level, level);
	}

	/*
	 * Decrement the dependency kidsup count if we turn a device
	 * off.
	 */
	if (POWERING_OFF(old_level, level)) {
		dodeps = 1;
		/* Only when every component is now off. */
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (cur_power(cp)) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_OFF;
	}

	/*
	 * Increment if we turn it on. Check to see
	 * if other comps are already on, if so,
	 * dont increment.
	 */
	if (POWERING_ON(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (comp == i)
				continue;
			/* -1 also treated as 0 in this case */
			if (cur_power(cp) > 0) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_ON;
	}

	/* Hand the whole-device on/off transition to the dependency thread. */
	if (dodeps) {
		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, pathbuf);
		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
		    PM_DEP_NOWAIT, NULL, 0);
		kmem_free(pathbuf, MAXPATHLEN);
	}

	/*
	 * level != old_level always holds here because of the early return
	 * above.
	 */
	if (notify && (level != old_level) && pm_watchers()) {
		mutex_enter(&pm_rsvp_lock);
		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
		    PM_CANBLOCK_BLOCK);
		mutex_exit(&pm_rsvp_lock);
	}

	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
	pm_rescan(dip);
	return (DDI_SUCCESS);
}
3196 
3197 /*
3198  * This function is called at startup time to notify pm of the existence
3199  * of any platform power managers for this platform.  As a result of
3200  * this registration, each function provided will be called each time
3201  * a device node is attached, until one returns true, and it must claim the
3202  * device node (by returning non-zero) if it wants to be involved in the
3203  * node's power management.  If it does claim the node, then it will
3204  * subsequently be notified of attach and detach events.
3205  *
3206  */
3207 
3208 int
3209 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3210 {
3211 	PMD_FUNC(pmf, "register_ppm")
3212 	struct ppm_callbacks *ppmcp;
3213 	pm_component_t *cp;
3214 	int i, pwr, result, circ;
3215 	power_req_t power_req;
3216 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3217 	void pm_ppm_claim(dev_info_t *);
3218 
3219 	mutex_enter(&ppm_lock);
3220 	ppmcp = ppm_callbacks;
3221 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3222 		if (ppmcp->ppmc_func == NULL) {
3223 			ppmcp->ppmc_func = func;
3224 			ppmcp->ppmc_dip = dip;
3225 			break;
3226 		}
3227 	}
3228 	mutex_exit(&ppm_lock);
3229 
3230 	if (i >= MAX_PPM_HANDLERS)
3231 		return (DDI_FAILURE);
3232 	while ((dip = ddi_get_parent(dip)) != NULL) {
3233 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3234 			continue;
3235 		pm_ppm_claim(dip);
3236 		/* don't bother with the not power-manageable nodes */
3237 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3238 			/*
3239 			 * Tell ppm about this.
3240 			 */
3241 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3242 			p->old_level = PM_LEVEL_UNKNOWN;
3243 			p->who = dip;
3244 			PM_LOCK_POWER(dip, &circ);
3245 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3246 				cp = PM_CP(dip, i);
3247 				pwr = cp->pmc_cur_pwr;
3248 				if (pwr != PM_LEVEL_UNKNOWN) {
3249 					p->cmpt = i;
3250 					p->new_level = cur_power(cp);
3251 					p->old_level = PM_LEVEL_UNKNOWN;
3252 					if (pm_ctlops(PPM(dip), dip,
3253 					    DDI_CTLOPS_POWER, &power_req,
3254 					    &result) == DDI_FAILURE) {
3255 						PMD(PMD_FAIL, ("%s: pc "
3256 						    "%s@%s(%s#%d) to %d "
3257 						    "fails\n", pmf,
3258 						    PM_DEVICE(dip), pwr))
3259 					}
3260 				}
3261 			}
3262 			PM_UNLOCK_POWER(dip, circ);
3263 		}
3264 	}
3265 	return (DDI_SUCCESS);
3266 }
3267 
3268 /*
3269  * Call the ppm's that have registered and adjust the devinfo struct as
3270  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3271  * by each ppm are assumed to be disjoint.
3272  */
3273 void
3274 pm_ppm_claim(dev_info_t *dip)
3275 {
3276 	struct ppm_callbacks *ppmcp;
3277 
3278 	if (PPM(dip)) {
3279 		return;
3280 	}
3281 	mutex_enter(&ppm_lock);
3282 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3283 		if ((*ppmcp->ppmc_func)(dip)) {
3284 			DEVI(dip)->devi_pm_ppm =
3285 			    (struct dev_info *)ppmcp->ppmc_dip;
3286 			mutex_exit(&ppm_lock);
3287 			return;
3288 		}
3289 	}
3290 	mutex_exit(&ppm_lock);
3291 }
3292 
3293 /*
3294  * Node is being detached so stop autopm until we see if it succeeds, in which
3295  * case pm_stop will be called.  For backwards compatible devices we bring the
3296  * device up to full power on the assumption the detach will succeed.
3297  */
3298 void
3299 pm_detaching(dev_info_t *dip)
3300 {
3301 	PMD_FUNC(pmf, "detaching")
3302 	pm_info_t *info = PM_GET_PM_INFO(dip);
3303 	int iscons;
3304 
3305 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3306 	    PM_NUMCMPTS(dip)))
3307 	if (info == NULL)
3308 		return;
3309 	ASSERT(DEVI_IS_DETACHING(dip));
3310 	PM_LOCK_DIP(dip);
3311 	info->pmi_dev_pm_state |= PM_DETACHING;
3312 	PM_UNLOCK_DIP(dip);
3313 	if (!PM_ISBC(dip))
3314 		pm_scan_stop(dip);
3315 
3316 	/*
3317 	 * console and old-style devices get brought up when detaching.
3318 	 */
3319 	iscons = PM_IS_CFB(dip);
3320 	if (iscons || PM_ISBC(dip)) {
3321 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3322 		if (iscons) {
3323 			mutex_enter(&pm_cfb_lock);
3324 			while (cfb_inuse) {
3325 				mutex_exit(&pm_cfb_lock);
3326 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3327 				delay(1);
3328 				mutex_enter(&pm_cfb_lock);
3329 			}
3330 			ASSERT(cfb_dip_detaching == NULL);
3331 			ASSERT(cfb_dip);
3332 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3333 			cfb_dip = NULL;
3334 			mutex_exit(&pm_cfb_lock);
3335 		}
3336 	}
3337 }
3338 
3339 /*
3340  * Node failed to detach.  If it used to be autopm'd, make it so again.
3341  */
3342 void
3343 pm_detach_failed(dev_info_t *dip)
3344 {
3345 	PMD_FUNC(pmf, "detach_failed")
3346 	pm_info_t *info = PM_GET_PM_INFO(dip);
3347 	int pm_all_at_normal(dev_info_t *);
3348 
3349 	if (info == NULL)
3350 		return;
3351 	ASSERT(DEVI_IS_DETACHING(dip));
3352 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3353 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3354 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3355 			/* Make sure the operation is still needed */
3356 			if (!pm_all_at_normal(dip)) {
3357 				if (pm_all_to_normal(dip,
3358 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3359 					PMD(PMD_ERROR, ("%s: could not bring "
3360 					    "%s@%s(%s#%d) to normal\n", pmf,
3361 					    PM_DEVICE(dip)))
3362 				}
3363 			}
3364 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3365 		}
3366 	}
3367 	if (!PM_ISBC(dip)) {
3368 		mutex_enter(&pm_scan_lock);
3369 		if (PM_SCANABLE(dip))
3370 			pm_scan_init(dip);
3371 		mutex_exit(&pm_scan_lock);
3372 		pm_rescan(dip);
3373 	}
3374 }
3375 
/*
 * Generic Backwards Compatible component.
 *
 * bc_comp is the template that e_pm_default_components() struct-copies
 * into every component of a backwards compatible device before giving the
 * component its own level and threshold arrays.  bc_names supplies the
 * two level names; the 2 in the initializer matches the pmc_numlevels
 * value that e_pm_default_components() sets.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3380 
3381 static void
3382 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3383 {
3384 	pm_comp_t *pmc;
3385 	pmc = &cp->pmc_comp;
3386 	pmc->pmc_numlevels = 2;
3387 	pmc->pmc_lvals[0] = 0;
3388 	pmc->pmc_lvals[1] = norm;
3389 	e_pm_set_cur_pwr(dip, cp, norm);
3390 }
3391 
3392 static void
3393 e_pm_default_components(dev_info_t *dip, int cmpts)
3394 {
3395 	int i;
3396 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3397 
3398 	p = DEVI(dip)->devi_pm_components;
3399 	for (i = 0; i < cmpts; i++, p++) {
3400 		p->pmc_comp = bc_comp;	/* struct assignment */
3401 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3402 		    KM_SLEEP);
3403 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3404 		    KM_SLEEP);
3405 		p->pmc_comp.pmc_numlevels = 2;
3406 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3407 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3408 	}
3409 }
3410 
/*
 * Called from functions that require components to exist already to allow
 * for their creation by parsing the pm-components property.
 * Device will not be power managed as a result of this call
 * No locking needed because we're single threaded by the ndi_devi_enter
 * done while attaching, and the device isn't visible until after it has
 * attached
 *
 *	dip	- device to prepare
 *	style	- PM_STYLE_NEW makes a missing pm-components property a
 *		  failure instead of falling back to backwards
 *		  compatible (BC) handling
 *
 * Returns DDI_SUCCESS if the components are (or already were) set up.
 * Returns DDI_FAILURE if a previous attempt failed, if the property is
 * malformed, if it is missing and style is PM_STYLE_NEW, if the device has
 * no components, or if a BC component has no normal power set.
 */
int
pm_premanage(dev_info_t *dip, int style)
{
	PMD_FUNC(pmf, "premanage")
	pm_comp_t	*pcp, *compp;
	int		cmpts, i, norm, error;
	pm_component_t *p = DEVI(dip)->devi_pm_components;
	pm_comp_t *pm_autoconfig(dev_info_t *, int *);

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	/*
	 * If this dip has already been processed, don't mess with it
	 */
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
		return (DDI_SUCCESS);
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
		return (DDI_FAILURE);
	}
	/*
	 * Look up pm-components property and create components accordingly
	 * If that fails, fall back to backwards compatibility
	 */
	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
		/*
		 * If error is set, the property existed but was not well formed
		 */
		if (error || (style == PM_STYLE_NEW)) {
			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
			return (DDI_FAILURE);
		}
		/*
		 * If they don't have the pm-components property, then we
		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
		 * behavior driver must have called pm_create_components, and
		 * we need to flesh out dummy components
		 */
		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
			/*
			 * Not really failure, but we don't want the
			 * caller to treat it as success
			 */
			return (DDI_FAILURE);
		}
		DEVI(dip)->devi_pm_flags |= PMC_BC;
		e_pm_default_components(dip, cmpts);
		for (i = 0; i < cmpts; i++) {
			/*
			 * if normal power not set yet, we don't really know
			 * what *ANY* of the power values are.  If normal
			 * power is set, then we assume for this backwards
			 * compatible case that the values are 0, normal power.
			 */
			norm = pm_get_normal_power(dip, i);
			/*
			 * (uint_t)-1 is the "not set" value that
			 * e_pm_create_components() stores in pmc_norm_pwr.
			 * NOTE(review): this return leaves PMC_BC set and
			 * keeps the arrays that e_pm_default_components()
			 * allocated above; confirm that a later retry or
			 * component teardown accounts for them.
			 */
			if (norm == (uint_t)-1) {
				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
				    PM_DEVICE(dip), i))
				return (DDI_FAILURE);
			}
			/*
			 * Components of BC devices start at their normal power,
			 * so count them to be not at their lowest power.
			 */
			PM_INCR_NOTLOWEST(dip);
			e_pm_default_levels(dip, PM_CP(dip, i), norm);
		}
	} else {
		/*
		 * e_pm_create_components was called from pm_autoconfig(), it
		 * creates components with no descriptions (or known levels)
		 */
		cmpts = PM_NUMCMPTS(dip);
		ASSERT(cmpts != 0);
		pcp = compp;
		p = DEVI(dip)->devi_pm_components;
		/*
		 * Copy each parsed description into its component and start
		 * the component at an unknown power level.
		 */
		for (i = 0; i < cmpts; i++, p++) {
			p->pmc_comp = *pcp++;   /* struct assignment */
			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
		}
		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
			    PMC_CPU_THRESH);
		else
			pm_set_device_threshold(dip, pm_system_idle_threshold,
			    PMC_DEF_THRESH);
		/*
		 * Only the array returned by pm_autoconfig() is freed; its
		 * elements were struct-copied into the components above.
		 */
		kmem_free(compp, cmpts * sizeof (pm_comp_t));
	}
	return (DDI_SUCCESS);
}
3508 
3509 /*
3510  * Called from during or after the device's attach to let us know it is ready
3511  * to play autopm.   Look up the pm model and manage the device accordingly.
3512  * Returns system call errno value.
3513  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3514  * a little cleaner
3515  *
3516  * Called with dip lock held, return with dip lock unheld.
3517  */
3518 
3519 int
3520 e_pm_manage(dev_info_t *dip, int style)
3521 {
3522 	PMD_FUNC(pmf, "e_manage")
3523 	pm_info_t	*info;
3524 	dev_info_t	*pdip = ddi_get_parent(dip);
3525 	int	pm_thresh_specd(dev_info_t *);
3526 	int	count;
3527 	char	*pathbuf;
3528 
3529 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3530 		return (DDI_FAILURE);
3531 	}
3532 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3533 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3534 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3535 
3536 	/*
3537 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3538 	 * out at their normal power, so they are "up", others start out
3539 	 * unknown, which is effectively "up".  Parent which want notification
3540 	 * get kidsupcnt of 0 always.
3541 	 */
3542 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3543 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3544 		e_pm_hold_rele_power(pdip, count);
3545 
3546 	pm_set_pm_info(dip, info);
3547 	/*
3548 	 * Apply any recorded thresholds
3549 	 */
3550 	(void) pm_thresh_specd(dip);
3551 
3552 	/*
3553 	 * Do dependency processing.
3554 	 */
3555 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3556 	(void) ddi_pathname(dip, pathbuf);
3557 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3558 	    PM_DEP_NOWAIT, NULL, 0);
3559 	kmem_free(pathbuf, MAXPATHLEN);
3560 
3561 	if (!PM_ISBC(dip)) {
3562 		mutex_enter(&pm_scan_lock);
3563 		if (PM_SCANABLE(dip)) {
3564 			pm_scan_init(dip);
3565 			mutex_exit(&pm_scan_lock);
3566 			pm_rescan(dip);
3567 		} else {
3568 			mutex_exit(&pm_scan_lock);
3569 		}
3570 	}
3571 	return (0);
3572 }
3573 
3574 /*
3575  * This is the obsolete exported interface for a driver to find out its
3576  * "normal" (max) power.
3577  * We only get components destroyed while no power management is
3578  * going on (and the device is detached), so we don't need a mutex here
3579  */
3580 int
3581 pm_get_normal_power(dev_info_t *dip, int comp)
3582 {
3583 
3584 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3585 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3586 	}
3587 	return (DDI_FAILURE);
3588 }
3589 
3590 /*
3591  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3592  */
3593 int
3594 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3595 {
3596 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3597 		*levelp = PM_CURPOWER(dip, comp);
3598 		return (DDI_SUCCESS);
3599 	}
3600 	return (DDI_FAILURE);
3601 }
3602 
3603 /*
3604  * Returns current threshold of indicated component
3605  */
3606 static int
3607 cur_threshold(dev_info_t *dip, int comp)
3608 {
3609 	pm_component_t *cp = PM_CP(dip, comp);
3610 	int pwr;
3611 
3612 	if (PM_ISBC(dip)) {
3613 		/*
3614 		 * backwards compatible nodes only have one threshold
3615 		 */
3616 		return (cp->pmc_comp.pmc_thresh[1]);
3617 	}
3618 	pwr = cp->pmc_cur_pwr;
3619 	if (pwr == PM_LEVEL_UNKNOWN) {
3620 		int thresh;
3621 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3622 			thresh = pm_default_nexus_threshold;
3623 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3624 			thresh = pm_cpu_idle_threshold;
3625 		else
3626 			thresh = pm_system_idle_threshold;
3627 		return (thresh);
3628 	}
3629 	ASSERT(cp->pmc_comp.pmc_thresh);
3630 	return (cp->pmc_comp.pmc_thresh[pwr]);
3631 }
3632 
3633 /*
3634  * Compute next lower component power level given power index.
3635  */
3636 static int
3637 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3638 {
3639 	int nxt_pwr;
3640 
3641 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3642 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3643 	} else {
3644 		pwrndx--;
3645 		ASSERT(pwrndx >= 0);
3646 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3647 	}
3648 	return (nxt_pwr);
3649 }
3650 
3651 /*
3652  * Update the maxpower (normal) power of a component. Note that the
3653  * component's power level is only changed if it's current power level
3654  * is higher than the new max power.
3655  */
3656 int
3657 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3658 {
3659 	PMD_FUNC(pmf, "update_maxpower")
3660 	int old;
3661 	int result;
3662 
3663 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3664 	    !e_pm_valid_power(dip, comp, level)) {
3665 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3666 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3667 		return (DDI_FAILURE);
3668 	}
3669 	old = e_pm_get_max_power(dip, comp);
3670 	e_pm_set_max_power(dip, comp, level);
3671 
3672 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3673 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3674 		e_pm_set_max_power(dip, comp, old);
3675 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3676 		    PM_DEVICE(dip)))
3677 		return (DDI_FAILURE);
3678 	}
3679 	return (DDI_SUCCESS);
3680 }
3681 
3682 /*
3683  * Bring all components of device to normal power
3684  */
3685 int
3686 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3687 {
3688 	PMD_FUNC(pmf, "all_to_normal")
3689 	int		*normal;
3690 	int		i, ncomps, result;
3691 	size_t		size;
3692 	int		changefailed = 0;
3693 
3694 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3695 	ASSERT(PM_GET_PM_INFO(dip));
3696 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3697 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3698 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3699 		return (DDI_FAILURE);
3700 	}
3701 	ncomps = PM_NUMCMPTS(dip);
3702 	for (i = 0; i < ncomps; i++) {
3703 		if (pm_set_power(dip, i, normal[i],
3704 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3705 			changefailed++;
3706 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3707 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3708 			    PM_DEVICE(dip), i, normal[i], result))
3709 		}
3710 	}
3711 	kmem_free(normal, size);
3712 	if (changefailed) {
3713 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3714 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3715 		return (DDI_FAILURE);
3716 	}
3717 	return (DDI_SUCCESS);
3718 }
3719 
3720 /*
3721  * Returns true if all components of device are at normal power
3722  */
3723 int
3724 pm_all_at_normal(dev_info_t *dip)
3725 {
3726 	PMD_FUNC(pmf, "all_at_normal")
3727 	int		*normal;
3728 	int		i;
3729 	size_t		size;
3730 
3731 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3732 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3733 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3734 		return (DDI_FAILURE);
3735 	}
3736 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3737 		int current = PM_CURPOWER(dip, i);
3738 		if (normal[i] > current) {
3739 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3740 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3741 			    normal[i], current))
3742 			break;
3743 		}
3744 	}
3745 	kmem_free(normal, size);
3746 	if (i != PM_NUMCMPTS(dip)) {
3747 		return (0);
3748 	}
3749 	return (1);
3750 }
3751 
3752 static void
3753 bring_wekeeps_up(char *keeper)
3754 {
3755 	PMD_FUNC(pmf, "bring_wekeeps_up")
3756 	int i;
3757 	pm_pdr_t *dp;
3758 	pm_info_t *wku_info;
3759 	char *kept_path;
3760 	dev_info_t *kept;
3761 	static void bring_pmdep_up(dev_info_t *, int);
3762 
3763 	if (panicstr) {
3764 		return;
3765 	}
3766 	/*
3767 	 * We process the request even if the keeper detaches because
3768 	 * detach processing expects this to increment kidsupcnt of kept.
3769 	 */
3770 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3771 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3772 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3773 			continue;
3774 		for (i = 0; i < dp->pdr_kept_count; i++) {
3775 			kept_path = dp->pdr_kept_paths[i];
3776 			if (kept_path == NULL)
3777 				continue;
3778 			ASSERT(kept_path[0] != '\0');
3779 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3780 				continue;
3781 			wku_info = PM_GET_PM_INFO(kept);
3782 			if (wku_info == NULL) {
3783 				if (kept)
3784 					ddi_release_devi(kept);
3785 				continue;
3786 			}
3787 			/*
3788 			 * Don't mess with it if it is being detached, it isn't
3789 			 * safe to call its power entry point
3790 			 */
3791 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3792 				if (kept)
3793 					ddi_release_devi(kept);
3794 				continue;
3795 			}
3796 			bring_pmdep_up(kept, 1);
3797 			ddi_release_devi(kept);
3798 		}
3799 	}
3800 }
3801 
3802 /*
3803  * Bring up the 'kept' device passed as argument
3804  */
3805 static void
3806 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3807 {
3808 	PMD_FUNC(pmf, "bring_pmdep_up")
3809 	int is_all_at_normal = 0;
3810 
3811 	/*
3812 	 * If the kept device has been unmanaged, do nothing.
3813 	 */
3814 	if (!PM_GET_PM_INFO(kept_dip))
3815 		return;
3816 
3817 	/* Just ignore DIRECT PM device till they are released. */
3818 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3819 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3820 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3821 		    "controlling process did something else\n", pmf,
3822 		    PM_DEVICE(kept_dip)))
3823 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3824 		return;
3825 	}
3826 	/* if we got here the keeper had a transition from OFF->ON */
3827 	if (hold)
3828 		pm_hold_power(kept_dip);
3829 
3830 	if (!is_all_at_normal)
3831 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3832 }
3833 
3834 /*
3835  * A bunch of stuff that belongs only to the next routine (or two)
3836  */
3837 
3838 static const char namestr[] = "NAME=";
3839 static const int nameln = sizeof (namestr) - 1;
3840 static const char pmcompstr[] = "pm-components";
3841 
3842 struct pm_comp_pkg {
3843 	pm_comp_t		*comp;
3844 	struct pm_comp_pkg	*next;
3845 };
3846 
/*
 * ASCII-only character classification.  Both macros evaluate their
 * argument more than once, so do not pass an expression with side effects.
 */
#define	isdigit(ch)	('0' <= (ch) && (ch) <= '9')

#define	isxdigit(ch)	\
	(isdigit(ch) || \
	('a' <= (ch) && (ch) <= 'f') || \
	('A' <= (ch) && (ch) <= 'F'))
3851 
/*
 * FINISH_COMP completes the component currently being assembled by
 * pm_autoconfig(); it exists only so that the code need not be written
 * twice there.  It is not self-contained: it reads and writes the
 * following locals of its user: compp, size, level, j, tp, lvals, lnames
 * and lszs.
 *
 * It allocates the level-name buffer (size bytes) and the per-level
 * name/value/threshold arrays (level entries each), then copies every
 * collected level value and name into them.
 */
#define	FINISH_COMP do { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	tp = compp->pmc_lname_buf; \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* move each level name out of the property array into the buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	/* the copy must have stayed inside the name buffer */ \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
} while (0)
3875 
3876 /*
3877  * Create (empty) component data structures.
3878  */
3879 static void
3880 e_pm_create_components(dev_info_t *dip, int num_components)
3881 {
3882 	struct pm_component *compp, *ocompp;
3883 	int i, size = 0;
3884 
3885 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3886 	ASSERT(!DEVI(dip)->devi_pm_components);
3887 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3888 	size = sizeof (struct pm_component) * num_components;
3889 
3890 	compp = kmem_zalloc(size, KM_SLEEP);
3891 	ocompp = compp;
3892 	DEVI(dip)->devi_pm_comp_size = size;
3893 	DEVI(dip)->devi_pm_num_components = num_components;
3894 	PM_LOCK_BUSY(dip);
3895 	for (i = 0; i < num_components;  i++) {
3896 		compp->pmc_timestamp = gethrestime_sec();
3897 		compp->pmc_norm_pwr = (uint_t)-1;
3898 		compp++;
3899 	}
3900 	PM_UNLOCK_BUSY(dip);
3901 	DEVI(dip)->devi_pm_components = ocompp;
3902 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3903 }
3904 
/*
 * Parse a non-negative hex or decimal value from the front of a string.
 *
 * cp	string to parse; it must begin with a decimal digit.  A leading
 *	"0x" or "0X" selects hexadecimal, anything else is decimal.
 * valp	where the parsed value is stored on success
 *
 * Returns a pointer to the first character following the number, or NULL
 * if the string does not start with a digit or the value does not fit in
 * an int.  *valp is left untouched when NULL is returned.  A bare "0x"
 * with no hex digits after it parses as 0.
 *
 * The value is accumulated in place rather than through an intermediate
 * fixed-size buffer, so the length of the digit string is not limited and
 * cannot overrun the stack.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	int base = 10;
	int value = 0;
	int digit;

	if (*cp < '0' || *cp > '9')
		return (NULL);

	if (cp[0] == '0' && (cp[1] == 'x' || cp[1] == 'X')) {
		base = 16;
		cp += 2;
	}

	for (;;) {
		int ch = *cp;

		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			break;

		/* refuse values that would overflow an int */
		if (value > (INT_MAX - digit) / base)
			return (NULL);
		value = value * base + digit;
		cp++;
	}

	*valp = value;
	return (cp);
}
3967 
3968 /*
3969  * Set max (previously documented as "normal") power.
3970  */
3971 static void
3972 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3973 {
3974 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3975 }
3976 
3977 /*
3978  * Get max (previously documented as "normal") power.
3979  */
3980 static int
3981 e_pm_get_max_power(dev_info_t *dip, int component_number)
3982 {
3983 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
3984 }
3985 
3986 /*
3987  * Internal routine for destroying components
3988  * It is called even when there might not be any, so it must be forgiving.
3989  */
3990 static void
3991 e_pm_destroy_components(dev_info_t *dip)
3992 {
3993 	int i;
3994 	struct pm_component *cp;
3995 
3996 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3997 	if (PM_NUMCMPTS(dip) == 0)
3998 		return;
3999 	cp = DEVI(dip)->devi_pm_components;
4000 	ASSERT(cp);
4001 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
4002 		int nlevels = cp->pmc_comp.pmc_numlevels;
4003 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
4004 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
4005 		/*
4006 		 * For BC nodes, the rest is static in bc_comp, so skip it
4007 		 */
4008 		if (PM_ISBC(dip))
4009 			continue;
4010 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4011 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4012 		kmem_free(cp->pmc_comp.pmc_lname_buf,
4013 		    cp->pmc_comp.pmc_lnames_sz);
4014 	}
4015 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4016 	DEVI(dip)->devi_pm_components = NULL;
4017 	DEVI(dip)->devi_pm_num_components = 0;
4018 	DEVI(dip)->devi_pm_flags &=
4019 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4020 }
4021 
4022 /*
4023  * Read the pm-components property (if there is one) and use it to set up
4024  * components.  Returns a pointer to an array of component structures if
4025  * pm-components found and successfully parsed, else returns NULL.
4026  * Sets error return *errp to true to indicate a failure (as opposed to no
4027  * property being present).
4028  */
4029 pm_comp_t *
4030 pm_autoconfig(dev_info_t *dip, int *errp)
4031 {
4032 	PMD_FUNC(pmf, "autoconfig")
4033 	uint_t nelems;
4034 	char **pp;
4035 	pm_comp_t *compp = NULL;
4036 	int i, j, level, components = 0;
4037 	size_t size = 0;
4038 	struct pm_comp_pkg *p, *ptail;
4039 	struct pm_comp_pkg *phead = NULL;
4040 	int *lvals = NULL;
4041 	int *lszs = NULL;
4042 	int *np = NULL;
4043 	int npi = 0;
4044 	char **lnames = NULL;
4045 	char *cp, *tp;
4046 	pm_comp_t *ret = NULL;
4047 
4048 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4049 	*errp = 0;	/* assume success */
4050 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4051 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4052 		return (NULL);
4053 	}
4054 
4055 	if (nelems < 3) {	/* need at least one name and two levels */
4056 		goto errout;
4057 	}
4058 
4059 	/*
4060 	 * pm_create_components is no longer allowed
4061 	 */
4062 	if (PM_NUMCMPTS(dip) != 0) {
4063 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4064 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4065 		goto errout;
4066 	}
4067 
4068 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4069 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4070 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4071 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4072 
4073 	level = 0;
4074 	phead = NULL;
4075 	for (i = 0; i < nelems; i++) {
4076 		cp = pp[i];
4077 		if (!isdigit(*cp)) {	/*  must be name */
4078 			if (strncmp(cp, namestr, nameln) != 0) {
4079 				goto errout;
4080 			}
4081 			if (i != 0) {
4082 				if (level == 0) {	/* no level spec'd */
4083 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4084 					    pmf))
4085 					goto errout;
4086 				}
4087 				np[npi++] = lvals[level - 1];
4088 				/* finish up previous component levels */
4089 				FINISH_COMP;
4090 			}
4091 			cp += nameln;
4092 			if (!*cp) {
4093 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4094 				goto errout;
4095 			}
4096 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4097 			if (phead == NULL) {
4098 				phead = ptail = p;
4099 			} else {
4100 				ptail->next = p;
4101 				ptail = p;
4102 			}
4103 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4104 			    KM_SLEEP);
4105 			compp->pmc_name_sz = strlen(cp) + 1;
4106 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4107 			    KM_SLEEP);
4108 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4109 			components++;
4110 			level = 0;
4111 		} else {	/* better be power level <num>=<name> */
4112 #ifdef DEBUG
4113 			tp = cp;
4114 #endif
4115 			if (i == 0 ||
4116 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4117 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4118 				goto errout;
4119 			}
4120 #ifdef DEBUG
4121 			tp = cp;
4122 #endif
4123 			if (*cp++ != '=' || !*cp) {
4124 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4125 				goto errout;
4126 			}
4127 
4128 			lszs[level] = strlen(cp) + 1;
4129 			size += lszs[level];
4130 			lnames[level] = cp;	/* points into prop string */
4131 			level++;
4132 		}
4133 	}
4134 	np[npi++] = lvals[level - 1];
4135 	if (level == 0) {	/* ended with a name */
4136 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4137 		goto errout;
4138 	}
4139 	FINISH_COMP;
4140 
4141 
4142 	/*
4143 	 * Now we have a list of components--we have to return instead an
4144 	 * array of them, but we can just copy the top level and leave
4145 	 * the rest as is
4146 	 */
4147 	(void) e_pm_create_components(dip, components);
4148 	for (i = 0; i < components; i++)
4149 		e_pm_set_max_power(dip, i, np[i]);
4150 
4151 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4152 	for (i = 0, p = phead; i < components; i++) {
4153 		ASSERT(p);
4154 		/*
4155 		 * Now sanity-check values:  levels must be monotonically
4156 		 * increasing
4157 		 */
4158 		if (p->comp->pmc_numlevels < 2) {
4159 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4160 			    "levels\n", pmf,
4161 			    p->comp->pmc_name, PM_DEVICE(dip),
4162 			    p->comp->pmc_numlevels))
4163 			goto errout;
4164 		}
4165 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4166 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4167 			    (p->comp->pmc_lvals[j] <=
4168 			    p->comp->pmc_lvals[j - 1]))) {
4169 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4170 				    "not mono. incr, %d follows %d\n", pmf,
4171 				    p->comp->pmc_name, PM_DEVICE(dip),
4172 				    p->comp->pmc_lvals[j],
4173 				    p->comp->pmc_lvals[j - 1]))
4174 				goto errout;
4175 			}
4176 		}
4177 		ret[i] = *p->comp;	/* struct assignment */
4178 		for (j = 0; j < i; j++) {
4179 			/*
4180 			 * Test for unique component names
4181 			 */
4182 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4183 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4184 				    "unique\n", pmf, ret[j].pmc_name,
4185 				    PM_DEVICE(dip)))
4186 				goto errout;
4187 			}
4188 		}
4189 		ptail = p;
4190 		p = p->next;
4191 		phead = p;	/* errout depends on phead making sense */
4192 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4193 		kmem_free(ptail, sizeof (*ptail));
4194 	}
4195 out:
4196 	ddi_prop_free(pp);
4197 	if (lvals)
4198 		kmem_free(lvals, nelems * sizeof (int));
4199 	if (lszs)
4200 		kmem_free(lszs, nelems * sizeof (int));
4201 	if (lnames)
4202 		kmem_free(lnames, nelems * sizeof (char *));
4203 	if (np)
4204 		kmem_free(np, nelems * sizeof (int));
4205 	return (ret);
4206 
4207 errout:
4208 	e_pm_destroy_components(dip);
4209 	*errp = 1;	/* signal failure */
4210 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4211 	for (i = 0; i < nelems - 1; i++)
4212 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4213 	if (nelems != 0)
4214 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4215 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4216 	for (p = phead; p; ) {
4217 		pm_comp_t *pp;
4218 		int n;
4219 
4220 		ptail = p;
4221 		/*
4222 		 * Free component data structures
4223 		 */
4224 		pp = p->comp;
4225 		n = pp->pmc_numlevels;
4226 		if (pp->pmc_name_sz) {
4227 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4228 		}
4229 		if (pp->pmc_lnames_sz) {
4230 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4231 		}
4232 		if (pp->pmc_lnames) {
4233 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4234 		}
4235 		if (pp->pmc_thresh) {
4236 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4237 		}
4238 		if (pp->pmc_lvals) {
4239 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4240 		}
4241 		p = ptail->next;
4242 		kmem_free(ptail, sizeof (*ptail));
4243 	}
4244 	if (ret != NULL)
4245 		kmem_free(ret, components * sizeof (pm_comp_t));
4246 	ret = NULL;
4247 	goto out;
4248 }
4249 
4250 /*
4251  * Set threshold values for a devices components by dividing the target
4252  * threshold (base) by the number of transitions and assign each transition
4253  * that threshold.  This will get the entire device down in the target time if
4254  * all components are idle and even if there are dependencies among components.
4255  *
4256  * Devices may well get powered all the way down before the target time, but
4257  * at least the EPA will be happy.
4258  */
4259 void
4260 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4261 {
4262 	PMD_FUNC(pmf, "set_device_threshold")
4263 	int target_threshold = (base * 95) / 100;
4264 	int level, comp;		/* loop counters */
4265 	int transitions = 0;
4266 	int ncomp = PM_NUMCMPTS(dip);
4267 	int thresh;
4268 	int remainder;
4269 	pm_comp_t *pmc;
4270 	int i, circ;
4271 
4272 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4273 	PM_LOCK_DIP(dip);
4274 	/*
4275 	 * First we handle the easy one.  If we're setting the default
4276 	 * threshold for a node with children, then we set it to the
4277 	 * default nexus threshold (currently 0) and mark it as default
4278 	 * nexus threshold instead
4279 	 */
4280 	if (PM_IS_NEXUS(dip)) {
4281 		if (flag == PMC_DEF_THRESH) {
4282 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4283 			    PM_DEVICE(dip)))
4284 			thresh = pm_default_nexus_threshold;
4285 			for (comp = 0; comp < ncomp; comp++) {
4286 				pmc = &PM_CP(dip, comp)->pmc_comp;
4287 				for (level = 1; level < pmc->pmc_numlevels;
4288 				    level++) {
4289 					pmc->pmc_thresh[level] = thresh;
4290 				}
4291 			}
4292 			DEVI(dip)->devi_pm_dev_thresh =
4293 			    pm_default_nexus_threshold;
4294 			/*
4295 			 * If the nexus node is being reconfigured back to
4296 			 * the default threshold, adjust the notlowest count.
4297 			 */
4298 			if (DEVI(dip)->devi_pm_flags &
4299 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4300 				PM_LOCK_POWER(dip, &circ);
4301 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4302 					if (PM_CURPOWER(dip, i) == 0)
4303 						continue;
4304 					mutex_enter(&pm_compcnt_lock);
4305 					ASSERT(pm_comps_notlowest);
4306 					pm_comps_notlowest--;
4307 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4308 					    "notlowest to %d\n", pmf,
4309 					    PM_DEVICE(dip), pm_comps_notlowest))
4310 					if (pm_comps_notlowest == 0)
4311 						pm_ppm_notify_all_lowest(dip,
4312 						    PM_ALL_LOWEST);
4313 					mutex_exit(&pm_compcnt_lock);
4314 				}
4315 				PM_UNLOCK_POWER(dip, circ);
4316 			}
4317 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4318 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4319 			PM_UNLOCK_DIP(dip);
4320 			return;
4321 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4322 			/*
4323 			 * If the nexus node is being configured for a
4324 			 * non-default threshold, include that node in
4325 			 * the notlowest accounting.
4326 			 */
4327 			PM_LOCK_POWER(dip, &circ);
4328 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4329 				if (PM_CURPOWER(dip, i) == 0)
4330 					continue;
4331 				mutex_enter(&pm_compcnt_lock);
4332 				if (pm_comps_notlowest == 0)
4333 					pm_ppm_notify_all_lowest(dip,
4334 					    PM_NOT_ALL_LOWEST);
4335 				pm_comps_notlowest++;
4336 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4337 				    "notlowest to %d\n", pmf,
4338 				    PM_DEVICE(dip), pm_comps_notlowest))
4339 				mutex_exit(&pm_compcnt_lock);
4340 			}
4341 			PM_UNLOCK_POWER(dip, circ);
4342 		}
4343 	}
4344 	/*
4345 	 * Compute the total number of transitions for all components
4346 	 * of the device.  Distribute the threshold evenly over them
4347 	 */
4348 	for (comp = 0; comp < ncomp; comp++) {
4349 		pmc = &PM_CP(dip, comp)->pmc_comp;
4350 		ASSERT(pmc->pmc_numlevels > 1);
4351 		transitions += pmc->pmc_numlevels - 1;
4352 	}
4353 	ASSERT(transitions);
4354 	thresh = target_threshold / transitions;
4355 
4356 	for (comp = 0; comp < ncomp; comp++) {
4357 		pmc = &PM_CP(dip, comp)->pmc_comp;
4358 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4359 			pmc->pmc_thresh[level] = thresh;
4360 		}
4361 	}
4362 
4363 #ifdef DEBUG
4364 	for (comp = 0; comp < ncomp; comp++) {
4365 		pmc = &PM_CP(dip, comp)->pmc_comp;
4366 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4367 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4368 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4369 			    comp, level, pmc->pmc_thresh[level]))
4370 		}
4371 	}
4372 #endif
4373 	/*
4374 	 * Distribute any remainder till they are all gone
4375 	 */
4376 	remainder = target_threshold - thresh * transitions;
4377 	level = 1;
4378 #ifdef DEBUG
4379 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4380 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4381 	    transitions))
4382 #endif
4383 	while (remainder > 0) {
4384 		comp = 0;
4385 		while (remainder && (comp < ncomp)) {
4386 			pmc = &PM_CP(dip, comp)->pmc_comp;
4387 			if (level < pmc->pmc_numlevels) {
4388 				pmc->pmc_thresh[level] += 1;
4389 				remainder--;
4390 			}
4391 			comp++;
4392 		}
4393 		level++;
4394 	}
4395 #ifdef DEBUG
4396 	for (comp = 0; comp < ncomp; comp++) {
4397 		pmc = &PM_CP(dip, comp)->pmc_comp;
4398 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4399 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4400 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4401 			    comp, level, pmc->pmc_thresh[level]))
4402 		}
4403 	}
4404 #endif
4405 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4406 	DEVI(dip)->devi_pm_dev_thresh = base;
4407 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4408 	DEVI(dip)->devi_pm_flags |= flag;
4409 	PM_UNLOCK_DIP(dip);
4410 }
4411 
4412 /*
4413  * Called when there is no old-style platform power management driver
4414  */
4415 static int
4416 ddi_no_platform_power(power_req_t *req)
4417 {
4418 	_NOTE(ARGUNUSED(req))
4419 	return (DDI_FAILURE);
4420 }
4421 
4422 /*
4423  * This function calls the entry point supplied by the platform-specific
4424  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4425  * The use of global for getting the  function name from platform-specific
4426  * pm driver is not ideal, but it is simple and efficient.
4427  * The previous property lookup was being done in the idle loop on swift
4428  * systems without pmc chips and hurt deskbench performance as well as
4429  * violating scheduler locking rules
4430  */
4431 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4432 
4433 /*
4434  * Old obsolete interface for a device to request a power change (but only
4435  * an increase in power)
4436  */
4437 int
4438 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4439 {
4440 	return (pm_raise_power(dip, cmpt, level));
4441 }
4442 
4443 /*
4444  * The old obsolete interface to platform power management.  Only used by
4445  * Gypsy platform and APM on X86.
4446  */
4447 int
4448 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4449 {
4450 	power_req_t	request;
4451 
4452 	request.request_type = PMR_SET_POWER;
4453 	request.req.set_power_req.who = dip;
4454 	request.req.set_power_req.cmpt = pm_cmpt;
4455 	request.req.set_power_req.level = pm_level;
4456 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4457 }
4458 
4459 /*
4460  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4461  * passed.  Returns true if subsequent processing could result in power being
4462  * removed from the device.  The arg is not currently used because it is
4463  * implicit in the operation of cpr/DR.
4464  */
4465 int
4466 ddi_removing_power(dev_info_t *dip)
4467 {
4468 	_NOTE(ARGUNUSED(dip))
4469 	return (pm_powering_down);
4470 }
4471 
4472 /*
4473  * Returns true if a device indicates that its parent handles suspend/resume
4474  * processing for it.
4475  */
4476 int
4477 e_ddi_parental_suspend_resume(dev_info_t *dip)
4478 {
4479 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4480 }
4481 
4482 /*
4483  * Called for devices which indicate that their parent does suspend/resume
4484  * handling for them
4485  */
4486 int
4487 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4488 {
4489 	power_req_t	request;
4490 	request.request_type = PMR_SUSPEND;
4491 	request.req.suspend_req.who = dip;
4492 	request.req.suspend_req.cmd = cmd;
4493 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4494 }
4495 
4496 /*
4497  * Called for devices which indicate that their parent does suspend/resume
4498  * handling for them
4499  */
4500 int
4501 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4502 {
4503 	power_req_t	request;
4504 	request.request_type = PMR_RESUME;
4505 	request.req.resume_req.who = dip;
4506 	request.req.resume_req.cmd = cmd;
4507 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4508 }
4509 
4510 /*
4511  * Old obsolete exported interface for drivers to create components.
4512  * This is now handled by exporting the pm-components property.
4513  */
4514 int
4515 pm_create_components(dev_info_t *dip, int num_components)
4516 {
4517 	PMD_FUNC(pmf, "pm_create_components")
4518 
4519 	if (num_components < 1)
4520 		return (DDI_FAILURE);
4521 
4522 	if (!DEVI_IS_ATTACHING(dip)) {
4523 		return (DDI_FAILURE);
4524 	}
4525 
4526 	/* don't need to lock dip because attach is single threaded */
4527 	if (DEVI(dip)->devi_pm_components) {
4528 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4529 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4530 		return (DDI_FAILURE);
4531 	}
4532 	e_pm_create_components(dip, num_components);
4533 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4534 	e_pm_default_components(dip, num_components);
4535 	return (DDI_SUCCESS);
4536 }
4537 
4538 /*
4539  * Obsolete interface previously called by drivers to destroy their components
4540  * at detach time.  This is now done automatically.  However, we need to keep
4541  * this for the old drivers.
4542  */
4543 void
4544 pm_destroy_components(dev_info_t *dip)
4545 {
4546 	PMD_FUNC(pmf, "pm_destroy_components")
4547 	dev_info_t *pdip = ddi_get_parent(dip);
4548 
4549 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4550 	    PM_DEVICE(dip)))
4551 	ASSERT(DEVI_IS_DETACHING(dip));
4552 #ifdef DEBUG
4553 	if (!PM_ISBC(dip))
4554 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4555 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4556 		    PM_ADDR(dip));
4557 #endif
4558 	/*
4559 	 * We ignore this unless this is an old-style driver, except for
4560 	 * printing the message above
4561 	 */
4562 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4563 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4564 		    PM_DEVICE(dip)))
4565 		return;
4566 	}
4567 	ASSERT(PM_GET_PM_INFO(dip));
4568 
4569 	/*
4570 	 * pm_unmanage will clear info pointer later, after dealing with
4571 	 * dependencies
4572 	 */
4573 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4574 	/*
4575 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4576 	 * Parents that get notification are not adjusted because their
4577 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4578 	 */
4579 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4580 		pm_rele_power(pdip);
4581 #ifdef DEBUG
4582 	else {
4583 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4584 		    pmf, PM_DEVICE(dip)))
4585 	}
4586 #endif
4587 	e_pm_destroy_components(dip);
4588 	/*
4589 	 * Forget we ever knew anything about the components of this  device
4590 	 */
4591 	DEVI(dip)->devi_pm_flags &=
4592 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4593 }
4594 
4595 /*
4596  * Exported interface for a driver to set a component busy.
4597  */
4598 int
4599 pm_busy_component(dev_info_t *dip, int cmpt)
4600 {
4601 	struct pm_component *cp;
4602 
4603 	ASSERT(dip != NULL);
4604 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4605 		return (DDI_FAILURE);
4606 	PM_LOCK_BUSY(dip);
4607 	cp->pmc_busycount++;
4608 	cp->pmc_timestamp = 0;
4609 	PM_UNLOCK_BUSY(dip);
4610 	return (DDI_SUCCESS);
4611 }
4612 
4613 /*
4614  * Exported interface for a driver to set a component idle.
4615  */
4616 int
4617 pm_idle_component(dev_info_t *dip, int cmpt)
4618 {
4619 	PMD_FUNC(pmf, "pm_idle_component")
4620 	struct pm_component *cp;
4621 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4622 
4623 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4624 		return (DDI_FAILURE);
4625 
4626 	PM_LOCK_BUSY(dip);
4627 	if (cp->pmc_busycount) {
4628 		if (--(cp->pmc_busycount) == 0)
4629 			cp->pmc_timestamp = gethrestime_sec();
4630 	} else {
4631 		cp->pmc_timestamp = gethrestime_sec();
4632 	}
4633 
4634 	PM_UNLOCK_BUSY(dip);
4635 
4636 	/*
4637 	 * if device becomes idle during idle down period, try scan it down
4638 	 */
4639 	if (scanp && PM_IS_PID(dip)) {
4640 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4641 		    PM_DEVICE(dip)))
4642 		pm_rescan(dip);
4643 		return (DDI_SUCCESS);
4644 	}
4645 
4646 	/*
4647 	 * handle scan not running with nexus threshold == 0
4648 	 */
4649 
4650 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4651 		pm_rescan(dip);
4652 	}
4653 
4654 	return (DDI_SUCCESS);
4655 }
4656 
4657 /*
4658  * This is the old  obsolete interface called by drivers to set their normal
4659  * power.  Thus we can't fix its behavior or return a value.
4660  * This functionality is replaced by the pm-component property.
4661  * We'll only get components destroyed while no power management is
4662  * going on (and the device is detached), so we don't need a mutex here
4663  */
4664 void
4665 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4666 {
4667 	PMD_FUNC(pmf, "set_normal_power")
4668 #ifdef DEBUG
4669 	if (!PM_ISBC(dip))
4670 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4671 		    "(driver exporting pm-components property) ignored",
4672 		    PM_NAME(dip), PM_ADDR(dip));
4673 #endif
4674 	if (PM_ISBC(dip)) {
4675 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4676 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4677 		e_pm_set_max_power(dip, comp, level);
4678 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4679 	}
4680 }
4681 
4682 /*
4683  * Called on a successfully detached driver to free pm resources
4684  */
4685 static void
4686 pm_stop(dev_info_t *dip)
4687 {
4688 	PMD_FUNC(pmf, "stop")
4689 	dev_info_t *pdip = ddi_get_parent(dip);
4690 
4691 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4692 	/* stopping scan, destroy scan data structure */
4693 	if (!PM_ISBC(dip)) {
4694 		pm_scan_stop(dip);
4695 		pm_scan_fini(dip);
4696 	}
4697 
4698 	if (PM_GET_PM_INFO(dip) != NULL) {
4699 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4700 			/*
4701 			 * Old style driver may have called
4702 			 * pm_destroy_components already, but just in case ...
4703 			 */
4704 			e_pm_destroy_components(dip);
4705 		} else {
4706 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4707 			    pmf, PM_DEVICE(dip)))
4708 		}
4709 	} else {
4710 		if (PM_NUMCMPTS(dip))
4711 			e_pm_destroy_components(dip);
4712 		else {
4713 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4714 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4715 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4716 					pm_rele_power(pdip);
4717 				} else if (pdip &&
4718 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4719 					(void) mdi_power(pdip,
4720 					    MDI_PM_RELE_POWER,
4721 					    (void *)dip, NULL, 0);
4722 				}
4723 			}
4724 		}
4725 	}
4726 }
4727 
4728 /*
4729  * The node is the subject of a reparse pm props ioctl. Throw away the old
4730  * info and start over.
4731  */
4732 int
4733 e_new_pm_props(dev_info_t *dip)
4734 {
4735 	if (PM_GET_PM_INFO(dip) != NULL) {
4736 		pm_stop(dip);
4737 
4738 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4739 			return (DDI_FAILURE);
4740 		}
4741 	}
4742 	e_pm_props(dip);
4743 	return (DDI_SUCCESS);
4744 }
4745 
4746 /*
4747  * Device has been attached, so process its pm properties
4748  */
4749 void
4750 e_pm_props(dev_info_t *dip)
4751 {
4752 	char *pp;
4753 	int len;
4754 	int flags = 0;
4755 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4756 
4757 	/*
4758 	 * It doesn't matter if we do this more than once, we should always
4759 	 * get the same answers, and if not, then the last one in is the
4760 	 * best one.
4761 	 */
4762 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4763 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4764 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4765 			flags = PMC_NEEDS_SR;
4766 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4767 			flags = PMC_NO_SR;
4768 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4769 			flags = PMC_PARENTAL_SR;
4770 		} else {
4771 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4772 			    "%s property value '%s'", PM_NAME(dip),
4773 			    PM_ADDR(dip), "pm-hardware-state", pp);
4774 		}
4775 		kmem_free(pp, len);
4776 	}
4777 	/*
4778 	 * This next segment (PMC_WANTS_NOTIFY) is in
4779 	 * support of nexus drivers which will want to be involved in
4780 	 * (or at least notified of) their child node's power level transitions.
4781 	 * "pm-want-child-notification?" is defined by the parent.
4782 	 */
4783 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4784 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4785 		flags |= PMC_WANTS_NOTIFY;
4786 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4787 	    dip, propflag, "pm-want-child-notification?"));
4788 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4789 	    "no-involuntary-power-cycles"))
4790 		flags |= PMC_NO_INVOL;
4791 	/*
4792 	 * Is the device a CPU device?
4793 	 */
4794 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4795 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4796 		if (strcmp(pp, "CPU") == 0) {
4797 			flags |= PMC_CPU_DEVICE;
4798 		} else {
4799 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4800 			    "%s property value '%s'", PM_NAME(dip),
4801 			    PM_ADDR(dip), "pm-class", pp);
4802 		}
4803 		kmem_free(pp, len);
4804 	}
4805 	/* devfs single threads us */
4806 	DEVI(dip)->devi_pm_flags |= flags;
4807 }
4808 
4809 /*
4810  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4811  * driver which has claimed a node.
4812  * Sets old_power in arg struct.
4813  */
4814 static int
4815 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4816     ddi_ctl_enum_t ctlop, void *arg, void *result)
4817 {
4818 	_NOTE(ARGUNUSED(dip))
4819 	PMD_FUNC(pmf, "ctlops")
4820 	power_req_t *reqp = (power_req_t *)arg;
4821 	int retval;
4822 	dev_info_t *target_dip;
4823 	int new_level, old_level, cmpt;
4824 #ifdef PMDDEBUG
4825 	char *format;
4826 #endif
4827 
4828 	/*
4829 	 * The interface for doing the actual power level changes is now
4830 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4831 	 * different platform-specific power control drivers.
4832 	 *
4833 	 * This driver implements the "default" version of this interface.
4834 	 * If no ppm driver has been installed then this interface is called
4835 	 * instead.
4836 	 */
4837 	ASSERT(dip == NULL);
4838 	switch (ctlop) {
4839 	case DDI_CTLOPS_POWER:
4840 		switch (reqp->request_type) {
4841 		case PMR_PPM_SET_POWER:
4842 		{
4843 			target_dip = reqp->req.ppm_set_power_req.who;
4844 			ASSERT(target_dip == rdip);
4845 			new_level = reqp->req.ppm_set_power_req.new_level;
4846 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4847 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4848 			old_level = PM_CURPOWER(target_dip, cmpt);
4849 			reqp->req.ppm_set_power_req.old_level = old_level;
4850 			retval = pm_power(target_dip, cmpt, new_level);
4851 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4852 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4853 			    old_level, new_level, (retval == DDI_SUCCESS ?
4854 			    "chd" : "no chg")))
4855 			return (retval);
4856 		}
4857 
4858 		case PMR_PPM_PRE_DETACH:
4859 		case PMR_PPM_POST_DETACH:
4860 		case PMR_PPM_PRE_ATTACH:
4861 		case PMR_PPM_POST_ATTACH:
4862 		case PMR_PPM_PRE_PROBE:
4863 		case PMR_PPM_POST_PROBE:
4864 		case PMR_PPM_PRE_RESUME:
4865 		case PMR_PPM_INIT_CHILD:
4866 		case PMR_PPM_UNINIT_CHILD:
4867 #ifdef PMDDEBUG
4868 			switch (reqp->request_type) {
4869 				case PMR_PPM_PRE_DETACH:
4870 					format = "%s: PMR_PPM_PRE_DETACH "
4871 					    "%s@%s(%s#%d)\n";
4872 					break;
4873 				case PMR_PPM_POST_DETACH:
4874 					format = "%s: PMR_PPM_POST_DETACH "
4875 					    "%s@%s(%s#%d) rets %d\n";
4876 					break;
4877 				case PMR_PPM_PRE_ATTACH:
4878 					format = "%s: PMR_PPM_PRE_ATTACH "
4879 					    "%s@%s(%s#%d)\n";
4880 					break;
4881 				case PMR_PPM_POST_ATTACH:
4882 					format = "%s: PMR_PPM_POST_ATTACH "
4883 					    "%s@%s(%s#%d) rets %d\n";
4884 					break;
4885 				case PMR_PPM_PRE_PROBE:
4886 					format = "%s: PMR_PPM_PRE_PROBE "
4887 					    "%s@%s(%s#%d)\n";
4888 					break;
4889 				case PMR_PPM_POST_PROBE:
4890 					format = "%s: PMR_PPM_POST_PROBE "
4891 					    "%s@%s(%s#%d) rets %d\n";
4892 					break;
4893 				case PMR_PPM_PRE_RESUME:
4894 					format = "%s: PMR_PPM_PRE_RESUME "
4895 					    "%s@%s(%s#%d) rets %d\n";
4896 					break;
4897 				case PMR_PPM_INIT_CHILD:
4898 					format = "%s: PMR_PPM_INIT_CHILD "
4899 					    "%s@%s(%s#%d)\n";
4900 					break;
4901 				case PMR_PPM_UNINIT_CHILD:
4902 					format = "%s: PMR_PPM_UNINIT_CHILD "
4903 					    "%s@%s(%s#%d)\n";
4904 					break;
4905 				default:
4906 					break;
4907 			}
4908 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4909 			    reqp->req.ppm_config_req.result))
4910 #endif
4911 			return (DDI_SUCCESS);
4912 
4913 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4914 			/*
4915 			 * Nothing for us to do
4916 			 */
4917 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4918 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4919 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4920 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4921 			    reqp->req.ppm_notify_level_req.cmpt,
4922 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4923 			    reqp->req.ppm_notify_level_req.cmpt),
4924 			    reqp->req.ppm_notify_level_req.new_level))
4925 			return (DDI_SUCCESS);
4926 
4927 		case PMR_PPM_UNMANAGE:
4928 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4929 			    pmf, PM_DEVICE(rdip)))
4930 			return (DDI_SUCCESS);
4931 
4932 		case PMR_PPM_LOCK_POWER:
4933 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4934 			    reqp->req.ppm_lock_power_req.circp);
4935 			return (DDI_SUCCESS);
4936 
4937 		case PMR_PPM_UNLOCK_POWER:
4938 			pm_unlock_power_single(
4939 			    reqp->req.ppm_unlock_power_req.who,
4940 			    reqp->req.ppm_unlock_power_req.circ);
4941 			return (DDI_SUCCESS);
4942 
4943 		case PMR_PPM_TRY_LOCK_POWER:
4944 			*(int *)result = pm_try_locking_power_single(
4945 			    reqp->req.ppm_lock_power_req.who,
4946 			    reqp->req.ppm_lock_power_req.circp);
4947 			return (DDI_SUCCESS);
4948 
4949 		case PMR_PPM_POWER_LOCK_OWNER:
4950 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4951 			ASSERT(target_dip == rdip);
4952 			reqp->req.ppm_power_lock_owner_req.owner =
4953 			    DEVI(rdip)->devi_busy_thread;
4954 			return (DDI_SUCCESS);
4955 		default:
4956 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4957 			return (DDI_FAILURE);
4958 		}
4959 
4960 	default:
4961 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4962 		return (DDI_FAILURE);
4963 	}
4964 }
4965 
4966 /*
4967  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4968  * power_ops struct for this functionality instead?
4969  * However, we only ever do this on a ppm driver.
4970  */
4971 int
4972 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4973 {
4974 	int (*fp)();
4975 
4976 	/* if no ppm handler, call the default routine */
4977 	if (d == NULL) {
4978 		return (pm_default_ctlops(d, r, op, a, v));
4979 	}
4980 	if (!d || !r)
4981 		return (DDI_FAILURE);
4982 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4983 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4984 
4985 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4986 	return ((*fp)(d, r, op, a, v));
4987 }
4988 
4989 /*
4990  * Called on a node when attach completes or the driver makes its first pm
4991  * call (whichever comes first).
4992  * In the attach case, device may not be power manageable at all.
4993  * Don't need to lock the dip because we're single threaded by the devfs code
4994  */
4995 static int
4996 pm_start(dev_info_t *dip)
4997 {
4998 	PMD_FUNC(pmf, "start")
4999 	int ret;
5000 	dev_info_t *pdip = ddi_get_parent(dip);
5001 	int e_pm_manage(dev_info_t *, int);
5002 	void pm_noinvol_specd(dev_info_t *dip);
5003 
5004 	e_pm_props(dip);
5005 	pm_noinvol_specd(dip);
5006 	/*
5007 	 * If this dip has already been processed, don't mess with it
5008 	 * (but decrement the speculative count we did above, as whatever
5009 	 * code put it under pm already will have dealt with it)
5010 	 */
5011 	if (PM_GET_PM_INFO(dip)) {
5012 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5013 		    pmf, PM_DEVICE(dip)))
5014 		return (0);
5015 	}
5016 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5017 
5018 	if (PM_GET_PM_INFO(dip) == NULL) {
5019 		/*
5020 		 * keep the kidsupcount increment as is
5021 		 */
5022 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5023 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5024 			pm_hold_power(pdip);
5025 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5026 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5027 			    (void *)dip, NULL, 0);
5028 		}
5029 
5030 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5031 		    "left up\n", pmf, PM_DEVICE(dip)))
5032 	}
5033 
5034 	return (ret);
5035 }
5036 
5037 /*
5038  * Keep a list of recorded thresholds.  For now we just keep a list and
5039  * search it linearly.  We don't expect too many entries.  Can always hash it
5040  * later if we need to.
5041  */
5042 void
5043 pm_record_thresh(pm_thresh_rec_t *rp)
5044 {
5045 	pm_thresh_rec_t *pptr, *ptr;
5046 
5047 	ASSERT(*rp->ptr_physpath);
5048 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5049 	for (pptr = NULL, ptr = pm_thresh_head;
5050 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5051 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5052 			/* replace this one */
5053 			rp->ptr_next = ptr->ptr_next;
5054 			if (pptr) {
5055 				pptr->ptr_next = rp;
5056 			} else {
5057 				pm_thresh_head = rp;
5058 			}
5059 			rw_exit(&pm_thresh_rwlock);
5060 			kmem_free(ptr, ptr->ptr_size);
5061 			return;
5062 		}
5063 		continue;
5064 	}
5065 	/*
5066 	 * There was not a match in the list, insert this one in front
5067 	 */
5068 	if (pm_thresh_head) {
5069 		rp->ptr_next = pm_thresh_head;
5070 		pm_thresh_head = rp;
5071 	} else {
5072 		rp->ptr_next = NULL;
5073 		pm_thresh_head = rp;
5074 	}
5075 	rw_exit(&pm_thresh_rwlock);
5076 }
5077 
5078 /*
5079  * Create a new dependency record and hang a new dependency entry off of it
5080  */
5081 pm_pdr_t *
5082 newpdr(char *kept, char *keeps, int isprop)
5083 {
5084 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5085 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5086 	p->pdr_size = size;
5087 	p->pdr_isprop = isprop;
5088 	p->pdr_kept_paths = NULL;
5089 	p->pdr_kept_count = 0;
5090 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5091 	(void) strcpy(p->pdr_kept, kept);
5092 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5093 	(void) strcpy(p->pdr_keeper, keeps);
5094 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5095 	    (intptr_t)p + size);
5096 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5097 	    (intptr_t)p + size);
5098 	return (p);
5099 }
5100 
5101 /*
5102  * Keep a list of recorded dependencies.  We only keep the
5103  * keeper -> kept list for simplification. At this point We do not
5104  * care about whether the devices are attached or not yet,
5105  * this would be done in pm_keeper() and pm_kept().
5106  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5107  * and it is up to the user to issue the ioctl again if they want it
5108  * (e.g. pmconfig)
5109  * Returns true if dependency already exists in the list.
5110  */
5111 int
5112 pm_record_keeper(char *kept, char *keeper, int isprop)
5113 {
5114 	PMD_FUNC(pmf, "record_keeper")
5115 	pm_pdr_t *npdr, *ppdr, *pdr;
5116 
5117 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5118 	ASSERT(kept && keeper);
5119 #ifdef DEBUG
5120 	if (pm_debug & PMD_KEEPS)
5121 		prdeps("pm_record_keeper entry");
5122 #endif
5123 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5124 	    ppdr = pdr, pdr = pdr->pdr_next) {
5125 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5126 		    pdr->pdr_keeper))
5127 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5128 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5129 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5130 			return (1);
5131 		}
5132 	}
5133 	/*
5134 	 * We did not find any match, so we have to make an entry
5135 	 */
5136 	npdr = newpdr(kept, keeper, isprop);
5137 	if (ppdr) {
5138 		ASSERT(ppdr->pdr_next == NULL);
5139 		ppdr->pdr_next = npdr;
5140 	} else {
5141 		ASSERT(pm_dep_head == NULL);
5142 		pm_dep_head = npdr;
5143 	}
5144 #ifdef DEBUG
5145 	if (pm_debug & PMD_KEEPS)
5146 		prdeps("pm_record_keeper after new record");
5147 #endif
5148 	if (!isprop)
5149 		pm_unresolved_deps++;
5150 	else
5151 		pm_prop_deps++;
5152 	return (0);
5153 }
5154 
5155 /*
5156  * Look up this device in the set of devices we've seen ioctls for
5157  * to see if we are holding a threshold spec for it.  If so, make it so.
5158  * At ioctl time, we were given the physical path of the device.
5159  */
5160 int
5161 pm_thresh_specd(dev_info_t *dip)
5162 {
5163 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5164 	char *path = 0;
5165 	char pathbuf[MAXNAMELEN];
5166 	pm_thresh_rec_t *rp;
5167 
5168 	path = ddi_pathname(dip, pathbuf);
5169 
5170 	rw_enter(&pm_thresh_rwlock, RW_READER);
5171 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5172 		if (strcmp(rp->ptr_physpath, path) != 0)
5173 			continue;
5174 		pm_apply_recorded_thresh(dip, rp);
5175 		rw_exit(&pm_thresh_rwlock);
5176 		return (1);
5177 	}
5178 	rw_exit(&pm_thresh_rwlock);
5179 	return (0);
5180 }
5181 
5182 static int
5183 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5184 {
5185 	PMD_FUNC(pmf, "set_keeping")
5186 	pm_info_t *kept_info;
5187 	int j, up = 0, circ;
5188 	void prdeps(char *);
5189 
5190 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5191 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5192 #ifdef DEBUG
5193 	if (pm_debug & PMD_KEEPS)
5194 		prdeps("Before PAD\n");
5195 #endif
5196 	ASSERT(keeper != kept);
5197 	if (PM_GET_PM_INFO(keeper) == NULL) {
5198 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5199 		    "%s@%s(%s#%d), but the latter is not power managed",
5200 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5201 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5202 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5203 		return (0);
5204 	}
5205 	kept_info = PM_GET_PM_INFO(kept);
5206 	ASSERT(kept_info);
5207 	PM_LOCK_POWER(keeper, &circ);
5208 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5209 		if (PM_CURPOWER(keeper, j)) {
5210 			up++;
5211 			break;
5212 		}
5213 	}
5214 	if (up) {
5215 		/* Bringup and maintain a hold on the kept */
5216 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5217 		    PM_DEVICE(kept)))
5218 		bring_pmdep_up(kept, 1);
5219 	}
5220 	PM_UNLOCK_POWER(keeper, circ);
5221 #ifdef DEBUG
5222 	if (pm_debug & PMD_KEEPS)
5223 		prdeps("After PAD\n");
5224 #endif
5225 	return (1);
5226 }
5227 
5228 /*
5229  * Should this device keep up another device?
5230  * Look up this device in the set of devices we've seen ioctls for
5231  * to see if we are holding a dependency spec for it.  If so, make it so.
5232  * Because we require the kept device to be attached already in order to
5233  * make the list entry (and hold it), we only need to look for keepers.
5234  * At ioctl time, we were given the physical path of the device.
5235  */
5236 int
5237 pm_keeper(char *keeper)
5238 {
5239 	PMD_FUNC(pmf, "keeper")
5240 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5241 	dev_info_t *dip;
5242 	pm_pdr_t *dp;
5243 	dev_info_t *kept = NULL;
5244 	int ret = 0;
5245 	int i;
5246 
5247 	if (!pm_unresolved_deps && !pm_prop_deps)
5248 		return (0);
5249 	ASSERT(keeper != NULL);
5250 	dip = pm_name_to_dip(keeper, 1);
5251 	if (dip == NULL)
5252 		return (0);
5253 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5254 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5255 		if (!dp->pdr_isprop) {
5256 			if (!pm_unresolved_deps)
5257 				continue;
5258 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5259 			if (dp->pdr_satisfied) {
5260 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5261 				continue;
5262 			}
5263 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5264 				ret += pm_apply_recorded_dep(dip, dp);
5265 			}
5266 		} else {
5267 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5268 				continue;
5269 			for (i = 0; i < dp->pdr_kept_count; i++) {
5270 				if (dp->pdr_kept_paths[i] == NULL)
5271 					continue;
5272 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5273 				if (kept == NULL)
5274 					continue;
5275 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5276 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5277 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5278 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5279 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5280 				    dp->pdr_kept_count))
5281 				if (kept != dip) {
5282 					ret += pm_set_keeping(dip, kept);
5283 				}
5284 				ddi_release_devi(kept);
5285 			}
5286 
5287 		}
5288 	}
5289 	ddi_release_devi(dip);
5290 	return (ret);
5291 }
5292 
5293 /*
5294  * Should this device be kept up by another device?
5295  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5296  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5297  * kept device lists.
5298  */
5299 static int
5300 pm_kept(char *keptp)
5301 {
5302 	PMD_FUNC(pmf, "kept")
5303 	pm_pdr_t *dp;
5304 	int found = 0;
5305 	int ret = 0;
5306 	dev_info_t *keeper;
5307 	dev_info_t *kept;
5308 	size_t length;
5309 	int i;
5310 	char **paths;
5311 	char *path;
5312 
5313 	ASSERT(keptp != NULL);
5314 	kept = pm_name_to_dip(keptp, 1);
5315 	if (kept == NULL)
5316 		return (0);
5317 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5318 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5319 		if (dp->pdr_isprop) {
5320 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5321 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5322 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5323 				/*
5324 				 * Dont allow self dependency.
5325 				 */
5326 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5327 					continue;
5328 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5329 				if (keeper == NULL)
5330 					continue;
5331 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5332 				    "%p\n", pmf, (void *)kept))
5333 #ifdef DEBUG
5334 				if (pm_debug & PMD_DEP)
5335 					prdeps("Before Adding from pm_kept\n");
5336 #endif
5337 				/*
5338 				 * Add ourselves to the dip list.
5339 				 */
5340 				if (dp->pdr_kept_count == 0) {
5341 					length = strlen(keptp) + 1;
5342 					path =
5343 					    kmem_alloc(length, KM_SLEEP);
5344 					paths = kmem_alloc(sizeof (char **),
5345 					    KM_SLEEP);
5346 					(void) strcpy(path, keptp);
5347 					paths[0] = path;
5348 					dp->pdr_kept_paths = paths;
5349 					dp->pdr_kept_count++;
5350 				} else {
5351 					/* Check to see if already on list */
5352 					for (i = 0; i < dp->pdr_kept_count;
5353 					    i++) {
5354 						if (strcmp(keptp,
5355 						    dp->pdr_kept_paths[i])
5356 						    == 0) {
5357 							found++;
5358 							break;
5359 						}
5360 					}
5361 					if (found) {
5362 						ddi_release_devi(keeper);
5363 						continue;
5364 					}
5365 					length = dp->pdr_kept_count *
5366 					    sizeof (char **);
5367 					paths = kmem_alloc(
5368 					    length + sizeof (char **),
5369 					    KM_SLEEP);
5370 					if (dp->pdr_kept_count) {
5371 						bcopy(dp->pdr_kept_paths,
5372 						    paths, length);
5373 						kmem_free(dp->pdr_kept_paths,
5374 						    length);
5375 					}
5376 					dp->pdr_kept_paths = paths;
5377 					length = strlen(keptp) + 1;
5378 					path =
5379 					    kmem_alloc(length, KM_SLEEP);
5380 					(void) strcpy(path, keptp);
5381 					dp->pdr_kept_paths[i] = path;
5382 					dp->pdr_kept_count++;
5383 				}
5384 #ifdef DEBUG
5385 				if (pm_debug & PMD_DEP)
5386 					prdeps("After from pm_kept\n");
5387 #endif
5388 				if (keeper) {
5389 					ret += pm_set_keeping(keeper, kept);
5390 					ddi_release_devi(keeper);
5391 				}
5392 			}
5393 		} else {
5394 			/*
5395 			 * pm_keeper would be called later to do
5396 			 * the actual pm_set_keeping.
5397 			 */
5398 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5399 			    pmf, (void *)kept))
5400 #ifdef DEBUG
5401 			if (pm_debug & PMD_DEP)
5402 				prdeps("Before Adding from pm_kept\n");
5403 #endif
5404 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5405 				if (dp->pdr_kept_paths == NULL) {
5406 					length = strlen(keptp) + 1;
5407 					path =
5408 					    kmem_alloc(length, KM_SLEEP);
5409 					paths = kmem_alloc(sizeof (char **),
5410 					    KM_SLEEP);
5411 					(void) strcpy(path, keptp);
5412 					paths[0] = path;
5413 					dp->pdr_kept_paths = paths;
5414 					dp->pdr_kept_count++;
5415 				}
5416 			}
5417 #ifdef DEBUG
5418 			if (pm_debug & PMD_DEP)
5419 				prdeps("After from pm_kept\n");
5420 #endif
5421 		}
5422 	}
5423 	ddi_release_devi(kept);
5424 	return (ret);
5425 }
5426 
5427 /*
5428  * Apply a recorded dependency.  dp specifies the dependency, and
5429  * keeper is already known to be the device that keeps up the other (kept) one.
5430  * We have to the whole tree for the "kept" device, then apply
5431  * the dependency (which may already be applied).
5432  */
5433 int
5434 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5435 {
5436 	PMD_FUNC(pmf, "apply_recorded_dep")
5437 	dev_info_t *kept = NULL;
5438 	int ret = 0;
5439 	char *keptp = NULL;
5440 
5441 	/*
5442 	 * Device to Device dependency can only be 1 to 1.
5443 	 */
5444 	if (dp->pdr_kept_paths == NULL)
5445 		return (0);
5446 	keptp = dp->pdr_kept_paths[0];
5447 	if (keptp == NULL)
5448 		return (0);
5449 	ASSERT(*keptp != '\0');
5450 	kept = pm_name_to_dip(keptp, 1);
5451 	if (kept == NULL)
5452 		return (0);
5453 	if (kept) {
5454 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5455 		    dp->pdr_keeper, keptp))
5456 		if (pm_set_keeping(keeper, kept)) {
5457 			ASSERT(dp->pdr_satisfied == 0);
5458 			dp->pdr_satisfied = 1;
5459 			ASSERT(pm_unresolved_deps);
5460 			pm_unresolved_deps--;
5461 			ret++;
5462 		}
5463 	}
5464 	ddi_release_devi(kept);
5465 
5466 	return (ret);
5467 }
5468 
5469 /*
5470  * Called from common/io/pm.c
5471  */
5472 int
5473 pm_cur_power(pm_component_t *cp)
5474 {
5475 	return (cur_power(cp));
5476 }
5477 
5478 /*
5479  * External interface to sanity-check a power level.
5480  */
5481 int
5482 pm_valid_power(dev_info_t *dip, int comp, int level)
5483 {
5484 	PMD_FUNC(pmf, "valid_power")
5485 
5486 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5487 		return (e_pm_valid_power(dip, comp, level));
5488 	else {
5489 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5490 		    pmf, comp, PM_NUMCMPTS(dip), level))
5491 		return (0);
5492 	}
5493 }
5494 
5495 /*
5496  * Called when a device that is direct power managed needs to change state.
5497  * This routine arranges to block the request until the process managing
5498  * the device makes the change (or some other incompatible change) or
5499  * the process closes /dev/pm.
5500  */
5501 static int
5502 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5503 {
5504 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5505 	int ret = 0;
5506 	void pm_dequeue_blocked(pm_rsvp_t *);
5507 	void pm_enqueue_blocked(pm_rsvp_t *);
5508 
5509 	ASSERT(!pm_processes_stopped);
5510 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5511 	new->pr_dip = dip;
5512 	new->pr_comp = comp;
5513 	new->pr_newlevel = newpower;
5514 	new->pr_oldlevel = oldpower;
5515 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5516 	mutex_enter(&pm_rsvp_lock);
5517 	pm_enqueue_blocked(new);
5518 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5519 	    PM_CANBLOCK_BLOCK);
5520 	PM_UNLOCK_DIP(dip);
5521 	/*
5522 	 * truss may make the cv_wait_sig return prematurely
5523 	 */
5524 	while (ret == 0) {
5525 		/*
5526 		 * Normally there will be no user context involved, but if
5527 		 * there is (e.g. we are here via an ioctl call to a driver)
5528 		 * then we should allow the process to abort the request,
5529 		 * or we get an unkillable process if the same thread does
5530 		 * PM_DIRECT_PM and pm_raise_power
5531 		 */
5532 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5533 			ret = PMP_FAIL;
5534 		} else {
5535 			ret = new->pr_retval;
5536 		}
5537 	}
5538 	pm_dequeue_blocked(new);
5539 	mutex_exit(&pm_rsvp_lock);
5540 	cv_destroy(&new->pr_cv);
5541 	kmem_free(new, sizeof (*new));
5542 	return (ret);
5543 }
5544 
5545 /*
5546  * Returns true if the process is interested in power level changes (has issued
5547  * PM_GET_STATE_CHANGE ioctl).
5548  */
5549 int
5550 pm_interest_registered(int clone)
5551 {
5552 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5553 	return (pm_interest[clone]);
5554 }
5555 
5556 /*
5557  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5558  * watch all state transitions (dip == NULL).  Set up data
5559  * structs to communicate with process about state changes.
5560  */
5561 void
5562 pm_register_watcher(int clone, dev_info_t *dip)
5563 {
5564 	pscc_t	*p;
5565 	psce_t	*psce;
5566 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5567 
5568 	/*
5569 	 * We definitely need a control struct, then we have to search to see
5570 	 * there is already an entries struct (in the dip != NULL case).
5571 	 */
5572 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5573 	pscc->pscc_clone = clone;
5574 	pscc->pscc_dip = dip;
5575 
5576 	if (dip) {
5577 		int found = 0;
5578 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5579 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5580 			/*
5581 			 * Already an entry for this clone, so just use it
5582 			 * for the new one (for the case where a single
5583 			 * process is watching multiple devices)
5584 			 */
5585 			if (p->pscc_clone == clone) {
5586 				pscc->pscc_entries = p->pscc_entries;
5587 				pscc->pscc_entries->psce_references++;
5588 				found++;
5589 				break;
5590 			}
5591 		}
5592 		if (!found) {		/* create a new one */
5593 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5594 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5595 			psce->psce_first =
5596 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5597 			    KM_SLEEP);
5598 			psce->psce_in = psce->psce_out = psce->psce_first;
5599 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5600 			psce->psce_references = 1;
5601 			pscc->pscc_entries = psce;
5602 		}
5603 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5604 		rw_exit(&pm_pscc_direct_rwlock);
5605 	} else {
5606 		ASSERT(!pm_interest_registered(clone));
5607 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5608 #ifdef DEBUG
5609 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5610 			/*
5611 			 * Should not be an entry for this clone!
5612 			 */
5613 			ASSERT(p->pscc_clone != clone);
5614 		}
5615 #endif
5616 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5617 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5618 		    PSCCOUNT, KM_SLEEP);
5619 		psce->psce_in = psce->psce_out = psce->psce_first;
5620 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5621 		psce->psce_references = 1;
5622 		pscc->pscc_entries = psce;
5623 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5624 		pm_interest[clone] = 1;
5625 		rw_exit(&pm_pscc_interest_rwlock);
5626 	}
5627 }
5628 
5629 /*
5630  * Remove the given entry from the blocked list
5631  */
5632 void
5633 pm_dequeue_blocked(pm_rsvp_t *p)
5634 {
5635 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5636 	if (pm_blocked_list == p) {
5637 		ASSERT(p->pr_prev == NULL);
5638 		if (p->pr_next != NULL)
5639 			p->pr_next->pr_prev = NULL;
5640 		pm_blocked_list = p->pr_next;
5641 	} else {
5642 		ASSERT(p->pr_prev != NULL);
5643 		p->pr_prev->pr_next = p->pr_next;
5644 		if (p->pr_next != NULL)
5645 			p->pr_next->pr_prev = p->pr_prev;
5646 	}
5647 }
5648 
5649 /*
5650  * Remove the given control struct from the given list
5651  */
5652 static void
5653 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5654 {
5655 	if (*list == p) {
5656 		ASSERT(p->pscc_prev == NULL);
5657 		if (p->pscc_next != NULL)
5658 			p->pscc_next->pscc_prev = NULL;
5659 		*list = p->pscc_next;
5660 	} else {
5661 		ASSERT(p->pscc_prev != NULL);
5662 		p->pscc_prev->pscc_next = p->pscc_next;
5663 		if (p->pscc_next != NULL)
5664 			p->pscc_next->pscc_prev = p->pscc_prev;
5665 	}
5666 }
5667 
5668 /*
5669  * Stick the control struct specified on the front of the list
5670  */
5671 static void
5672 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5673 {
5674 	pscc_t *h;	/* entry at head of list */
5675 	if ((h = *list) == NULL) {
5676 		*list = p;
5677 		ASSERT(p->pscc_next == NULL);
5678 		ASSERT(p->pscc_prev == NULL);
5679 	} else {
5680 		p->pscc_next = h;
5681 		ASSERT(h->pscc_prev == NULL);
5682 		h->pscc_prev = p;
5683 		ASSERT(p->pscc_prev == NULL);
5684 		*list = p;
5685 	}
5686 }
5687 
5688 /*
5689  * If dip is NULL, process is closing "clone" clean up all its registrations.
5690  * Otherwise only clean up those for dip because process is just giving up
5691  * control of a direct device.
5692  */
5693 void
5694 pm_deregister_watcher(int clone, dev_info_t *dip)
5695 {
5696 	pscc_t	*p, *pn;
5697 	psce_t	*psce;
5698 	int found = 0;
5699 
5700 	if (dip == NULL) {
5701 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5702 		for (p = pm_pscc_interest; p; p = pn) {
5703 			pn = p->pscc_next;
5704 			if (p->pscc_clone == clone) {
5705 				pm_dequeue_pscc(p, &pm_pscc_interest);
5706 				psce = p->pscc_entries;
5707 				ASSERT(psce->psce_references == 1);
5708 				mutex_destroy(&psce->psce_lock);
5709 				kmem_free(psce->psce_first,
5710 				    sizeof (pm_state_change_t) * PSCCOUNT);
5711 				kmem_free(psce, sizeof (*psce));
5712 				kmem_free(p, sizeof (*p));
5713 			}
5714 		}
5715 		pm_interest[clone] = 0;
5716 		rw_exit(&pm_pscc_interest_rwlock);
5717 	}
5718 	found = 0;
5719 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5720 	for (p = pm_pscc_direct; p; p = pn) {
5721 		pn = p->pscc_next;
5722 		if ((dip && p->pscc_dip == dip) ||
5723 		    (dip == NULL && clone == p->pscc_clone)) {
5724 			ASSERT(clone == p->pscc_clone);
5725 			found++;
5726 			/*
5727 			 * Remove from control list
5728 			 */
5729 			pm_dequeue_pscc(p, &pm_pscc_direct);
5730 			/*
5731 			 * If we're the last reference, free the
5732 			 * entries struct.
5733 			 */
5734 			psce = p->pscc_entries;
5735 			ASSERT(psce);
5736 			if (psce->psce_references == 1) {
5737 				kmem_free(psce->psce_first,
5738 				    PSCCOUNT * sizeof (pm_state_change_t));
5739 				kmem_free(psce, sizeof (*psce));
5740 			} else {
5741 				psce->psce_references--;
5742 			}
5743 			kmem_free(p, sizeof (*p));
5744 		}
5745 	}
5746 	ASSERT(dip == NULL || found);
5747 	rw_exit(&pm_pscc_direct_rwlock);
5748 }
5749 
5750 /*
5751  * Search the indicated list for an entry that matches clone, and return a
5752  * pointer to it.  To be interesting, the entry must have something ready to
5753  * be passed up to the controlling process.
5754  * The returned entry will be locked upon return from this call.
5755  */
5756 static psce_t *
5757 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5758 {
5759 	pscc_t	*p;
5760 	psce_t	*psce;
5761 	rw_enter(lock, RW_READER);
5762 	for (p = *list; p; p = p->pscc_next) {
5763 		if (clone == p->pscc_clone) {
5764 			psce = p->pscc_entries;
5765 			mutex_enter(&psce->psce_lock);
5766 			if (psce->psce_out->size) {
5767 				rw_exit(lock);
5768 				return (psce);
5769 			} else {
5770 				mutex_exit(&psce->psce_lock);
5771 			}
5772 		}
5773 	}
5774 	rw_exit(lock);
5775 	return (NULL);
5776 }
5777 
5778 /*
5779  * Find an entry for a particular clone in the direct list.
5780  */
5781 psce_t *
5782 pm_psc_clone_to_direct(int clone)
5783 {
5784 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5785 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5786 	    &pm_pscc_direct_rwlock));
5787 }
5788 
5789 /*
5790  * Find an entry for a particular clone in the interest list.
5791  */
5792 psce_t *
5793 pm_psc_clone_to_interest(int clone)
5794 {
5795 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5796 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5797 	    &pm_pscc_interest_rwlock));
5798 }
5799 
5800 /*
5801  * Put the given entry at the head of the blocked list
5802  */
5803 void
5804 pm_enqueue_blocked(pm_rsvp_t *p)
5805 {
5806 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5807 	ASSERT(p->pr_next == NULL);
5808 	ASSERT(p->pr_prev == NULL);
5809 	if (pm_blocked_list != NULL) {
5810 		p->pr_next = pm_blocked_list;
5811 		ASSERT(pm_blocked_list->pr_prev == NULL);
5812 		pm_blocked_list->pr_prev = p;
5813 		pm_blocked_list = p;
5814 	} else {
5815 		pm_blocked_list = p;
5816 	}
5817 }
5818 
5819 /*
5820  * Sets every power managed device back to its default threshold
5821  */
5822 void
5823 pm_all_to_default_thresholds(void)
5824 {
5825 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5826 	    (void *) &pm_system_idle_threshold);
5827 }
5828 
5829 static int
5830 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5831 {
5832 	int thr = (int)(*(int *)arg);
5833 
5834 	if (!PM_GET_PM_INFO(dip))
5835 		return (DDI_WALK_CONTINUE);
5836 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5837 	return (DDI_WALK_CONTINUE);
5838 }
5839 
5840 /*
5841  * Returns the current threshold value (in seconds) for the indicated component
5842  */
5843 int
5844 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5845 {
5846 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5847 		return (DDI_FAILURE);
5848 	} else {
5849 		*threshp = cur_threshold(dip, comp);
5850 		return (DDI_SUCCESS);
5851 	}
5852 }
5853 
5854 /*
5855  * To be called when changing the power level of a component of a device.
5856  * On some platforms, changing power on one device may require that power
5857  * be changed on other, related devices in the same transaction.  Thus, we
5858  * always pass this request to the platform power manager so that all the
5859  * affected devices will be locked.
5860  */
5861 void
5862 pm_lock_power(dev_info_t *dip, int *circp)
5863 {
5864 	power_req_t power_req;
5865 	int result;
5866 
5867 	power_req.request_type = PMR_PPM_LOCK_POWER;
5868 	power_req.req.ppm_lock_power_req.who = dip;
5869 	power_req.req.ppm_lock_power_req.circp = circp;
5870 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5871 }
5872 
5873 /*
5874  * Release the lock (or locks) acquired to change the power of a device.
5875  * See comments for pm_lock_power.
5876  */
5877 void
5878 pm_unlock_power(dev_info_t *dip, int circ)
5879 {
5880 	power_req_t power_req;
5881 	int result;
5882 
5883 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5884 	power_req.req.ppm_unlock_power_req.who = dip;
5885 	power_req.req.ppm_unlock_power_req.circ = circ;
5886 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5887 }
5888 
5889 
5890 /*
5891  * Attempt (without blocking) to acquire the lock(s) needed to change the
5892  * power of a component of a device.  See comments for pm_lock_power.
5893  *
5894  * Return: 1 if lock(s) acquired, 0 if not.
5895  */
5896 int
5897 pm_try_locking_power(dev_info_t *dip, int *circp)
5898 {
5899 	power_req_t power_req;
5900 	int result;
5901 
5902 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5903 	power_req.req.ppm_lock_power_req.who = dip;
5904 	power_req.req.ppm_lock_power_req.circp = circp;
5905 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5906 	return (result);
5907 }
5908 
5909 
5910 /*
5911  * Lock power state of a device.
5912  *
5913  * The implementation handles a special case where another thread may have
5914  * acquired the lock and created/launched this thread to do the work.  If
5915  * the lock cannot be acquired immediately, we check to see if this thread
5916  * is registered as a borrower of the lock.  If so, we may proceed without
5917  * the lock.  This assumes that the lending thread blocks on the completion
5918  * of this thread.
5919  *
5920  * Note 1: for use by ppm only.
5921  *
5922  * Note 2: On failing to get the lock immediately, we search lock_loan list
5923  * for curthread (as borrower of the lock).  On a hit, we check that the
5924  * lending thread already owns the lock we want.  It is safe to compare
5925  * devi_busy_thread and thread id of the lender because in the == case (the
5926  * only one we care about) we know that the owner is blocked.  Similarly,
5927  * If we find that curthread isn't registered as a lock borrower, it is safe
5928  * to use the blocking call (ndi_devi_enter) because we know that if we
5929  * weren't already listed as a borrower (upstream on the call stack) we won't
5930  * become one.
5931  */
5932 void
5933 pm_lock_power_single(dev_info_t *dip, int *circp)
5934 {
5935 	lock_loan_t *cur;
5936 
5937 	/* if the lock is available, we are done. */
5938 	if (ndi_devi_tryenter(dip, circp))
5939 		return;
5940 
5941 	mutex_enter(&pm_loan_lock);
5942 	/* see if our thread is registered as a lock borrower. */
5943 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5944 		if (cur->pmlk_borrower == curthread)
5945 			break;
5946 	mutex_exit(&pm_loan_lock);
5947 
5948 	/* if this thread not already registered, it is safe to block */
5949 	if (cur == NULL)
5950 		ndi_devi_enter(dip, circp);
5951 	else {
5952 		/* registered: does lender own the lock we want? */
5953 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5954 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5955 			cur->pmlk_dip = dip;
5956 		} else /* no: just block for it */
5957 			ndi_devi_enter(dip, circp);
5958 
5959 	}
5960 }
5961 
5962 /*
5963  * Drop the lock on the device's power state.  See comment for
5964  * pm_lock_power_single() for special implementation considerations.
5965  *
5966  * Note: for use by ppm only.
5967  */
5968 void
5969 pm_unlock_power_single(dev_info_t *dip, int circ)
5970 {
5971 	lock_loan_t *cur;
5972 
5973 	/* optimization: mutex not needed to check empty list */
5974 	if (lock_loan_head.pmlk_next == NULL) {
5975 		ndi_devi_exit(dip, circ);
5976 		return;
5977 	}
5978 
5979 	mutex_enter(&pm_loan_lock);
5980 	/* see if our thread is registered as a lock borrower. */
5981 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5982 		if (cur->pmlk_borrower == curthread)
5983 			break;
5984 	mutex_exit(&pm_loan_lock);
5985 
5986 	if (cur == NULL || cur->pmlk_dip != dip)
5987 		/* we acquired the lock directly, so return it */
5988 		ndi_devi_exit(dip, circ);
5989 }
5990 
5991 /*
5992  * Try to take the lock for changing the power level of a component.
5993  *
5994  * Note: for use by ppm only.
5995  */
5996 int
5997 pm_try_locking_power_single(dev_info_t *dip, int *circp)
5998 {
5999 	return (ndi_devi_tryenter(dip, circp));
6000 }
6001 
6002 #ifdef	DEBUG
6003 /*
6004  * The following are used only to print out data structures for debugging
6005  */
6006 void
6007 prdeps(char *msg)
6008 {
6009 
6010 	pm_pdr_t *rp;
6011 	int i;
6012 
6013 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6014 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6015 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6016 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6017 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6018 		    (void *)rp->pdr_next);
6019 		if (rp->pdr_kept_count != 0) {
6020 			pm_log("kept list = ");
6021 			i = 0;
6022 			while (i < rp->pdr_kept_count) {
6023 				pm_log("%s ", rp->pdr_kept_paths[i]);
6024 				i++;
6025 			}
6026 			pm_log("\n");
6027 		}
6028 	}
6029 }
6030 
6031 void
6032 pr_noinvol(char *hdr)
6033 {
6034 	pm_noinvol_t *ip;
6035 
6036 	pm_log("%s\n", hdr);
6037 	rw_enter(&pm_noinvol_rwlock, RW_READER);
6038 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6039 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6040 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6041 	rw_exit(&pm_noinvol_rwlock);
6042 }
6043 #endif
6044 
6045 /*
6046  * Attempt to apply the thresholds indicated by rp to the node specified by
6047  * dip.
6048  */
6049 void
6050 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6051 {
6052 	PMD_FUNC(pmf, "apply_recorded_thresh")
6053 	int i, j;
6054 	int comps = PM_NUMCMPTS(dip);
6055 	struct pm_component *cp;
6056 	pm_pte_t *ep;
6057 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
6058 
6059 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
6060 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
6061 	PM_LOCK_DIP(dip);
6062 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
6063 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
6064 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
6065 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
6066 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
6067 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
6068 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
6069 		PM_UNLOCK_DIP(dip);
6070 		return;
6071 	}
6072 
6073 	ep = rp->ptr_entries;
6074 	/*
6075 	 * Here we do the special case of a device threshold
6076 	 */
6077 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6078 		ASSERT(ep && ep->pte_numthresh == 1);
6079 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6080 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6081 		PM_UNLOCK_DIP(dip);
6082 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6083 		if (PM_SCANABLE(dip))
6084 			pm_rescan(dip);
6085 		return;
6086 	}
6087 	for (i = 0; i < comps; i++) {
6088 		cp = PM_CP(dip, i);
6089 		for (j = 0; j < ep->pte_numthresh; j++) {
6090 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6091 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6092 			    i, ep->pte_thresh[j]))
6093 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6094 		}
6095 		ep++;
6096 	}
6097 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6098 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6099 	PM_UNLOCK_DIP(dip);
6100 
6101 	if (PM_SCANABLE(dip))
6102 		pm_rescan(dip);
6103 }
6104 
6105 /*
6106  * Returns true if the threshold specified by rp could be applied to dip
6107  * (that is, the number of components and transitions are the same)
6108  */
6109 int
6110 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6111 {
6112 	PMD_FUNC(pmf, "valid_thresh")
6113 	int comps, i;
6114 	pm_component_t *cp;
6115 	pm_pte_t *ep;
6116 
6117 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6118 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6119 		    rp->ptr_physpath))
6120 		return (0);
6121 	}
6122 	/*
6123 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6124 	 * an entry with numcomps == 0, (since we don't know how many
6125 	 * components there are in advance).  This is always a valid
6126 	 * spec.
6127 	 */
6128 	if (rp->ptr_numcomps == 0) {
6129 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6130 		return (1);
6131 	}
6132 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6133 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6134 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6135 		return (0);
6136 	}
6137 	ep = rp->ptr_entries;
6138 	for (i = 0; i < comps; i++) {
6139 		cp = PM_CP(dip, i);
6140 		if ((ep + i)->pte_numthresh !=
6141 		    cp->pmc_comp.pmc_numlevels - 1) {
6142 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6143 			    pmf, rp->ptr_physpath, i,
6144 			    cp->pmc_comp.pmc_numlevels - 1,
6145 			    (ep + i)->pte_numthresh))
6146 			return (0);
6147 		}
6148 	}
6149 	return (1);
6150 }
6151 
6152 /*
6153  * Remove any recorded threshold for device physpath
6154  * We know there will be at most one.
6155  */
6156 void
6157 pm_unrecord_threshold(char *physpath)
6158 {
6159 	pm_thresh_rec_t *pptr, *ptr;
6160 
6161 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6162 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6163 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6164 			if (pptr) {
6165 				pptr->ptr_next = ptr->ptr_next;
6166 			} else {
6167 				ASSERT(pm_thresh_head == ptr);
6168 				pm_thresh_head = ptr->ptr_next;
6169 			}
6170 			kmem_free(ptr, ptr->ptr_size);
6171 			break;
6172 		}
6173 		pptr = ptr;
6174 	}
6175 	rw_exit(&pm_thresh_rwlock);
6176 }
6177 
6178 /*
6179  * Discard all recorded thresholds.  We are returning to the default pm state.
6180  */
6181 void
6182 pm_discard_thresholds(void)
6183 {
6184 	pm_thresh_rec_t *rp;
6185 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6186 	while (pm_thresh_head) {
6187 		rp = pm_thresh_head;
6188 		pm_thresh_head = rp->ptr_next;
6189 		kmem_free(rp, rp->ptr_size);
6190 	}
6191 	rw_exit(&pm_thresh_rwlock);
6192 }
6193 
6194 /*
6195  * Discard all recorded dependencies.  We are returning to the default pm state.
6196  */
6197 void
6198 pm_discard_dependencies(void)
6199 {
6200 	pm_pdr_t *rp;
6201 	int i;
6202 	size_t length;
6203 
6204 #ifdef DEBUG
6205 	if (pm_debug & PMD_DEP)
6206 		prdeps("Before discard\n");
6207 #endif
6208 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6209 
6210 #ifdef DEBUG
6211 	if (pm_debug & PMD_DEP)
6212 		prdeps("After discard\n");
6213 #endif
6214 	while (pm_dep_head) {
6215 		rp = pm_dep_head;
6216 		if (!rp->pdr_isprop) {
6217 			ASSERT(rp->pdr_satisfied == 0);
6218 			ASSERT(pm_unresolved_deps);
6219 			pm_unresolved_deps--;
6220 		} else {
6221 			ASSERT(pm_prop_deps);
6222 			pm_prop_deps--;
6223 		}
6224 		pm_dep_head = rp->pdr_next;
6225 		if (rp->pdr_kept_count)  {
6226 			for (i = 0; i < rp->pdr_kept_count; i++) {
6227 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6228 				kmem_free(rp->pdr_kept_paths[i], length);
6229 			}
6230 			kmem_free(rp->pdr_kept_paths,
6231 			    rp->pdr_kept_count * sizeof (char **));
6232 		}
6233 		kmem_free(rp, rp->pdr_size);
6234 	}
6235 }
6236 
6237 
6238 static int
6239 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6240 {
6241 	_NOTE(ARGUNUSED(arg))
6242 	char *pathbuf;
6243 
6244 	if (PM_GET_PM_INFO(dip) == NULL)
6245 		return (DDI_WALK_CONTINUE);
6246 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6247 	(void) ddi_pathname(dip, pathbuf);
6248 	pm_free_keeper(pathbuf, 0);
6249 	kmem_free(pathbuf, MAXPATHLEN);
6250 	return (DDI_WALK_CONTINUE);
6251 }
6252 
6253 static int
6254 pm_kept_walk(dev_info_t *dip, void *arg)
6255 {
6256 	_NOTE(ARGUNUSED(arg))
6257 	char *pathbuf;
6258 
6259 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6260 	(void) ddi_pathname(dip, pathbuf);
6261 	(void) pm_kept(pathbuf);
6262 	kmem_free(pathbuf, MAXPATHLEN);
6263 
6264 	return (DDI_WALK_CONTINUE);
6265 }
6266 
6267 static int
6268 pm_keeper_walk(dev_info_t *dip, void *arg)
6269 {
6270 	_NOTE(ARGUNUSED(arg))
6271 	char *pathbuf;
6272 
6273 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6274 	(void) ddi_pathname(dip, pathbuf);
6275 	(void) pm_keeper(pathbuf);
6276 	kmem_free(pathbuf, MAXPATHLEN);
6277 
6278 	return (DDI_WALK_CONTINUE);
6279 }
6280 
6281 static char *
6282 pdw_type_decode(int type)
6283 {
6284 	switch (type) {
6285 	case PM_DEP_WK_POWER_ON:
6286 		return ("power on");
6287 	case PM_DEP_WK_POWER_OFF:
6288 		return ("power off");
6289 	case PM_DEP_WK_DETACH:
6290 		return ("detach");
6291 	case PM_DEP_WK_REMOVE_DEP:
6292 		return ("remove dep");
6293 	case PM_DEP_WK_BRINGUP_SELF:
6294 		return ("bringup self");
6295 	case PM_DEP_WK_RECORD_KEEPER:
6296 		return ("add dependent");
6297 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6298 		return ("add dependent property");
6299 	case PM_DEP_WK_KEPT:
6300 		return ("kept");
6301 	case PM_DEP_WK_KEEPER:
6302 		return ("keeper");
6303 	case PM_DEP_WK_ATTACH:
6304 		return ("attach");
6305 	case PM_DEP_WK_CHECK_KEPT:
6306 		return ("check kept");
6307 	case PM_DEP_WK_CPR_SUSPEND:
6308 		return ("suspend");
6309 	case PM_DEP_WK_CPR_RESUME:
6310 		return ("resume");
6311 	default:
6312 		return ("unknown");
6313 	}
6314 
6315 }
6316 
6317 static void
6318 pm_rele_dep(char *keeper)
6319 {
6320 	PMD_FUNC(pmf, "rele_dep")
6321 	pm_pdr_t *dp;
6322 	char *kept_path = NULL;
6323 	dev_info_t *kept = NULL;
6324 	int count = 0;
6325 
6326 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6327 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6328 			continue;
6329 		for (count = 0; count < dp->pdr_kept_count; count++) {
6330 			kept_path = dp->pdr_kept_paths[count];
6331 			if (kept_path == NULL)
6332 				continue;
6333 			kept = pm_name_to_dip(kept_path, 1);
6334 			if (kept) {
6335 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6336 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6337 				    keeper))
6338 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6339 				pm_rele_power(kept);
6340 				ddi_release_devi(kept);
6341 			}
6342 		}
6343 	}
6344 }
6345 
6346 /*
6347  * Called when we are just released from direct PM.  Bring ourself up
6348  * if our keeper is up since dependency is not honored while a kept
6349  * device is under direct PM.
6350  */
6351 static void
6352 pm_bring_self_up(char *keptpath)
6353 {
6354 	PMD_FUNC(pmf, "bring_self_up")
6355 	dev_info_t *kept;
6356 	dev_info_t *keeper;
6357 	pm_pdr_t *dp;
6358 	int i, j;
6359 	int up = 0, circ;
6360 
6361 	kept = pm_name_to_dip(keptpath, 1);
6362 	if (kept == NULL)
6363 		return;
6364 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6365 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6366 		if (dp->pdr_kept_count == 0)
6367 			continue;
6368 		for (i = 0; i < dp->pdr_kept_count; i++) {
6369 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6370 				continue;
6371 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6372 			if (keeper) {
6373 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6374 				    pmf, PM_DEVICE(keeper)))
6375 				PM_LOCK_POWER(keeper, &circ);
6376 				for (j = 0; j < PM_NUMCMPTS(keeper);
6377 				    j++) {
6378 					if (PM_CURPOWER(keeper, j)) {
6379 						PMD(PMD_KEEPS, ("%s: comp="
6380 						    "%d is up\n", pmf, j))
6381 						up++;
6382 					}
6383 				}
6384 				if (up) {
6385 					if (PM_SKBU(kept))
6386 						DEVI(kept)->devi_pm_flags &=
6387 						    ~PMC_SKIP_BRINGUP;
6388 					bring_pmdep_up(kept, 1);
6389 				}
6390 				PM_UNLOCK_POWER(keeper, circ);
6391 				ddi_release_devi(keeper);
6392 			}
6393 		}
6394 	}
6395 	ddi_release_devi(kept);
6396 }
6397 
6398 static void
6399 pm_process_dep_request(pm_dep_wk_t *work)
6400 {
6401 	PMD_FUNC(pmf, "dep_req")
6402 	int ret;
6403 
6404 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6405 	    pdw_type_decode(work->pdw_type)))
6406 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6407 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6408 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6409 
6410 	switch (work->pdw_type) {
6411 	case PM_DEP_WK_POWER_ON:
6412 		/* Bring up the kept devices and put a hold on them */
6413 		bring_wekeeps_up(work->pdw_keeper);
6414 		break;
6415 	case PM_DEP_WK_POWER_OFF:
6416 		/* Release the kept devices */
6417 		pm_rele_dep(work->pdw_keeper);
6418 		break;
6419 	case PM_DEP_WK_DETACH:
6420 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6421 		break;
6422 	case PM_DEP_WK_REMOVE_DEP:
6423 		pm_discard_dependencies();
6424 		break;
6425 	case PM_DEP_WK_BRINGUP_SELF:
6426 		/*
6427 		 * We deferred satisfying our dependency till now, so satisfy
6428 		 * it again and bring ourselves up.
6429 		 */
6430 		pm_bring_self_up(work->pdw_kept);
6431 		break;
6432 	case PM_DEP_WK_RECORD_KEEPER:
6433 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6434 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6435 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6436 		break;
6437 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6438 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6439 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6440 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6441 		break;
6442 	case PM_DEP_WK_KEPT:
6443 		ret = pm_kept(work->pdw_kept);
6444 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6445 		    ret))
6446 		break;
6447 	case PM_DEP_WK_KEEPER:
6448 		ret = pm_keeper(work->pdw_keeper);
6449 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6450 		    pmf, ret))
6451 		break;
6452 	case PM_DEP_WK_ATTACH:
6453 		ret = pm_keeper(work->pdw_keeper);
6454 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6455 		    pmf, ret))
6456 		ret = pm_kept(work->pdw_kept);
6457 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6458 		    pmf, ret))
6459 		break;
6460 	case PM_DEP_WK_CHECK_KEPT:
6461 		ret = pm_is_kept(work->pdw_kept);
6462 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6463 		    pmf, work->pdw_kept, ret))
6464 		break;
6465 	case PM_DEP_WK_CPR_SUSPEND:
6466 		pm_discard_dependencies();
6467 		break;
6468 	case PM_DEP_WK_CPR_RESUME:
6469 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6470 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6471 		break;
6472 	default:
6473 		ASSERT(0);
6474 		break;
6475 	}
6476 	/*
6477 	 * Free the work structure if the requester is not waiting
6478 	 * Otherwise it is the requester's responsiblity to free it.
6479 	 */
6480 	if (!work->pdw_wait) {
6481 		if (work->pdw_keeper)
6482 			kmem_free(work->pdw_keeper,
6483 			    strlen(work->pdw_keeper) + 1);
6484 		if (work->pdw_kept)
6485 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6486 		kmem_free(work, sizeof (pm_dep_wk_t));
6487 	} else {
6488 		/*
6489 		 * Notify requester if it is waiting for it.
6490 		 */
6491 		work->pdw_ret = ret;
6492 		work->pdw_done = 1;
6493 		cv_signal(&work->pdw_cv);
6494 	}
6495 }
6496 
6497 /*
6498  * Process PM dependency requests.
6499  */
6500 static void
6501 pm_dep_thread(void)
6502 {
6503 	pm_dep_wk_t *work;
6504 	callb_cpr_t cprinfo;
6505 
6506 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6507 	    "pm_dep_thread");
6508 	for (;;) {
6509 		mutex_enter(&pm_dep_thread_lock);
6510 		if (pm_dep_thread_workq == NULL) {
6511 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6512 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6513 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6514 		}
6515 		work = pm_dep_thread_workq;
6516 		pm_dep_thread_workq = work->pdw_next;
6517 		if (pm_dep_thread_tail == work)
6518 			pm_dep_thread_tail = work->pdw_next;
6519 		mutex_exit(&pm_dep_thread_lock);
6520 		pm_process_dep_request(work);
6521 
6522 	}
6523 	/*NOTREACHED*/
6524 }
6525 
6526 /*
6527  * Set the power level of the indicated device to unknown (if it is not a
6528  * backwards compatible device), as it has just been resumed, and it won't
6529  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6530  */
6531 void
6532 pm_forget_power_level(dev_info_t *dip)
6533 {
6534 	dev_info_t *pdip = ddi_get_parent(dip);
6535 	int i, count = 0;
6536 
6537 	if (!PM_ISBC(dip)) {
6538 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6539 			count += (PM_CURPOWER(dip, i) == 0);
6540 
6541 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6542 			e_pm_hold_rele_power(pdip, count);
6543 
6544 		/*
6545 		 * Count this as a power cycle if we care
6546 		 */
6547 		if (DEVI(dip)->devi_pm_volpmd &&
6548 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6549 			DEVI(dip)->devi_pm_volpmd = 0;
6550 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6551 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6552 	}
6553 }
6554 
6555 /*
6556  * This function advises the caller whether it should make a power-off
6557  * transition at this time or not.  If the transition is not advised
6558  * at this time, the time that the next power-off transition can
6559  * be made from now is returned through "intervalp" pointer.
6560  * This function returns:
6561  *
6562  *  1  power-off advised
6563  *  0  power-off not advised, intervalp will point to seconds from
6564  *	  now that a power-off is advised.  If it is passed the number
6565  *	  of years that policy specifies the device should last,
6566  *	  a large number is returned as the time interval.
6567  *  -1  error
6568  */
6569 int
6570 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6571 {
6572 	PMD_FUNC(pmf, "pm_trans_check")
6573 	char dbuf[DC_SCSI_MFR_LEN];
6574 	struct pm_scsi_cycles *scp;
6575 	int service_years, service_weeks, full_years;
6576 	time_t now, service_seconds, tdiff;
6577 	time_t within_year, when_allowed;
6578 	char *ptr;
6579 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6580 	int cycles_diff, cycles_over;
6581 
6582 	if (datap == NULL) {
6583 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6584 		return (-1);
6585 	}
6586 
6587 	if (datap->format == DC_SCSI_FORMAT) {
6588 		/*
6589 		 * Power cycles of the scsi drives are distributed
6590 		 * over 5 years with the following percentage ratio:
6591 		 *
6592 		 *	30%, 25%, 20%, 15%, and 10%
6593 		 *
6594 		 * The power cycle quota for each year is distributed
6595 		 * linearly through out the year.  The equation for
6596 		 * determining the expected cycles is:
6597 		 *
6598 		 *	e = a * (n / y)
6599 		 *
6600 		 * e = expected cycles
6601 		 * a = allocated cycles for this year
6602 		 * n = number of seconds since beginning of this year
6603 		 * y = number of seconds in a year
6604 		 *
6605 		 * Note that beginning of the year starts the day that
6606 		 * the drive has been put on service.
6607 		 *
6608 		 * If the drive has passed its expected cycles, we
6609 		 * can determine when it can start to power cycle
6610 		 * again to keep it on track to meet the 5-year
6611 		 * life expectancy.  The equation for determining
6612 		 * when to power cycle is:
6613 		 *
6614 		 *	w = y * (c / a)
6615 		 *
6616 		 * w = when it can power cycle again
6617 		 * y = number of seconds in a year
6618 		 * c = current number of cycles
6619 		 * a = allocated cycles for the year
6620 		 *
6621 		 */
6622 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6623 
6624 		scp = &datap->un.scsi_cycles;
6625 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6626 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6627 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6628 		if (scp->ncycles < 0 || scp->flag != 0) {
6629 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6630 			return (-1);
6631 		}
6632 
6633 		if (scp->ncycles > scp->lifemax) {
6634 			*intervalp = (LONG_MAX / hz);
6635 			return (0);
6636 		}
6637 
6638 		/*
6639 		 * convert service date to time_t
6640 		 */
6641 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6642 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6643 		ptr = dbuf;
6644 		service_years = stoi(&ptr) - EPOCH_YEAR;
6645 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6646 		    DC_SCSI_WEEK_LEN);
6647 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6648 
6649 		/*
6650 		 * scsi standard does not specify WW data,
6651 		 * could be (00-51) or (01-52)
6652 		 */
6653 		ptr = dbuf;
6654 		service_weeks = stoi(&ptr);
6655 		if (service_years < 0 ||
6656 		    service_weeks < 0 || service_weeks > 52) {
6657 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6658 			    pmf, service_years, service_weeks))
6659 			return (-1);
6660 		}
6661 
6662 		/*
6663 		 * calculate service date in seconds-since-epoch,
6664 		 * adding one day for each leap-year.
6665 		 *
6666 		 * (years-since-epoch + 2) fixes integer truncation,
6667 		 * example: (8) leap-years during [1972, 2000]
6668 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6669 		 */
6670 		service_seconds = (service_years * DC_SPY) +
6671 		    (service_weeks * DC_SPW) +
6672 		    (((service_years + 2) / 4) * DC_SPD);
6673 
6674 		now = gethrestime_sec();
6675 		/*
6676 		 * since the granularity of 'svc_date' is day not second,
6677 		 * 'now' should be rounded up to full day.
6678 		 */
6679 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6680 		if (service_seconds > now) {
6681 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6682 			    "than now (%ld)!\n", pmf, service_seconds, now))
6683 			return (-1);
6684 		}
6685 
6686 		tdiff = now - service_seconds;
6687 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6688 
6689 		/*
6690 		 * NOTE - Leap years are not considered in the calculations
6691 		 * below.
6692 		 */
6693 		full_years = (tdiff / DC_SPY);
6694 		if ((full_years >= DC_SCSI_NPY) &&
6695 		    (scp->ncycles <= scp->lifemax))
6696 			return (1);
6697 
6698 		/*
6699 		 * Determine what is the normal cycle usage for the
6700 		 * device at the beginning and the end of this year.
6701 		 */
6702 		lower_bound_cycles = (!full_years) ? 0 :
6703 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6704 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6705 
6706 		if (scp->ncycles <= lower_bound_cycles)
6707 			return (1);
6708 
6709 		/*
6710 		 * The linear slope that determines how many cycles
6711 		 * are allowed this year is number of seconds
6712 		 * passed this year over total number of seconds in a year.
6713 		 */
6714 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6715 		within_year = (tdiff % DC_SPY);
6716 		cycles_allowed = lower_bound_cycles +
6717 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6718 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6719 		    full_years, within_year))
6720 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6721 		    cycles_allowed))
6722 
6723 		if (scp->ncycles <= cycles_allowed)
6724 			return (1);
6725 
6726 		/*
6727 		 * The transition is not advised now but we can
6728 		 * determine when the next transition can be made.
6729 		 *
6730 		 * Depending on how many cycles the device has been
6731 		 * over-used, we may need to skip years with
6732 		 * different percentage quota in order to determine
6733 		 * when the next transition can be made.
6734 		 */
6735 		cycles_over = (scp->ncycles - lower_bound_cycles);
6736 		while (cycles_over > cycles_diff) {
6737 			full_years++;
6738 			if (full_years >= DC_SCSI_NPY) {
6739 				*intervalp = (LONG_MAX / hz);
6740 				return (0);
6741 			}
6742 			cycles_over -= cycles_diff;
6743 			lower_bound_cycles = upper_bound_cycles;
6744 			upper_bound_cycles =
6745 			    (scp->lifemax * pcnt[full_years]) / 100;
6746 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6747 		}
6748 
6749 		/*
6750 		 * The linear slope that determines when the next transition
6751 		 * can be made is the relative position of used cycles within a
6752 		 * year over total number of cycles within that year.
6753 		 */
6754 		when_allowed = service_seconds + (full_years * DC_SPY) +
6755 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6756 		*intervalp = (when_allowed - now);
6757 		if (*intervalp > (LONG_MAX / hz))
6758 			*intervalp = (LONG_MAX / hz);
6759 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6760 		    *intervalp))
6761 		return (0);
6762 	}
6763 
6764 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6765 	return (-1);
6766 }
6767 
6768 /*
6769  * Nexus drivers call into pm framework to indicate which child driver is about
6770  * to be installed.  In some platforms, ppm may need to configure the hardware
6771  * for successful installation of a driver.
6772  */
6773 int
6774 pm_init_child(dev_info_t *dip)
6775 {
6776 	power_req_t power_req;
6777 
6778 	ASSERT(ddi_binding_name(dip));
6779 	ASSERT(ddi_get_name_addr(dip));
6780 	pm_ppm_claim(dip);
6781 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6782 		power_req.request_type = PMR_PPM_INIT_CHILD;
6783 		power_req.req.ppm_config_req.who = dip;
6784 		ASSERT(PPM(dip) != NULL);
6785 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6786 		    NULL));
6787 	} else {
6788 #ifdef DEBUG
6789 		/* pass it to the default handler so we can debug things */
6790 		power_req.request_type = PMR_PPM_INIT_CHILD;
6791 		power_req.req.ppm_config_req.who = dip;
6792 		(void) pm_ctlops(NULL, dip,
6793 		    DDI_CTLOPS_POWER, &power_req, NULL);
6794 #endif
6795 	}
6796 	return (DDI_SUCCESS);
6797 }
6798 
6799 /*
6800  * Bring parent of a node that is about to be probed up to full power, and
6801  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6802  * it is time to let it go down again
6803  */
6804 void
6805 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6806 {
6807 	int result;
6808 	power_req_t power_req;
6809 
6810 	bzero(cp, sizeof (*cp));
6811 	cp->ppc_dip = dip;
6812 
6813 	pm_ppm_claim(dip);
6814 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6815 		power_req.request_type = PMR_PPM_PRE_PROBE;
6816 		power_req.req.ppm_config_req.who = dip;
6817 		ASSERT(PPM(dip) != NULL);
6818 		(void) pm_ctlops(PPM(dip), dip,
6819 		    DDI_CTLOPS_POWER, &power_req, &result);
6820 		cp->ppc_ppm = PPM(dip);
6821 	} else {
6822 #ifdef DEBUG
6823 		/* pass it to the default handler so we can debug things */
6824 		power_req.request_type = PMR_PPM_PRE_PROBE;
6825 		power_req.req.ppm_config_req.who = dip;
6826 		(void) pm_ctlops(NULL, dip,
6827 		    DDI_CTLOPS_POWER, &power_req, &result);
6828 #endif
6829 		cp->ppc_ppm = NULL;
6830 	}
6831 }
6832 
6833 int
6834 pm_pre_config(dev_info_t *dip, char *devnm)
6835 {
6836 	PMD_FUNC(pmf, "pre_config")
6837 	int ret;
6838 
6839 	if (MDI_VHCI(dip)) {
6840 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6841 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6842 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6843 	} else if (!PM_GET_PM_INFO(dip))
6844 		return (DDI_SUCCESS);
6845 
6846 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6847 	pm_hold_power(dip);
6848 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6849 	if (ret != DDI_SUCCESS)
6850 		pm_rele_power(dip);
6851 	return (ret);
6852 }
6853 
6854 /*
6855  * This routine is called by devfs during its walk to unconfigue a node.
6856  * If the call is due to auto mod_unloads and the dip is not at its
6857  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6858  * return DDI_SUCCESS.
6859  */
6860 int
6861 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6862 {
6863 	PMD_FUNC(pmf, "pre_unconfig")
6864 	int ret;
6865 
6866 	if (MDI_VHCI(dip)) {
6867 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6868 		    PM_DEVICE(dip), flags))
6869 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6870 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6871 	} else if (!PM_GET_PM_INFO(dip))
6872 		return (DDI_SUCCESS);
6873 
6874 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6875 	    flags))
6876 	*held = 0;
6877 
6878 	/*
6879 	 * If the dip is a leaf node, don't power it up.
6880 	 */
6881 	if (!ddi_get_child(dip))
6882 		return (DDI_SUCCESS);
6883 
6884 	/*
6885 	 * Do not power up the node if it is called due to auto-modunload.
6886 	 */
6887 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6888 		return (DDI_FAILURE);
6889 
6890 	pm_hold_power(dip);
6891 	*held = 1;
6892 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6893 	if (ret != DDI_SUCCESS) {
6894 		pm_rele_power(dip);
6895 		*held = 0;
6896 	}
6897 	return (ret);
6898 }
6899 
6900 /*
6901  * Notify ppm of attach action.  Parent is already held at full power by
6902  * probe action.
6903  */
6904 void
6905 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6906 {
6907 	static char *me = "pm_pre_attach";
6908 	power_req_t power_req;
6909 	int result;
6910 
6911 	/*
6912 	 * Initialize and fill in the PPM cookie
6913 	 */
6914 	bzero(cp, sizeof (*cp));
6915 	cp->ppc_cmd = (int)cmd;
6916 	cp->ppc_ppm = PPM(dip);
6917 	cp->ppc_dip = dip;
6918 
6919 	/*
6920 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6921 	 * Power Management stuff. DDI_RESUME also has to purge it's
6922 	 * powerlevel information.
6923 	 */
6924 	switch (cmd) {
6925 	case DDI_ATTACH:
6926 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6927 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6928 			power_req.req.ppm_config_req.who = dip;
6929 			ASSERT(PPM(dip));
6930 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6931 			    &power_req, &result);
6932 		}
6933 #ifdef DEBUG
6934 		else {
6935 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6936 			power_req.req.ppm_config_req.who = dip;
6937 			(void) pm_ctlops(NULL, dip,
6938 			    DDI_CTLOPS_POWER, &power_req, &result);
6939 		}
6940 #endif
6941 		break;
6942 	case DDI_RESUME:
6943 		pm_forget_power_level(dip);
6944 
6945 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6946 			power_req.request_type = PMR_PPM_PRE_RESUME;
6947 			power_req.req.resume_req.who = cp->ppc_dip;
6948 			power_req.req.resume_req.cmd =
6949 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6950 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6951 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6952 			    DDI_CTLOPS_POWER, &power_req, &result);
6953 		}
6954 #ifdef DEBUG
6955 		else {
6956 			power_req.request_type = PMR_PPM_PRE_RESUME;
6957 			power_req.req.resume_req.who = cp->ppc_dip;
6958 			power_req.req.resume_req.cmd =
6959 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6960 			(void) pm_ctlops(NULL, cp->ppc_dip,
6961 			    DDI_CTLOPS_POWER, &power_req, &result);
6962 		}
6963 #endif
6964 		break;
6965 
6966 	case DDI_PM_RESUME:
6967 		break;
6968 
6969 	default:
6970 		panic(me);
6971 	}
6972 }
6973 
6974 /*
6975  * Nexus drivers call into pm framework to indicate which child driver is
6976  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6977  * hardware since the device driver is no longer installed.
6978  */
6979 int
6980 pm_uninit_child(dev_info_t *dip)
6981 {
6982 	power_req_t power_req;
6983 
6984 	ASSERT(ddi_binding_name(dip));
6985 	ASSERT(ddi_get_name_addr(dip));
6986 	pm_ppm_claim(dip);
6987 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6988 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6989 		power_req.req.ppm_config_req.who = dip;
6990 		ASSERT(PPM(dip));
6991 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6992 		    NULL));
6993 	} else {
6994 #ifdef DEBUG
6995 		/* pass it to the default handler so we can debug things */
6996 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6997 		power_req.req.ppm_config_req.who = dip;
6998 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
6999 #endif
7000 	}
7001 	return (DDI_SUCCESS);
7002 }
7003 /*
7004  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
7005  * Also notify ppm of result of probe if there is a ppm that cares
7006  */
7007 void
7008 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
7009 {
7010 	_NOTE(ARGUNUSED(probe_failed))
7011 	int result;
7012 	power_req_t power_req;
7013 
7014 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7015 		power_req.request_type = PMR_PPM_POST_PROBE;
7016 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7017 		power_req.req.ppm_config_req.result = ret;
7018 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7019 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
7020 		    &power_req, &result);
7021 	}
7022 #ifdef DEBUG
7023 	else {
7024 		power_req.request_type = PMR_PPM_POST_PROBE;
7025 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7026 		power_req.req.ppm_config_req.result = ret;
7027 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
7028 		    &power_req, &result);
7029 	}
7030 #endif
7031 }
7032 
7033 void
7034 pm_post_config(dev_info_t *dip, char *devnm)
7035 {
7036 	PMD_FUNC(pmf, "post_config")
7037 
7038 	if (MDI_VHCI(dip)) {
7039 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7040 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
7041 		return;
7042 	} else if (!PM_GET_PM_INFO(dip))
7043 		return;
7044 
7045 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7046 	pm_rele_power(dip);
7047 }
7048 
7049 void
7050 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
7051 {
7052 	PMD_FUNC(pmf, "post_unconfig")
7053 
7054 	if (MDI_VHCI(dip)) {
7055 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
7056 		    PM_DEVICE(dip), held))
7057 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
7058 		return;
7059 	} else if (!PM_GET_PM_INFO(dip))
7060 		return;
7061 
7062 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
7063 	    held))
7064 	if (!held)
7065 		return;
7066 	/*
7067 	 * We have held power in pre_unconfig, release it here.
7068 	 */
7069 	pm_rele_power(dip);
7070 }
7071 
7072 /*
7073  * Notify ppm of result of attach if there is a ppm that cares
7074  */
7075 void
7076 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7077 {
7078 	int result;
7079 	power_req_t power_req;
7080 	dev_info_t	*dip;
7081 
7082 	if (cp->ppc_cmd != DDI_ATTACH)
7083 		return;
7084 
7085 	dip = cp->ppc_dip;
7086 
7087 	if (ret == DDI_SUCCESS) {
7088 		/*
7089 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7090 		 */
7091 		if (PM_GET_PM_INFO(dip) == NULL)
7092 			(void) pm_start(dip);
7093 	} else {
7094 		/*
7095 		 * Attach may have got pm started before failing
7096 		 */
7097 		pm_stop(dip);
7098 	}
7099 
7100 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7101 		power_req.request_type = PMR_PPM_POST_ATTACH;
7102 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7103 		power_req.req.ppm_config_req.result = ret;
7104 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7105 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7106 		    DDI_CTLOPS_POWER, &power_req, &result);
7107 	}
7108 #ifdef DEBUG
7109 	else {
7110 		power_req.request_type = PMR_PPM_POST_ATTACH;
7111 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7112 		power_req.req.ppm_config_req.result = ret;
7113 		(void) pm_ctlops(NULL, cp->ppc_dip,
7114 		    DDI_CTLOPS_POWER, &power_req, &result);
7115 	}
7116 #endif
7117 }
7118 
7119 /*
7120  * Notify ppm of attach action.  Parent is already held at full power by
7121  * probe action.
7122  */
7123 void
7124 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7125 {
7126 	int result;
7127 	power_req_t power_req;
7128 
7129 	bzero(cp, sizeof (*cp));
7130 	cp->ppc_dip = dip;
7131 	cp->ppc_cmd = (int)cmd;
7132 
7133 	switch (cmd) {
7134 	case DDI_DETACH:
7135 		pm_detaching(dip);		/* suspend pm while detaching */
7136 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7137 			power_req.request_type = PMR_PPM_PRE_DETACH;
7138 			power_req.req.ppm_config_req.who = dip;
7139 			ASSERT(PPM(dip));
7140 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7141 			    &power_req, &result);
7142 			cp->ppc_ppm = PPM(dip);
7143 		} else {
7144 #ifdef DEBUG
7145 			/* pass to the default handler so we can debug things */
7146 			power_req.request_type = PMR_PPM_PRE_DETACH;
7147 			power_req.req.ppm_config_req.who = dip;
7148 			(void) pm_ctlops(NULL, dip,
7149 			    DDI_CTLOPS_POWER, &power_req, &result);
7150 #endif
7151 			cp->ppc_ppm = NULL;
7152 		}
7153 		break;
7154 
7155 	default:
7156 		break;
7157 	}
7158 }
7159 
7160 /*
7161  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7162  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7163  * make an entry to record the details, which includes certain flag settings.
7164  */
7165 static void
7166 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7167     int wasvolpmd, major_t major)
7168 {
7169 	PMD_FUNC(pmf, "record_invol_path")
7170 	major_t pm_path_to_major(char *);
7171 	size_t plen;
7172 	pm_noinvol_t *ip, *np, *pp;
7173 	pp = NULL;
7174 
7175 	plen = strlen(path) + 1;
7176 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7177 	np->ni_size = plen;
7178 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7179 	np->ni_noinvolpm = noinvolpm;
7180 	np->ni_volpmd = volpmd;
7181 	np->ni_wasvolpmd = wasvolpmd;
7182 	np->ni_flags = flags;
7183 	(void) strcpy(np->ni_path, path);
7184 	/*
7185 	 * If we haven't actually seen the node attached, it is hard to figure
7186 	 * out its major.  If we could hold the node by path, we would be much
7187 	 * happier here.
7188 	 */
7189 	if (major == DDI_MAJOR_T_NONE) {
7190 		np->ni_major = pm_path_to_major(path);
7191 	} else {
7192 		np->ni_major = major;
7193 	}
7194 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7195 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7196 		int comp = strcmp(path, ip->ni_path);
7197 		if (comp < 0) {
7198 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7199 			    pmf, path, ip->ni_path))
7200 			/* insert before current entry */
7201 			np->ni_next = ip;
7202 			if (pp) {
7203 				pp->ni_next = np;
7204 			} else {
7205 				pm_noinvol_head = np;
7206 			}
7207 			rw_exit(&pm_noinvol_rwlock);
7208 #ifdef DEBUG
7209 			if (pm_debug & PMD_NOINVOL)
7210 				pr_noinvol("record_invol_path exit0");
7211 #endif
7212 			return;
7213 		} else if (comp == 0) {
7214 			panic("%s already in pm_noinvol list", path);
7215 		}
7216 	}
7217 	/*
7218 	 * If we did not find an entry in the list that this should go before,
7219 	 * then it must go at the end
7220 	 */
7221 	if (pp) {
7222 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7223 		    pp->ni_path))
7224 		ASSERT(pp->ni_next == 0);
7225 		pp->ni_next = np;
7226 	} else {
7227 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7228 		ASSERT(!pm_noinvol_head);
7229 		pm_noinvol_head = np;
7230 	}
7231 	rw_exit(&pm_noinvol_rwlock);
7232 #ifdef DEBUG
7233 	if (pm_debug & PMD_NOINVOL)
7234 		pr_noinvol("record_invol_path exit");
7235 #endif
7236 }
7237 
7238 void
7239 pm_record_invol(dev_info_t *dip)
7240 {
7241 	char *pathbuf;
7242 	int pm_all_components_off(dev_info_t *);
7243 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7244 
7245 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7246 	(void) ddi_pathname(dip, pathbuf);
7247 
7248 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7249 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7250 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7251 
7252 	/*
7253 	 * If this child's detach will be holding up its ancestors, then we
7254 	 * allow for an exception to that if all children of this type have
7255 	 * gone down voluntarily.
7256 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7257 	 */
7258 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7259 	    dip);
7260 	kmem_free(pathbuf, MAXPATHLEN);
7261 }
7262 
7263 void
7264 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7265 {
7266 	dev_info_t *dip = cp->ppc_dip;
7267 	int result;
7268 	power_req_t power_req;
7269 
7270 	switch (cp->ppc_cmd) {
7271 	case DDI_DETACH:
7272 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7273 			power_req.request_type = PMR_PPM_POST_DETACH;
7274 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7275 			power_req.req.ppm_config_req.result = ret;
7276 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7277 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7278 			    DDI_CTLOPS_POWER, &power_req, &result);
7279 		}
7280 #ifdef DEBUG
7281 		else {
7282 			power_req.request_type = PMR_PPM_POST_DETACH;
7283 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7284 			power_req.req.ppm_config_req.result = ret;
7285 			(void) pm_ctlops(NULL, cp->ppc_dip,
7286 			    DDI_CTLOPS_POWER, &power_req, &result);
7287 		}
7288 #endif
7289 		if (ret == DDI_SUCCESS) {
7290 			/*
7291 			 * For hotplug detach we assume it is *really* gone
7292 			 */
7293 			if (cp->ppc_cmd == DDI_DETACH &&
7294 			    ((DEVI(dip)->devi_pm_flags &
7295 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7296 			    DEVI(dip)->devi_pm_noinvolpm))
7297 				pm_record_invol(dip);
7298 			DEVI(dip)->devi_pm_flags &=
7299 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7300 
7301 			/*
7302 			 * If console fb is detaching, then we don't need to
7303 			 * worry any more about it going off (pm_detaching has
7304 			 * brought up all components)
7305 			 */
7306 			if (PM_IS_CFB(dip)) {
7307 				mutex_enter(&pm_cfb_lock);
7308 				ASSERT(cfb_dip_detaching);
7309 				ASSERT(cfb_dip == NULL);
7310 				ASSERT(pm_cfb_comps_off == 0);
7311 				cfb_dip_detaching = NULL;
7312 				mutex_exit(&pm_cfb_lock);
7313 			}
7314 			pm_stop(dip);	/* make it permanent */
7315 		} else {
7316 			if (PM_IS_CFB(dip)) {
7317 				mutex_enter(&pm_cfb_lock);
7318 				ASSERT(cfb_dip_detaching);
7319 				ASSERT(cfb_dip == NULL);
7320 				ASSERT(pm_cfb_comps_off == 0);
7321 				cfb_dip = cfb_dip_detaching;
7322 				cfb_dip_detaching = NULL;
7323 				mutex_exit(&pm_cfb_lock);
7324 			}
7325 			pm_detach_failed(dip);	/* resume power management */
7326 		}
7327 		break;
7328 	case DDI_PM_SUSPEND:
7329 		break;
7330 	case DDI_SUSPEND:
7331 		break;				/* legal, but nothing to do */
7332 	default:
7333 #ifdef DEBUG
7334 		panic("pm_post_detach: unrecognized cmd %d for detach",
7335 		    cp->ppc_cmd);
7336 		/*NOTREACHED*/
7337 #else
7338 		break;
7339 #endif
7340 	}
7341 }
7342 
7343 /*
7344  * Called after vfs_mountroot has got the clock started to fix up timestamps
7345  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7346  * devices look busy but have a 0 busycnt
7347  */
7348 int
7349 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7350 {
7351 	_NOTE(ARGUNUSED(arg))
7352 
7353 	pm_info_t *info = PM_GET_PM_INFO(dip);
7354 	struct pm_component *cp;
7355 	int i;
7356 
7357 	if (!info)
7358 		return (DDI_WALK_CONTINUE);
7359 	PM_LOCK_BUSY(dip);
7360 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7361 		cp = PM_CP(dip, i);
7362 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7363 			cp->pmc_timestamp = gethrestime_sec();
7364 	}
7365 	PM_UNLOCK_BUSY(dip);
7366 	return (DDI_WALK_CONTINUE);
7367 }
7368 
/*
 * Called at attach time to see if the device being attached has a record in
 * the no involuntary power cycles list.  If so, we do some bookkeeping on the
 * parents and set a flag in the dip
 *
 * The work is done at most once per dip: PMC_NOINVOL_DONE is set on the
 * first call and later calls return immediately while it remains set.
 * If a record matching the dip's path is found it is unlinked from the
 * list and freed after its flags and counts have been copied into the dip.
 */
void
pm_noinvol_specd(dev_info_t *dip)
{
	PMD_FUNC(pmf, "noinvol_specd")
	char *pathbuf;
	pm_noinvol_t *ip, *pp = NULL;
	int wasvolpmd;
	int found = 0;

	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
		return;
	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);

	PM_LOCK_DIP(dip);
	DEVI(dip)->devi_pm_volpmd = 0;
	DEVI(dip)->devi_pm_noinvolpm = 0;
	/*
	 * First pass: look for the path holding the list lock only as a
	 * reader.  Nothing is modified here.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			found++;
			break;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	if (!found) {
		PM_UNLOCK_DIP(dip);
		kmem_free(pathbuf, MAXPATHLEN);
		return;
	}
	/*
	 * Second pass: the reader lock was dropped above, so the list is
	 * searched again from the head now that it is held as a writer; the
	 * ip/pp values from the first pass are not reused.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	pp = NULL;
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			/* copy the recorded state from the entry to the dip */
			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
			/*
			 * Handle special case of console fb
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				cfb_dip = dip;
				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
				mutex_exit(&pm_cfb_lock);
			}
			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
			ASSERT((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm);
			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
			    ip->ni_noinvolpm, ip->ni_volpmd,
			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
			/*
			 * free the entry in hopes the list will now be empty
			 * and we won't have to search it any more until the
			 * device detaches
			 */
			if (pp) {
				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
				    pmf, ip->ni_path, pp->ni_path))
				pp->ni_next = ip->ni_next;
			} else {
				PMD(PMD_NOINVOL, ("%s: free %s head\n",
				    pmf, ip->ni_path))
				ASSERT(pm_noinvol_head == ip);
				pm_noinvol_head = ip->ni_next;
			}
			PM_UNLOCK_DIP(dip);
			/* save what is still needed before the entry is freed */
			wasvolpmd = ip->ni_wasvolpmd;
			rw_exit(&pm_noinvol_rwlock);
			/* entry is already unlinked, so free it unlocked */
			kmem_free(ip->ni_path, ip->ni_size);
			kmem_free(ip, sizeof (*ip));
			/*
			 * Now walk up the tree decrementing devi_pm_noinvolpm
			 * (and volpmd if appropriate)
			 */
			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
			    wasvolpmd, pathbuf, dip);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("noinvol_specd exit");
#endif
			kmem_free(pathbuf, MAXPATHLEN);
			return;
		}
	}
	/*
	 * The entry seen in the first pass was no longer on the list by the
	 * time the writer lock was taken; nothing more to do.
	 */
	kmem_free(pathbuf, MAXPATHLEN);
	rw_exit(&pm_noinvol_rwlock);
	PM_UNLOCK_DIP(dip);
}
7472 
7473 int
7474 pm_all_components_off(dev_info_t *dip)
7475 {
7476 	int i;
7477 	pm_component_t *cp;
7478 
7479 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7480 		cp = PM_CP(dip, i);
7481 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7482 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7483 			return (0);
7484 	}
7485 	return (1);	/* all off */
7486 }
7487 
/*
 * Make sure that all "no involuntary power cycles" devices are attached.
 * Called before doing a cpr suspend to make sure the driver has a say about
 * the power cycle
 *
 * Returns 1 when the whole pm_noinvol list has been walked without finding
 * an entry that still needs attaching; module unloading is then still
 * disabled and pm_reattach_noinvol_fini() re-enables it.  Returns 0 (after
 * re-enabling module unloading) if a device could not be held, or if an
 * entry is still on the list after an earlier attempt to hold it.
 */
int
pm_reattach_noinvol(void)
{
	PMD_FUNC(pmf, "reattach_noinvol")
	pm_noinvol_t *ip;
	char *path;
	dev_info_t *dip;

	/*
	 * Prevent the modunload thread from unloading any modules until we
	 * have completely stopped all kernel threads.
	 */
	modunload_disable();
	/*
	 * NOTE(review): this walk reads and writes the list without taking
	 * pm_noinvol_rwlock, unlike the walk below -- confirm that nothing
	 * can modify the list concurrently at this point.
	 */
	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
		/*
		 * Forget we've ever seen any entry
		 */
		ip->ni_persistent = 0;
	}
restart:
	/*
	 * The lock is dropped before each hold attempt, so the walk starts
	 * over from the head of the list after every successful hold.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
#ifdef PMDDEBUG
		major_t maj;
		maj = ip->ni_major;
#endif
		path = ip->ni_path;
		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
			if (ip->ni_persistent) {
				/*
				 * If we weren't able to make this entry
				 * go away, then we give up, as
				 * holding/attaching the driver ought to have
				 * resulted in this entry being deleted
				 */
				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
				    "(%s|%d)\n", pmf, ip->ni_path,
				    ddi_major_to_name(maj), (int)maj))
				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
				    ip->ni_path);
				modunload_enable();
				rw_exit(&pm_noinvol_rwlock);
				return (0);
			}
			/* mark the entry so a second visit is detected above */
			ip->ni_persistent++;
			rw_exit(&pm_noinvol_rwlock);
			/*
			 * NOTE(review): path still points at ip->ni_path, but
			 * the list lock is no longer held from here on --
			 * confirm the entry cannot be freed while path is in
			 * use below.
			 */
			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
			dip = e_ddi_hold_devi_by_path(path, 0);
			if (dip == NULL) {
				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
				    pmf, path, (int)maj))
				cmn_err(CE_WARN, "cpr: unable to hold %s "
				    "driver", path);
				modunload_enable();
				return (0);
			} else {
				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
				/*
				 * Since the modunload thread is stopped, we
				 * don't have to keep the driver held, which
				 * saves a ton of bookkeeping
				 */
				ddi_release_devi(dip);
				goto restart;
			}
		} else {
			/*
			 * Entry has no path or is flagged PMC_DRIVER_REMOVED
			 * (the debug message below says "unknown major").
			 */
			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
			    pmf, ip->ni_path))
			continue;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	return (1);
}
7567 
/*
 * Re-enable module unloading.  pm_reattach_noinvol() disables it and leaves
 * it disabled when it returns 1; this is the matching enable.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7573 
7574 /*
7575  * Display pm support code
7576  */
7577 
7578 
7579 /*
7580  * console frame-buffer power-mgmt gets enabled when debugging
7581  * services are not present or console fbpm override is set
7582  */
7583 void
7584 pm_cfb_setup(const char *stdout_path)
7585 {
7586 	PMD_FUNC(pmf, "cfb_setup")
7587 	extern int obpdebug;
7588 	char *devname;
7589 	dev_info_t *dip;
7590 	int devname_len;
7591 	extern dev_info_t *fbdip;
7592 
7593 	/*
7594 	 * By virtue of this function being called (from consconfig),
7595 	 * we know stdout is a framebuffer.
7596 	 */
7597 	stdout_is_framebuffer = 1;
7598 
7599 	if (obpdebug || (boothowto & RB_DEBUG)) {
7600 		if (pm_cfb_override == 0) {
7601 			/*
7602 			 * Console is frame buffer, but we want to suppress
7603 			 * pm on it because of debugging setup
7604 			 */
7605 			pm_cfb_enabled = 0;
7606 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7607 			    "console power management.");
7608 			/*
7609 			 * however, we still need to know which is the console
7610 			 * fb in order to suppress pm on it
7611 			 */
7612 		} else {
7613 			cmn_err(CE_WARN, "Kernel debugger present: see "
7614 			    "kmdb(1M) for interaction with power management.");
7615 		}
7616 	}
7617 #ifdef DEBUG
7618 	/*
7619 	 * IF console is fb and is power managed, don't do prom_printfs from
7620 	 * pm debug macro
7621 	 */
7622 	if (pm_cfb_enabled && !pm_debug_to_console) {
7623 		if (pm_debug)
7624 			prom_printf("pm debug output will be to log only\n");
7625 		pm_divertdebug++;
7626 	}
7627 #endif
7628 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7629 	devname_len = strlen(devname) + 1;
7630 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7631 	/* if the driver is attached */
7632 	if ((dip = fbdip) != NULL) {
7633 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7634 		    PM_DEVICE(dip)))
7635 		/*
7636 		 * We set up here as if the driver were power manageable in case
7637 		 * we get a later attach of a pm'able driver (which would result
7638 		 * in a panic later)
7639 		 */
7640 		cfb_dip = dip;
7641 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7642 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7643 		    PM_DEVICE(dip)))
7644 #ifdef DEBUG
7645 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7646 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7647 			    pmf, PM_DEVICE(dip)))
7648 		}
7649 #endif
7650 	} else {
7651 		char *ep;
7652 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7653 		pm_record_invol_path(devname,
7654 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7655 		    DDI_MAJOR_T_NONE);
7656 		for (ep = strrchr(devname, '/'); ep != devname;
7657 		    ep = strrchr(devname, '/')) {
7658 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7659 			*ep = '\0';
7660 			dip = pm_name_to_dip(devname, 0);
7661 			if (dip != NULL) {
7662 				/*
7663 				 * Walk up the tree incrementing
7664 				 * devi_pm_noinvolpm
7665 				 */
7666 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7667 				    0, 0, devname, dip);
7668 				break;
7669 			} else {
7670 				pm_record_invol_path(devname,
7671 				    PMC_NO_INVOL, 1, 0, 0, DDI_MAJOR_T_NONE);
7672 			}
7673 		}
7674 	}
7675 	kmem_free(devname, devname_len);
7676 }
7677 
/*
 * Drop one reference on the console frame buffer: decrements cfb_inuse
 * under pm_cfb_lock.  pm_cfb_check_and_hold() is the routine that
 * increments it.
 */
void
pm_cfb_rele(void)
{
	mutex_enter(&pm_cfb_lock);
	/*
	 * this call isn't using the console any  more, it is ok to take it
	 * down if the count goes to 0
	 */
	cfb_inuse--;
	mutex_exit(&pm_cfb_lock);
}
7689 
7690 /*
7691  * software interrupt handler for fbpm; this function exists because we can't
7692  * bring up the frame buffer power from above lock level.  So if we need to,
7693  * we instead schedule a softint that runs this routine and takes us into
7694  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7695  */
7696 static uint_t
7697 pm_cfb_softint(caddr_t int_handler_arg)
7698 {
7699 	_NOTE(ARGUNUSED(int_handler_arg))
7700 	int rval = DDI_INTR_UNCLAIMED;
7701 
7702 	mutex_enter(&pm_cfb_lock);
7703 	if (pm_soft_pending) {
7704 		mutex_exit(&pm_cfb_lock);
7705 		debug_enter((char *)NULL);
7706 		/* acquired in debug_enter before calling pm_cfb_trigger */
7707 		pm_cfb_rele();
7708 		mutex_enter(&pm_cfb_lock);
7709 		pm_soft_pending = 0;
7710 		mutex_exit(&pm_cfb_lock);
7711 		rval = DDI_INTR_CLAIMED;
7712 	} else
7713 		mutex_exit(&pm_cfb_lock);
7714 
7715 	return (rval);
7716 }
7717 
/*
 * Set up the locks and the soft interrupt used for console frame buffer
 * power management.
 *
 * pm_cfb_lock (and, under PMDDEBUG, pm_debug_lock) is always initialized as
 * a spin mutex at SPL8.  The remainder is done only when stdout is a frame
 * buffer: pm_cfb_softint() is registered as a DDI_SOFTINT_HIGH soft
 * interrupt on the root node (failure to register panics), and
 * pm_cfb_check_and_powerup()/pm_cfb_rele() are handed to
 * prom_set_outfuncs().
 */
void
pm_cfb_setup_intr(void)
{
	PMD_FUNC(pmf, "cfb_setup_intr")
	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
	void pm_cfb_check_and_powerup(void);

	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
#ifdef PMDDEBUG
	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
#endif

	if (!stdout_is_framebuffer) {
		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
		return;
	}

	/*
	 * setup software interrupt handler
	 */
	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
		panic("pm: unable to register soft intr.");

	/*
	 * presumably the first function runs before and the second after
	 * prom console output -- confirm against prom_set_outfuncs()
	 */
	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
}
7744 
7745 /*
7746  * Checks to see if it is safe to write to the console wrt power management
7747  * (i.e. if the console is a framebuffer, then it must be at full power)
7748  * returns 1 when power is off (power-up is needed)
7749  * returns 0 when power is on (power-up not needed)
7750  */
7751 int
7752 pm_cfb_check_and_hold(void)
7753 {
7754 	/*
7755 	 * cfb_dip is set iff console is a power manageable frame buffer
7756 	 * device
7757 	 */
7758 	extern int modrootloaded;
7759 
7760 	mutex_enter(&pm_cfb_lock);
7761 	cfb_inuse++;
7762 	ASSERT(cfb_inuse);	/* wrap? */
7763 	if (modrootloaded && cfb_dip) {
7764 		/*
7765 		 * don't power down the frame buffer, the prom is using it
7766 		 */
7767 		if (pm_cfb_comps_off) {
7768 			mutex_exit(&pm_cfb_lock);
7769 			return (1);
7770 		}
7771 	}
7772 	mutex_exit(&pm_cfb_lock);
7773 	return (0);
7774 }
7775 
7776 /*
7777  * turn on cfb power (which is known to be off).
7778  * Must be called below lock level!
7779  */
7780 void
7781 pm_cfb_powerup(void)
7782 {
7783 	pm_info_t *info;
7784 	int norm;
7785 	int ccount, ci;
7786 	int unused;
7787 #ifdef DEBUG
7788 	/*
7789 	 * Can't reenter prom_prekern, so suppress pm debug messages
7790 	 * (still go to circular buffer).
7791 	 */
7792 	mutex_enter(&pm_debug_lock);
7793 	pm_divertdebug++;
7794 	mutex_exit(&pm_debug_lock);
7795 #endif
7796 	info = PM_GET_PM_INFO(cfb_dip);
7797 	ASSERT(info);
7798 
7799 	ccount = PM_NUMCMPTS(cfb_dip);
7800 	for (ci = 0; ci < ccount; ci++) {
7801 		norm = pm_get_normal_power(cfb_dip, ci);
7802 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7803 		    PM_CANBLOCK_BYPASS, 0, &unused);
7804 	}
7805 #ifdef DEBUG
7806 	mutex_enter(&pm_debug_lock);
7807 	pm_divertdebug--;
7808 	mutex_exit(&pm_debug_lock);
7809 #endif
7810 }
7811 
/*
 * Check if the console framebuffer is powered up.  If not power it up.
 * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
 * must be released by calling pm_cfb_rele when the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	/* non-zero return means the frame buffer has components that are off */
	if (pm_cfb_check_and_hold())
		pm_cfb_powerup();
}
7824 
/*
 * Trigger a low level interrupt to power up console frame buffer.
 *
 * Does nothing when there is no cfb_dip.  Otherwise records the current
 * lbolt in pm_soft_pending and triggers the soft interrupt registered as
 * pm_soft_id; pm_cfb_softint() clears pm_soft_pending again when it runs.
 */
void
pm_cfb_trigger(void)
{
	/* checked without pm_cfb_lock held */
	if (cfb_dip == NULL)
		return;

	mutex_enter(&pm_cfb_lock);
	/*
	 * If machine appears to be hung, pulling the keyboard connector of
	 * the console will cause a high level interrupt and go to debug_enter.
	 * But, if the fb is powered down, this routine will be called to bring
	 * it up (by generating a softint to do the work).  If soft interrupts
	 * are not running, and the keyboard connector is pulled again, the
	 * following code detects this condition and calls panic which allows
	 * the fb to be brought up from high level.
	 *
	 * If two nearly simultaneous calls to debug_enter occur (both from
	 * high level) the code described above will cause a panic.
	 */
	if (lbolt <= pm_soft_pending) {
		panicstr = "pm_cfb_trigger: lbolt not advancing";
		panic(panicstr);	/* does a power up at any intr level */
		/* NOTREACHED */
	}
	/* non-zero value doubles as the "request pending" flag */
	pm_soft_pending = lbolt;
	mutex_exit(&pm_cfb_lock);
	ddi_trigger_softintr(pm_soft_id);
}
7856 
7857 major_t
7858 pm_path_to_major(char *path)
7859 {
7860 	PMD_FUNC(pmf, "path_to_major")
7861 	char *np, *ap, *bp;
7862 	major_t ret;
7863 	size_t len;
7864 	static major_t i_path_to_major(char *, char *);
7865 
7866 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7867 
7868 	np = strrchr(path, '/');
7869 	if (np != NULL)
7870 		np++;
7871 	else
7872 		np = path;
7873 	len = strlen(np) + 1;
7874 	bp = kmem_alloc(len, KM_SLEEP);
7875 	(void) strcpy(bp, np);
7876 	if ((ap = strchr(bp, '@')) != NULL) {
7877 		*ap = '\0';
7878 	}
7879 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7880 	ret = i_path_to_major(path, np);
7881 	kmem_free(bp, len);
7882 	return (ret);
7883 }
7884 
#ifdef DEBUG
#ifndef sparc
clock_t pt_sleep = 1;
#endif

/*
 * State of the in-memory pm debug log written by pm_log().  The buffer is
 * allocated on the first call to pm_log().
 */
char	*pm_msgp;		/* where the next message will be written */
char	*pm_bufend;		/* last byte of the buffer */
char	*pm_msgbuf = NULL;	/* start of the buffer, NULL until allocated */
int	pm_logpages = 0x100;	/* size of the buffer, in pages */
#include <sys/sunldi.h>
#include <sys/uio.h>
clock_t	pm_log_sleep = 1000;	/* drv_usecwait() delay after each message */
int	pm_extra_cr = 1;	/* x86: follow each message with "\r" */
volatile int pm_tty = 1;	/* x86: also send messages to pm_printf() */

#define	PMLOGPGS	pm_logpages

#if defined(__x86)
void pm_printf(char *s);
#endif

/*
 * Format a message into the pm debug log buffer and, unless output has been
 * diverted (pm_divertdebug), print it with prom_printf() as well.  On x86
 * the message also goes to pm_printf() while pm_tty is set.
 *
 * Messages are stored back to back, each with its terminating NUL.  When a
 * message does not fit in what is left of the buffer, the unused tail is
 * zeroed and the message is written at the start of the buffer instead.  A
 * message longer than the whole buffer is truncated to fit.
 */
/*PRINTFLIKE1*/
void
pm_log(const char *fmt, ...)
{
	va_list adx;
	size_t size;
	size_t avail;

	mutex_enter(&pm_debug_lock);
	if (pm_msgbuf == NULL) {
		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
		pm_msgp = pm_msgbuf;
	}

	/* first pass over the arguments: find out how much room is needed */
	va_start(adx, fmt);
	size = vsnprintf(NULL, 0, fmt, adx) + 1;
	va_end(adx);

	if (size > (size_t)(pm_bufend - pm_msgp)) {	/* wraps */
		bzero(pm_msgp, pm_bufend - pm_msgp);
		/*
		 * Move the write position to the start of the buffer before
		 * formatting, so that the text printed below is the message
		 * just written and not the tail that was zeroed above.
		 */
		pm_msgp = pm_msgbuf;
		avail = (size_t)(pm_bufend - pm_msgbuf);
		if (size > avail)
			size = avail;	/* vsnprintf() truncates to fit */
	}

	/* second pass: format the message in place */
	va_start(adx, fmt);
	(void) vsnprintf(pm_msgp, size, fmt, adx);
	va_end(adx);
#if defined(__x86)
	if (pm_tty) {
		pm_printf(pm_msgp);
		if (pm_extra_cr)
			pm_printf("\r");
	}
#endif
	if (!pm_divertdebug)
		prom_printf("%s", pm_msgp);
	pm_msgp += size;

	mutex_exit(&pm_debug_lock);
	drv_usecwait((clock_t)pm_log_sleep);
}
#endif	/* DEBUG */
7954 
7955 /*
7956  * We want to save the state of any directly pm'd devices over the suspend/
7957  * resume process so that we can put them back the way the controlling
7958  * process left them.
7959  */
7960 void
7961 pm_save_direct_levels(void)
7962 {
7963 	pm_processes_stopped = 1;
7964 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7965 }
7966 
7967 static int
7968 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7969 {
7970 	_NOTE(ARGUNUSED(arg))
7971 	int i;
7972 	int *ip;
7973 	pm_info_t *info = PM_GET_PM_INFO(dip);
7974 
7975 	if (!info)
7976 		return (DDI_WALK_CONTINUE);
7977 
7978 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7979 		if (PM_NUMCMPTS(dip) > 2) {
7980 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7981 			    sizeof (int), KM_SLEEP);
7982 			ip = info->pmi_lp;
7983 		} else {
7984 			ip = info->pmi_levels;
7985 		}
7986 		/* autopm and processes are stopped, ok not to lock power */
7987 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7988 			*ip++ = PM_CURPOWER(dip, i);
7989 		/*
7990 		 * There is a small window between stopping the
7991 		 * processes and setting pm_processes_stopped where
7992 		 * a driver could get hung up in a pm_raise_power()
7993 		 * call.  Free any such driver now.
7994 		 */
7995 		pm_proceed(dip, PMP_RELEASE, -1, -1);
7996 	}
7997 
7998 	return (DDI_WALK_CONTINUE);
7999 }
8000 
8001 void
8002 pm_restore_direct_levels(void)
8003 {
8004 	/*
8005 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
8006 	 * threads failed) then we don't want to try to restore them
8007 	 */
8008 	if (!pm_processes_stopped)
8009 		return;
8010 
8011 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
8012 	pm_processes_stopped = 0;
8013 }
8014 
8015 static int
8016 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
8017 {
8018 	_NOTE(ARGUNUSED(arg))
8019 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
8020 	int i, nc, result;
8021 	int *ip;
8022 
8023 	pm_info_t *info = PM_GET_PM_INFO(dip);
8024 	if (!info)
8025 		return (DDI_WALK_CONTINUE);
8026 
8027 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8028 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
8029 			ip = &info->pmi_lp[nc - 1];
8030 		} else {
8031 			ip = &info->pmi_levels[nc - 1];
8032 		}
8033 		/*
8034 		 * Because fb drivers fail attempts to turn off the
8035 		 * fb when the monitor is on, but treat a request to
8036 		 * turn on the monitor as a request to turn on the
8037 		 * fb too, we process components in descending order
8038 		 * Because autopm is disabled and processes aren't
8039 		 * running, it is ok to examine current power outside
8040 		 * of the power lock
8041 		 */
8042 		for (i = nc - 1; i >= 0; i--, ip--) {
8043 			if (PM_CURPOWER(dip, i) == *ip)
8044 				continue;
8045 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
8046 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
8047 				cmn_err(CE_WARN, "cpr: unable "
8048 				    "to restore power level of "
8049 				    "component %d of directly "
8050 				    "power manged device %s@%s"
8051 				    " to %d",
8052 				    i, PM_NAME(dip),
8053 				    PM_ADDR(dip), *ip);
8054 				PMD(PMD_FAIL, ("%s: failed to restore "
8055 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
8056 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
8057 				    PM_CURPOWER(dip, i), *ip, result))
8058 			}
8059 		}
8060 		if (nc > 2) {
8061 			kmem_free(info->pmi_lp, nc * sizeof (int));
8062 			info->pmi_lp = NULL;
8063 		}
8064 	}
8065 	return (DDI_WALK_CONTINUE);
8066 }
8067 
8068 /*
8069  * Stolen from the bootdev module
8070  * attempt to convert a path to a major number
8071  */
8072 static major_t
8073 i_path_to_major(char *path, char *leaf_name)
8074 {
8075 	extern major_t path_to_major(char *pathname);
8076 	major_t maj;
8077 
8078 	if ((maj = path_to_major(path)) == DDI_MAJOR_T_NONE) {
8079 		maj = ddi_name_to_major(leaf_name);
8080 	}
8081 
8082 	return (maj);
8083 }
8084 
/*
 * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
 * An entry in the list means that the device is detached, so we need to
 * adjust its ancestors as if they had just seen this attach, and any detached
 * ancestors need to have their list entries adjusted.
 *
 * This is the externally visible entry point; the work is done by
 * i_pm_driver_removed() while pm_remdrv_lock is held.
 */
void
pm_driver_removed(major_t major)
{
	static void i_pm_driver_removed(major_t major);

	/*
	 * Serialize removal of drivers. This is to keep ancestors of
	 * a node that is being deleted from getting deleted and added back
	 * with different counters.
	 */
	mutex_enter(&pm_remdrv_lock);
	i_pm_driver_removed(major);
	mutex_exit(&pm_remdrv_lock);
}
8105 
8106 /*
8107  * This routine is called recursively by pm_noinvol_process_ancestors()
8108  */
8109 static void
8110 i_pm_driver_removed(major_t major)
8111 {
8112 	PMD_FUNC(pmf, "driver_removed")
8113 	static void adjust_ancestors(char *, int);
8114 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8115 	static void pm_noinvol_process_ancestors(char *);
8116 	pm_noinvol_t *ip, *pp = NULL;
8117 	int wasvolpmd;
8118 	ASSERT(major != DDI_MAJOR_T_NONE);
8119 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8120 again:
8121 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8122 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8123 		if (major != ip->ni_major)
8124 			continue;
8125 		/*
8126 		 * If it is an ancestor of no-invol node, which is
8127 		 * not removed, skip it. This is to cover the case of
8128 		 * ancestor removed without removing its descendants.
8129 		 */
8130 		if (pm_is_noinvol_ancestor(ip)) {
8131 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8132 			continue;
8133 		}
8134 		wasvolpmd = ip->ni_wasvolpmd;
8135 		/*
8136 		 * remove the entry from the list
8137 		 */
8138 		if (pp) {
8139 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8140 			    pmf, ip->ni_path, pp->ni_path))
8141 			pp->ni_next = ip->ni_next;
8142 		} else {
8143 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8144 			    ip->ni_path))
8145 			ASSERT(pm_noinvol_head == ip);
8146 			pm_noinvol_head = ip->ni_next;
8147 		}
8148 		rw_exit(&pm_noinvol_rwlock);
8149 		adjust_ancestors(ip->ni_path, wasvolpmd);
8150 		/*
8151 		 * Had an ancestor been removed before this node, it would have
8152 		 * been skipped. Adjust the no-invol counters for such skipped
8153 		 * ancestors.
8154 		 */
8155 		pm_noinvol_process_ancestors(ip->ni_path);
8156 		kmem_free(ip->ni_path, ip->ni_size);
8157 		kmem_free(ip, sizeof (*ip));
8158 		goto again;
8159 	}
8160 	rw_exit(&pm_noinvol_rwlock);
8161 }
8162 
8163 /*
8164  * returns 1, if *aip is a ancestor of a no-invol node
8165  *	   0, otherwise
8166  */
8167 static int
8168 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8169 {
8170 	pm_noinvol_t *ip;
8171 
8172 	ASSERT(strlen(aip->ni_path) != 0);
8173 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8174 		if (ip == aip)
8175 			continue;
8176 		/*
8177 		 * To be an ancestor, the path must be an initial substring of
8178 		 * the descendent, and end just before a '/' in the
8179 		 * descendent's path.
8180 		 */
8181 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8182 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8183 			return (1);
8184 	}
8185 	return (0);
8186 }
8187 
8188 #define	PM_MAJOR(dip) ddi_name_to_major(ddi_binding_name(dip))
8189 /*
8190  * scan through the pm_noinvolpm list adjusting ancestors of the current
8191  * node;  Modifies string *path.
8192  */
8193 static void
8194 adjust_ancestors(char *path, int wasvolpmd)
8195 {
8196 	PMD_FUNC(pmf, "adjust_ancestors")
8197 	char *cp;
8198 	pm_noinvol_t *lp;
8199 	pm_noinvol_t *pp = NULL;
8200 	major_t locked = DDI_MAJOR_T_NONE;
8201 	dev_info_t *dip;
8202 	char	*pathbuf;
8203 	size_t pathbuflen = strlen(path) + 1;
8204 
8205 	/*
8206 	 * First we look up the ancestor's dip.  If we find it, then we
8207 	 * adjust counts up the tree
8208 	 */
8209 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8210 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8211 	(void) strcpy(pathbuf, path);
8212 	cp = strrchr(pathbuf, '/');
8213 	if (cp == NULL)	{
8214 		/* if no ancestors, then nothing to do */
8215 		kmem_free(pathbuf, pathbuflen);
8216 		return;
8217 	}
8218 	*cp = '\0';
8219 	dip = pm_name_to_dip(pathbuf, 1);
8220 	if (dip != NULL) {
8221 		locked = PM_MAJOR(dip);
8222 
8223 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8224 		    path, dip);
8225 
8226 		if (locked != DDI_MAJOR_T_NONE)
8227 			ddi_release_devi(dip);
8228 	} else {
8229 		char *apath;
8230 		size_t len = strlen(pathbuf) + 1;
8231 		int  lock_held = 1;
8232 
8233 		/*
8234 		 * Now check for ancestors that exist only in the list
8235 		 */
8236 		apath = kmem_alloc(len, KM_SLEEP);
8237 		(void) strcpy(apath, pathbuf);
8238 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8239 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8240 			/*
8241 			 * This can only happen once.  Since we have to drop
8242 			 * the lock, we need to extract the relevant info.
8243 			 */
8244 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8245 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8246 				    lp->ni_path, lp->ni_noinvolpm,
8247 				    lp->ni_noinvolpm - 1))
8248 				lp->ni_noinvolpm--;
8249 				if (wasvolpmd && lp->ni_volpmd) {
8250 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8251 					    "%d\n", pmf, lp->ni_path,
8252 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8253 					lp->ni_volpmd--;
8254 				}
8255 				/*
8256 				 * remove the entry from the list, if there
8257 				 * are no more no-invol descendants and node
8258 				 * itself is not a no-invol node.
8259 				 */
8260 				if (!(lp->ni_noinvolpm ||
8261 				    (lp->ni_flags & PMC_NO_INVOL))) {
8262 					ASSERT(lp->ni_volpmd == 0);
8263 					if (pp) {
8264 						PMD(PMD_NOINVOL, ("%s: freeing "
8265 						    "%s, prev is %s\n", pmf,
8266 						    lp->ni_path, pp->ni_path))
8267 						pp->ni_next = lp->ni_next;
8268 					} else {
8269 						PMD(PMD_NOINVOL, ("%s: free %s "
8270 						    "head\n", pmf, lp->ni_path))
8271 						ASSERT(pm_noinvol_head == lp);
8272 						pm_noinvol_head = lp->ni_next;
8273 					}
8274 					lock_held = 0;
8275 					rw_exit(&pm_noinvol_rwlock);
8276 					adjust_ancestors(apath, wasvolpmd);
8277 					/* restore apath */
8278 					(void) strcpy(apath, pathbuf);
8279 					kmem_free(lp->ni_path, lp->ni_size);
8280 					kmem_free(lp, sizeof (*lp));
8281 				}
8282 				break;
8283 			}
8284 		}
8285 		if (lock_held)
8286 			rw_exit(&pm_noinvol_rwlock);
8287 		adjust_ancestors(apath, wasvolpmd);
8288 		kmem_free(apath, len);
8289 	}
8290 	kmem_free(pathbuf, pathbuflen);
8291 }
8292 
8293 /*
8294  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8295  * which were skipped even though their drivers were removed.
8296  */
8297 static void
8298 pm_noinvol_process_ancestors(char *path)
8299 {
8300 	pm_noinvol_t *lp;
8301 
8302 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8303 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8304 		if (strstr(path, lp->ni_path) &&
8305 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8306 			rw_exit(&pm_noinvol_rwlock);
8307 			i_pm_driver_removed(lp->ni_major);
8308 			return;
8309 		}
8310 	}
8311 	rw_exit(&pm_noinvol_rwlock);
8312 }
8313 
8314 /*
8315  * Returns true if (detached) device needs to be kept up because it exported the
8316  * "no-involuntary-power-cycles" property or we're pretending it did (console
8317  * fb case) or it is an ancestor of such a device and has used up the "one
8318  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8319  * upon detach.  In any event, we need an exact hit on the path or we return
8320  * false.
8321  */
8322 int
8323 pm_noinvol_detached(char *path)
8324 {
8325 	PMD_FUNC(pmf, "noinvol_detached")
8326 	pm_noinvol_t *ip;
8327 	int ret = 0;
8328 
8329 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8330 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8331 		if (strcmp(path, ip->ni_path) == 0) {
8332 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8333 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8334 				    "%s\n", pmf, path))
8335 				ret = 1;
8336 				break;
8337 			}
8338 #ifdef	DEBUG
8339 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8340 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8341 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8342 				    path))
8343 #endif
8344 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8345 			break;
8346 		}
8347 	}
8348 	rw_exit(&pm_noinvol_rwlock);
8349 	return (ret);
8350 }
8351 
8352 int
8353 pm_is_cfb(dev_info_t *dip)
8354 {
8355 	return (dip == cfb_dip);
8356 }
8357 
#ifdef	DEBUG
/*
 * True when no component of the console frame buffer is below "normal"
 * (fully on) power, which is tracked by pm_cfb_comps_off being zero.  This
 * is also the answer when the console is not a frame buffer.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8370 
/*
 * Prevent scan from powering down the node by incrementing its
 * kidsupcnt.  The hold is undone with pm_rele_power().
 */
void
pm_hold_power(dev_info_t *dip)
{
	/* a count of +1 takes one hold */
	e_pm_hold_rele_power(dip, 1);
}
8380 
/*
 * Release a hold taken with pm_hold_power() by decrementing the
 * kidsupcnt, allowing scan to power down the node if all conditions
 * are met.
 */
void
pm_rele_power(dev_info_t *dip)
{
	/* a count of -1 drops one hold */
	e_pm_hold_rele_power(dip, -1);
}
8390 
8391 /*
8392  * A wrapper of pm_all_to_normal() to power up a dip
8393  * to its normal level
8394  */
8395 int
8396 pm_powerup(dev_info_t *dip)
8397 {
8398 	PMD_FUNC(pmf, "pm_powerup")
8399 
8400 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8401 	ASSERT(!(servicing_interrupt()));
8402 
8403 	/*
8404 	 * in case this node is not already participating pm
8405 	 */
8406 	if (!PM_GET_PM_INFO(dip)) {
8407 		if (!DEVI_IS_ATTACHING(dip))
8408 			return (DDI_SUCCESS);
8409 		if (pm_start(dip) != DDI_SUCCESS)
8410 			return (DDI_FAILURE);
8411 		if (!PM_GET_PM_INFO(dip))
8412 			return (DDI_SUCCESS);
8413 	}
8414 
8415 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8416 }
8417 
8418 int
8419 pm_rescan_walk(dev_info_t *dip, void *arg)
8420 {
8421 	_NOTE(ARGUNUSED(arg))
8422 
8423 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8424 		return (DDI_WALK_CONTINUE);
8425 
8426 	/*
8427 	 * Currently pm_cpr_callb/resume code is the only caller
8428 	 * and it needs to make sure that stopped scan get
8429 	 * reactivated. Otherwise, rescan walk needn't reactive
8430 	 * stopped scan.
8431 	 */
8432 	pm_scan_init(dip);
8433 
8434 	(void) pm_rescan(dip);
8435 	return (DDI_WALK_CONTINUE);
8436 }
8437 
8438 static dev_info_t *
8439 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8440 {
8441 	dev_info_t *wdip, *pdip;
8442 
8443 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8444 		pdip = ddi_get_parent(wdip);
8445 		if (pdip == dip)
8446 			return (wdip);
8447 	}
8448 	return (NULL);
8449 }
8450 
8451 int
8452 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8453     void *arg, void *result)
8454 {
8455 	PMD_FUNC(pmf, "bp_bus_power")
8456 	dev_info_t	*cdip;
8457 	pm_info_t	*cinfo;
8458 	pm_bp_child_pwrchg_t	*bpc;
8459 	pm_sp_misc_t		*pspm;
8460 	pm_bp_nexus_pwrup_t *bpn;
8461 	pm_bp_child_pwrchg_t new_bpc;
8462 	pm_bp_noinvol_t *bpi;
8463 	dev_info_t *tdip;
8464 	char *pathbuf;
8465 	int		ret = DDI_SUCCESS;
8466 	int		errno = 0;
8467 	pm_component_t *cp;
8468 
8469 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8470 	    pm_decode_op(op)))
8471 	switch (op) {
8472 	case BUS_POWER_CHILD_PWRCHG:
8473 		bpc = (pm_bp_child_pwrchg_t *)arg;
8474 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8475 		tdip = bpc->bpc_dip;
8476 		cdip = pm_get_next_descendent(dip, tdip);
8477 		cinfo = PM_GET_PM_INFO(cdip);
8478 		if (cdip != tdip) {
8479 			/*
8480 			 * If the node is an involved parent, it needs to
8481 			 * power up the node as it is needed.  There is nothing
8482 			 * else the framework can do here.
8483 			 */
8484 			if (PM_WANTS_NOTIFICATION(cdip)) {
8485 				PMD(PMD_SET, ("%s: call bus_power for "
8486 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8487 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8488 				    impl_arg, op, arg, result));
8489 			}
8490 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8491 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8492 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8493 			/*
8494 			 * we presume that the parent needs to be up in
8495 			 * order for the child to change state (either
8496 			 * because it must already be on if the child is on
8497 			 * (and the pm_all_to_normal_nexus() will be a nop)
8498 			 * or because it will need to be on for the child
8499 			 * to come on; so we make the call regardless
8500 			 */
8501 			pm_hold_power(cdip);
8502 			if (cinfo) {
8503 				pm_canblock_t canblock = pspm->pspm_canblock;
8504 				ret = pm_all_to_normal_nexus(cdip, canblock);
8505 				if (ret != DDI_SUCCESS) {
8506 					pm_rele_power(cdip);
8507 					return (ret);
8508 				}
8509 			}
8510 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8511 			    PM_DEVICE(cdip)))
8512 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8513 			    result);
8514 			pm_rele_power(cdip);
8515 		} else {
8516 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8517 			    result);
8518 		}
8519 		return (ret);
8520 
8521 	case BUS_POWER_NEXUS_PWRUP:
8522 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8523 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8524 
8525 		if (!e_pm_valid_info(dip, NULL) ||
8526 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8527 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8528 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8529 			    pmf, PM_DEVICE(dip)))
8530 			*pspm->pspm_errnop = EIO;
8531 			*(int *)result = DDI_FAILURE;
8532 			return (DDI_FAILURE);
8533 		}
8534 
8535 		ASSERT(bpn->bpn_dip == dip);
8536 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8537 		    PM_DEVICE(dip)))
8538 		new_bpc.bpc_dip = dip;
8539 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8540 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8541 		new_bpc.bpc_comp = bpn->bpn_comp;
8542 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8543 		new_bpc.bpc_nlevel = bpn->bpn_level;
8544 		new_bpc.bpc_private = bpn->bpn_private;
8545 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8546 		    PM_LEVEL_UPONLY;
8547 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8548 		    &errno;
8549 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8550 		    (void *)&new_bpc, result);
8551 		kmem_free(pathbuf, MAXPATHLEN);
8552 		return (ret);
8553 
8554 	case BUS_POWER_NOINVOL:
8555 		bpi = (pm_bp_noinvol_t *)arg;
8556 		tdip = bpi->bpni_dip;
8557 		cdip = pm_get_next_descendent(dip, tdip);
8558 
8559 		/* In case of rem_drv, the leaf node has been removed */
8560 		if (cdip == NULL)
8561 			return (DDI_SUCCESS);
8562 
8563 		cinfo = PM_GET_PM_INFO(cdip);
8564 		if (cdip != tdip) {
8565 			if (PM_WANTS_NOTIFICATION(cdip)) {
8566 				PMD(PMD_NOINVOL,
8567 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8568 				    pmf, PM_DEVICE(cdip)))
8569 				ret = (*PM_BUS_POWER_FUNC(cdip))
8570 				    (cdip, NULL, op, arg, result);
8571 				if ((cinfo) && (ret == DDI_SUCCESS))
8572 					(void) pm_noinvol_update_node(cdip,
8573 					    bpi);
8574 				return (ret);
8575 			} else {
8576 				PMD(PMD_NOINVOL,
8577 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8578 				    PM_DEVICE(cdip)))
8579 				ret = pm_busop_bus_power(cdip, NULL, op,
8580 				    arg, result);
8581 				/*
8582 				 * Update the current node.
8583 				 */
8584 				if ((cinfo) && (ret == DDI_SUCCESS))
8585 					(void) pm_noinvol_update_node(cdip,
8586 					    bpi);
8587 				return (ret);
8588 			}
8589 		} else {
8590 			/*
8591 			 * For attach, detach, power up:
8592 			 * Do nothing for leaf node since its
8593 			 * counts are already updated.
8594 			 * For CFB and driver removal, since the
8595 			 * path and the target dip passed in is up to and incl.
8596 			 * the immediate ancestor, need to do the update.
8597 			 */
8598 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8599 			    "reached\n", pmf, PM_DEVICE(cdip)))
8600 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8601 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8602 				(void) pm_noinvol_update_node(cdip, bpi);
8603 			return (DDI_SUCCESS);
8604 		}
8605 
8606 	default:
8607 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8608 		return (DDI_FAILURE);
8609 	}
8610 }
8611 
8612 static int
8613 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8614     void *arg, void *resultp)
8615 {
8616 	_NOTE(ARGUNUSED(impl_arg))
8617 	PMD_FUNC(pmf, "bp_set_power")
8618 	pm_ppm_devlist_t *devl = NULL;
8619 	int clevel, circ;
8620 #ifdef	DEBUG
8621 	int circ_db, ccirc_db;
8622 #endif
8623 	int ret = DDI_SUCCESS;
8624 	dev_info_t *cdip;
8625 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8626 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8627 	pm_canblock_t canblock = pspm->pspm_canblock;
8628 	int scan = pspm->pspm_scan;
8629 	int comp = bpc->bpc_comp;
8630 	int olevel = bpc->bpc_olevel;
8631 	int nlevel = bpc->bpc_nlevel;
8632 	int comps_off_incr = 0;
8633 	dev_info_t *pdip = ddi_get_parent(dip);
8634 	int dodeps;
8635 	int direction = pspm->pspm_direction;
8636 	int *errnop = pspm->pspm_errnop;
8637 #ifdef PMDDEBUG
8638 	char *dir = pm_decode_direction(direction);
8639 #endif
8640 	int *iresp = (int *)resultp;
8641 	time_t	idletime, thresh;
8642 	pm_component_t *cp = PM_CP(dip, comp);
8643 	int work_type;
8644 
8645 	*iresp = DDI_SUCCESS;
8646 	*errnop = 0;
8647 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8648 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8649 	    pm_decode_op(op)))
8650 
8651 	/*
8652 	 * The following set of conditions indicate we are here to handle a
8653 	 * driver's pm_[raise|lower]_power request, but the device is being
8654 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8655 	 * we want to pm_block and pass a status back to the caller based
8656 	 * on whether the controlling process's next activity on the device
8657 	 * matches the current request or not.  This distinction tells
8658 	 * downstream functions to avoid calling into a driver or changing
8659 	 * the framework's power state.  To actually block, we need:
8660 	 *
8661 	 * PM_ISDIRECT(dip)
8662 	 *	no reason to block unless a process is directly controlling dev
8663 	 * direction != PM_LEVEL_EXACT
8664 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8665 	 * !pm_processes_stopped
8666 	 *	don't block if controlling proc already be stopped for cpr
8667 	 * canblock != PM_CANBLOCK_BYPASS
8668 	 *	our caller must not have explicitly prevented blocking
8669 	 */
8670 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8671 		PM_LOCK_DIP(dip);
8672 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8673 			/* releases dip lock */
8674 			ret = pm_busop_match_request(dip, bpc);
8675 			if (ret == EAGAIN) {
8676 				PM_LOCK_DIP(dip);
8677 				continue;
8678 			}
8679 			return (*iresp = ret);
8680 		}
8681 		PM_UNLOCK_DIP(dip);
8682 	}
8683 	/* BC device is never scanned, so power will stick until we are done */
8684 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8685 	    direction != PM_LEVEL_DOWNONLY) {
8686 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8687 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8688 		    canblock, 0, resultp) != DDI_SUCCESS) {
8689 			/* *resultp set by pm_set_power */
8690 			return (DDI_FAILURE);
8691 		}
8692 	}
8693 	if (PM_WANTS_NOTIFICATION(pdip)) {
8694 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8695 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8696 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8697 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8698 		if (ret != DDI_SUCCESS) {
8699 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8700 			    pmf, PM_DEVICE(pdip)))
8701 			return (DDI_FAILURE);
8702 		}
8703 	} else {
8704 		/*
8705 		 * Since we don't know what the actual power level is,
8706 		 * we place a power hold on the parent no matter what
8707 		 * component and level is changing.
8708 		 */
8709 		pm_hold_power(pdip);
8710 	}
8711 	PM_LOCK_POWER(dip, &circ);
8712 	clevel = PM_CURPOWER(dip, comp);
8713 	/*
8714 	 * It's possible that a call was made to pm_update_maxpower()
8715 	 * on another thread before we took the lock above. So, we need to
8716 	 * make sure that this request isn't processed after the
8717 	 * change of power executed on behalf of pm_update_maxpower().
8718 	 */
8719 	if (nlevel > pm_get_normal_power(dip, comp)) {
8720 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8721 		    pmf))
8722 		ret = DDI_FAILURE;
8723 		*iresp = DDI_FAILURE;
8724 		goto post_notify;
8725 	}
8726 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8727 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8728 	    clevel, dir))
8729 	switch (direction) {
8730 	case PM_LEVEL_UPONLY:
8731 		/* Powering up */
8732 		if (clevel >= nlevel) {
8733 			PMD(PMD_SET, ("%s: current level is already "
8734 			    "at or above the requested level.\n", pmf))
8735 			*iresp = DDI_SUCCESS;
8736 			ret = DDI_SUCCESS;
8737 			goto post_notify;
8738 		}
8739 		break;
8740 	case PM_LEVEL_EXACT:
8741 		/* specific level request */
8742 		if (clevel == nlevel && !PM_ISBC(dip)) {
8743 			PMD(PMD_SET, ("%s: current level is already "
8744 			    "at the requested level.\n", pmf))
8745 			*iresp = DDI_SUCCESS;
8746 			ret = DDI_SUCCESS;
8747 			goto post_notify;
8748 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8749 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8750 			if (!pm_cfb_enabled) {
8751 				PMD(PMD_ERROR | PMD_CFB,
8752 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8753 				*errnop = EINVAL;
8754 				*iresp = DDI_FAILURE;
8755 				ret = DDI_FAILURE;
8756 				goto post_notify;
8757 			}
8758 			mutex_enter(&pm_cfb_lock);
8759 			while (cfb_inuse) {
8760 				mutex_exit(&pm_cfb_lock);
8761 				if (delay_sig(1) == EINTR) {
8762 					ret = DDI_FAILURE;
8763 					*iresp = DDI_FAILURE;
8764 					*errnop = EINTR;
8765 					goto post_notify;
8766 				}
8767 				mutex_enter(&pm_cfb_lock);
8768 			}
8769 			mutex_exit(&pm_cfb_lock);
8770 		}
8771 		break;
8772 	case PM_LEVEL_DOWNONLY:
8773 		/* Powering down */
8774 		thresh = cur_threshold(dip, comp);
8775 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8776 		if (scan && ((PM_KUC(dip) != 0) ||
8777 		    (cp->pmc_busycount > 0) ||
8778 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8779 #ifdef	DEBUG
8780 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8781 				PMD(PMD_SET, ("%s: scan failed: "
8782 				    "kidsupcnt != 0\n", pmf))
8783 			if (cp->pmc_busycount > 0)
8784 				PMD(PMD_SET, ("%s: scan failed: "
8785 				    "device become busy\n", pmf))
8786 			if (idletime < thresh)
8787 				PMD(PMD_SET, ("%s: scan failed: device "
8788 				    "hasn't been idle long enough\n", pmf))
8789 #endif
8790 			*iresp = DDI_FAILURE;
8791 			*errnop = EBUSY;
8792 			ret = DDI_FAILURE;
8793 			goto post_notify;
8794 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8795 			PMD(PMD_SET, ("%s: current level is already at "
8796 			    "or below the requested level.\n", pmf))
8797 			*iresp = DDI_SUCCESS;
8798 			ret = DDI_SUCCESS;
8799 			goto post_notify;
8800 		}
8801 		break;
8802 	}
8803 
8804 	if (PM_IS_CFB(dip) && (comps_off_incr =
8805 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8806 		/*
8807 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8808 		 * component from full power.  Remember that we tried to
8809 		 * lower power in case it fails and we need to back out
8810 		 * the adjustment.
8811 		 */
8812 		update_comps_off(comps_off_incr, dip);
8813 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8814 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8815 		    pm_cfb_comps_off))
8816 	}
8817 
8818 	if ((*iresp = power_dev(dip,
8819 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8820 #ifdef DEBUG
8821 		/*
8822 		 * All descendents of this node should already be powered off.
8823 		 */
8824 		if (PM_CURPOWER(dip, comp) == 0) {
8825 			pm_desc_pwrchk_t pdpchk;
8826 			pdpchk.pdpc_dip = dip;
8827 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8828 			ndi_devi_enter(dip, &circ_db);
8829 			for (cdip = ddi_get_child(dip); cdip != NULL;
8830 			    cdip = ddi_get_next_sibling(cdip)) {
8831 				ndi_devi_enter(cdip, &ccirc_db);
8832 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8833 				    (void *)&pdpchk);
8834 				ndi_devi_exit(cdip, ccirc_db);
8835 			}
8836 			ndi_devi_exit(dip, circ_db);
8837 		}
8838 #endif
8839 		/*
8840 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8841 		 * back up to full power.
8842 		 */
8843 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8844 			update_comps_off(comps_off_incr, dip);
8845 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8846 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8847 			    comp, clevel, nlevel, pm_cfb_comps_off))
8848 		}
8849 		dodeps = 0;
8850 		if (POWERING_OFF(clevel, nlevel)) {
8851 			if (PM_ISBC(dip)) {
8852 				dodeps = (comp == 0);
8853 			} else {
8854 				int i;
8855 				dodeps = 1;
8856 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8857 					/* if some component still on */
8858 					if (PM_CURPOWER(dip, i)) {
8859 						dodeps = 0;
8860 						break;
8861 					}
8862 				}
8863 			}
8864 			if (dodeps)
8865 				work_type = PM_DEP_WK_POWER_OFF;
8866 		} else if (POWERING_ON(clevel, nlevel)) {
8867 			if (PM_ISBC(dip)) {
8868 				dodeps = (comp == 0);
8869 			} else {
8870 				int i;
8871 				dodeps = 1;
8872 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8873 					if (i == comp)
8874 						continue;
8875 					if (PM_CURPOWER(dip, i) > 0) {
8876 						dodeps = 0;
8877 						break;
8878 					}
8879 				}
8880 			}
8881 			if (dodeps)
8882 				work_type = PM_DEP_WK_POWER_ON;
8883 		}
8884 
8885 		if (dodeps) {
8886 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8887 
8888 			(void) ddi_pathname(dip, pathbuf);
8889 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8890 			    PM_DEP_NOWAIT, NULL, 0);
8891 			kmem_free(pathbuf, MAXPATHLEN);
8892 		}
8893 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8894 			int old;
8895 
8896 			/* If old power cached during deadlock, use it. */
8897 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8898 			    cp->pmc_phc_pwr : olevel);
8899 			mutex_enter(&pm_rsvp_lock);
8900 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8901 			    old, canblock);
8902 			pm_enqueue_notify_others(&devl, canblock);
8903 			mutex_exit(&pm_rsvp_lock);
8904 		} else {
8905 			pm_ppm_devlist_t *p;
8906 			pm_ppm_devlist_t *next;
8907 			for (p = devl; p != NULL; p = next) {
8908 				next = p->ppd_next;
8909 				kmem_free(p, sizeof (pm_ppm_devlist_t));
8910 			}
8911 			devl = NULL;
8912 		}
8913 
8914 		/*
8915 		 * If we are coming from a scan, don't do it again,
8916 		 * else we can have infinite loops.
8917 		 */
8918 		if (!scan)
8919 			pm_rescan(dip);
8920 	} else {
8921 		/* if we incremented pm_comps_off_count, but failed */
8922 		if (comps_off_incr > 0) {
8923 			update_comps_off(-comps_off_incr, dip);
8924 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8925 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8926 			    comp, clevel, nlevel, pm_cfb_comps_off))
8927 		}
8928 		*errnop = EIO;
8929 	}
8930 
8931 post_notify:
8932 	/*
8933 	 * This thread may have been in deadlock with pm_power_has_changed.
8934 	 * Before releasing power lock, clear the flag which marks this
8935 	 * condition.
8936 	 */
8937 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8938 
8939 	/*
8940 	 * Update the old power level in the bus power structure with the
8941 	 * actual power level before the transition was made to the new level.
8942 	 * Some involved parents depend on this information to keep track of
8943 	 * their children's power transition.
8944 	 */
8945 	if (*iresp != DDI_FAILURE)
8946 		bpc->bpc_olevel = clevel;
8947 
8948 	if (PM_WANTS_NOTIFICATION(pdip)) {
8949 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8950 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8951 		PM_UNLOCK_POWER(dip, circ);
8952 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8953 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8954 		    PM_DEVICE(dip), ret))
8955 	} else {
8956 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8957 		PM_UNLOCK_POWER(dip, circ);
8958 		/*
8959 		 * Now that we know what power transition has occurred
8960 		 * (if any), release the power hold.  Leave the hold
8961 		 * in effect in the case of OFF->ON transition.
8962 		 */
8963 		if (!(clevel == 0 && nlevel > 0 &&
8964 		    (!PM_ISBC(dip) || comp == 0)))
8965 			pm_rele_power(pdip);
8966 		/*
8967 		 * If the power transition was an ON->OFF transition,
8968 		 * remove the power hold from the parent.
8969 		 */
8970 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8971 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8972 			pm_rele_power(pdip);
8973 	}
8974 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8975 		return (DDI_FAILURE);
8976 	else
8977 		return (DDI_SUCCESS);
8978 }
8979 
8980 /*
8981  * If an app (SunVTS or Xsun) has taken control, then block until it
8982  * gives it up or makes the requested power level change, unless
8983  * we have other instructions about blocking.  Entered with the dip
8984  * lock held (asserted); every return path drops the dip and power locks.
8985  * Returns DDI_SUCCESS, DDI_FAILURE or EAGAIN (owner released device
8986  * from directpm).
8987  */
8988 static int
8989 pm_busop_match_request(dev_info_t *dip, void *arg)
8990 {
8991 	PMD_FUNC(pmf, "bp_match_request")
8992 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8993 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8994 	int comp = bpc->bpc_comp;
8995 	int nlevel = bpc->bpc_nlevel;
8996 	pm_canblock_t canblock = pspm->pspm_canblock;
8997 	int direction = pspm->pspm_direction;
8998 	int clevel, circ, matched;
8999 
9000 	ASSERT(PM_IAM_LOCKING_DIP(dip));
9001 	PM_LOCK_POWER(dip, &circ);
9002 	clevel = PM_CURPOWER(dip, comp);
9003 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
9004 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
9005 	/* UPONLY requests are met by any level at or above the target. */
9006 	matched = (direction == PM_LEVEL_UPONLY) ?
9007 	    (clevel >= nlevel) : (clevel == nlevel);
9008 	if (matched || canblock == PM_CANBLOCK_FAIL) {
9009 		PM_UNLOCK_POWER(dip, circ);
9010 		PM_UNLOCK_DIP(dip);
9011 		return (matched ? DDI_SUCCESS : DDI_FAILURE);
9012 	}
9013 	if (canblock == PM_CANBLOCK_BLOCK) {
9014 		/*
9015 		 * To avoid a deadlock, we must not hold the
9016 		 * power lock when we pm_block.
9017 		 */
9018 		PM_UNLOCK_POWER(dip, circ);
9019 		PMD(PMD_SET, ("%s: blocking\n", pmf))
9020 		/* pm_block releases dip lock */
9021 		switch (pm_block(dip, comp, nlevel, clevel)) {
9022 		case PMP_RELEASE:
9023 			return (EAGAIN);
9024 		case PMP_SUCCEED:
9025 			return (DDI_SUCCESS);
9026 		case PMP_FAIL:
9027 			return (DDI_FAILURE);
9028 		}
9029 	} else {
9030 		/*
9031 		 * Unexpected canblock value.  ASSERT() is a no-op on non-DEBUG
9032 		 * kernels, so drop both locks as the other failure paths do.
9033 		 */
9034 		ASSERT(0);
9035 		PM_UNLOCK_POWER(dip, circ);
9036 		PM_UNLOCK_DIP(dip);
9037 	}
9038 	_NOTE(NOTREACHED);
9039 	return (DDI_FAILURE);	/* keep gcc happy */
9040 }
9041 
9042 /*
9043  * Set every component of dip to its normal power; fail if any one fails.
9044  */
9045 static int
9046 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
9047 {
9048 	PMD_FUNC(pmf, "all_to_normal_nexus")
9049 	pm_bp_nexus_pwrup_t	req;
9050 	pm_sp_misc_t	misc;
9051 	size_t		norm_size;
9052 	int		*norm_pwrs, comp, ncomps, rv;
9053 	int		nfailed = 0, result = DDI_SUCCESS;
9054 
9055 	ASSERT(PM_GET_PM_INFO(dip));
9056 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9057 	if (pm_get_norm_pwrs(dip, &norm_pwrs, &norm_size) != DDI_SUCCESS) {
9058 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
9059 		return (DDI_FAILURE);
9060 	}
9061 	ncomps = PM_NUMCMPTS(dip);
9062 	for (comp = 0; comp < ncomps; comp++) {
9063 		misc.pspm_canblock = canblock;
9064 		misc.pspm_scan = 0;
9065 		req.bpn_dip = dip;
9066 		req.bpn_comp = comp;
9067 		req.bpn_level = norm_pwrs[comp];
9068 		req.bpn_private = &misc;
9069 		rv = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
9070 		    (void *)&req, (void *)&result);
9071 		if (rv == DDI_SUCCESS && result == DDI_SUCCESS)
9072 			continue;
9073 		PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
9074 		    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
9075 		    comp, norm_pwrs[comp], result))
9076 		nfailed++;	/* keep going; the tally is reported below */
9077 	}
9078 	kmem_free(norm_pwrs, norm_size);
9079 	if (nfailed == 0)
9080 		return (DDI_SUCCESS);
9081 	PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
9082 	    "full power\n", pmf, nfailed, PM_DEVICE(dip)))
9083 	return (DDI_FAILURE);
9084 }
9085 
9086 /* Issue a BUS_POWER_NOINVOL request for path/tdip from the root node. */
9087 int
9088 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
9089     dev_info_t *tdip)
9090 {
9091 	PMD_FUNC(pmf, "noinvol_update")
9092 	pm_bp_noinvol_t args;
9093 	int result = DDI_SUCCESS;
9094 
9095 	args.bpni_path = path;
9096 	args.bpni_dip = tdip;
9097 	args.bpni_cmd = subcmd;
9098 	args.bpni_wasvolpmd = wasvolpmd;
9099 	args.bpni_volpmd = volpmd;
9100 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
9101 	    "volpmd %d wasvolpmd %d\n", pmf,
9102 	    path, (void *)tdip, subcmd, volpmd, wasvolpmd))
9103 	/* Only the bus op's return value is reported; result is not examined. */
9104 	return (pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
9105 	    &args, &result));
9106 }
9107 
9108 /*
9109  * Adjust dip's devi_pm_noinvolpm/devi_pm_volpmd counts as req->bpni_cmd
9110  * dictates, under the dip lock.  Unrecognized commands are ignored.
9111  */
9112 void
9113 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9114 {
9115 	PMD_FUNC(pmf, "noinvol_update_node")
9116 
9117 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9118 	switch (req->bpni_cmd) {
9119 	case PM_BP_NOINVOL_ATTACH:
9120 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH %s@%s(%s#%d) "
9121 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9122 		    DEVI(dip)->devi_pm_noinvolpm,
9123 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9124 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9125 		PM_LOCK_DIP(dip);
9126 		DEVI(dip)->devi_pm_noinvolpm--;
9127 		if (req->bpni_wasvolpmd) {
9128 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9129 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9130 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9131 			    DEVI(dip)->devi_pm_volpmd - 1))
9132 			if (DEVI(dip)->devi_pm_volpmd)
9133 				DEVI(dip)->devi_pm_volpmd--;
9134 		}
9135 		PM_UNLOCK_DIP(dip);
9136 		break;
9137 
9138 	case PM_BP_NOINVOL_DETACH:
9139 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9140 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9141 		    DEVI(dip)->devi_pm_noinvolpm,
9142 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9143 		PM_LOCK_DIP(dip);
9144 		DEVI(dip)->devi_pm_noinvolpm++;
9145 		if (req->bpni_wasvolpmd) {
9146 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9147 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9148 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9149 			    DEVI(dip)->devi_pm_volpmd + 1))
9150 			DEVI(dip)->devi_pm_volpmd++;
9151 		}
9152 		PM_UNLOCK_DIP(dip);
9153 		break;
9154 
9155 	case PM_BP_NOINVOL_REMDRV:
9156 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9157 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9158 		    DEVI(dip)->devi_pm_noinvolpm,
9159 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9160 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9161 		PM_LOCK_DIP(dip);
9162 		DEVI(dip)->devi_pm_noinvolpm--;
9163 		if (req->bpni_wasvolpmd) {
9164 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV "
9165 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9166 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9167 			    DEVI(dip)->devi_pm_volpmd - 1))
9168 			/*
9169 			 * A power up could come in between and clear the
9170 			 * volpmd; only decrement if it is still set.
9171 			 */
9172 			if (DEVI(dip)->devi_pm_volpmd)
9173 				DEVI(dip)->devi_pm_volpmd--;
9174 		}
9175 		PM_UNLOCK_DIP(dip);
9176 		break;
9177 
9178 	case PM_BP_NOINVOL_CFB:
9179 		PMD(PMD_NOINVOL,
9180 		    ("%s: PM_BP_NOINVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9181 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9182 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9183 		PM_LOCK_DIP(dip);
9184 		DEVI(dip)->devi_pm_noinvolpm++;
9185 		PM_UNLOCK_DIP(dip);
9186 		break;
9187 
9188 	case PM_BP_NOINVOL_POWER:
9189 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_POWER %s@%s(%s#%d) "
9190 		    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9191 		    DEVI(dip)->devi_pm_volpmd,
9192 		    DEVI(dip)->devi_pm_volpmd - req->bpni_volpmd))
9193 		PM_LOCK_DIP(dip);
9194 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9195 		PM_UNLOCK_DIP(dip);
9196 		break;
9197 
9198 	default:
9199 		break;
9200 	}
9201 }
9202 
9203 #ifdef DEBUG
9204 /*
9205  * Walk callback: report each component of dip that still has power
9206  * while pdpc_dip is powering off; panics unless pdpc_par_involved is set.
9207  */
9208 static int
9209 pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
9210 {
9211 	PMD_FUNC(pmf, "desc_pwrchk")
9212 	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
9213 	/* LINTED */
9214 	int comp, pwr, severity;
9215 
9216 	if (PM_GET_PM_INFO(dip) == NULL)
9217 		return (DDI_WALK_CONTINUE);
9218 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9219 	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
9220 		/* LINTED */
9221 		if ((pwr = PM_CURPOWER(dip, comp)) == 0)
9222 			continue;
9223 		/* E_FUNC_SET_NOT_USED */
9224 		severity = chk->pdpc_par_involved ? CE_WARN : CE_PANIC;
9225 		PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while desc "
9226 		    "%s@%s(%s#%d)[%d] is at %d\n", pmf,
9227 		    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), comp, pwr))
9228 		cmn_err(severity, "!device %s@%s(%s#%d) is powered on, "
9229 		    "while its ancestor, %s@%s(%s#%d), is powering off!",
9230 		    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
9231 	}
9232 	return (DDI_WALK_CONTINUE);
9233 }
9234 #endif
9235 
9236 /*
9237  * Note that curthread is borrowing a device node lock from lender by
9238  * pushing a loan record onto the front of the lock_loan_head list.  Only
9239  * valid when the lender blocks until the borrower (curthread) is done.
9240  */
9241 void
9242 pm_borrow_lock(kthread_t *lender)
9243 {
9244 	lock_loan_t *loan;
9245 
9246 	loan = kmem_zalloc(sizeof (lock_loan_t), KM_SLEEP);
9247 	loan->pmlk_lender = lender;
9248 	loan->pmlk_borrower = curthread;
9249 	mutex_enter(&pm_loan_lock);
9250 	loan->pmlk_next = lock_loan_head.pmlk_next;
9251 	lock_loan_head.pmlk_next = loan;
9252 	mutex_exit(&pm_loan_lock);
9253 }
9254 
9255 /*
9256  * Return the borrowed lock: unlink and free curthread's loan record.
9257  * A thread can borrow only one.
9258  */
9259 void
9260 pm_return_lock(void)
9261 {
9262 	lock_loan_t **linkp = &lock_loan_head.pmlk_next;
9263 	lock_loan_t *loan;
9264 
9265 	mutex_enter(&pm_loan_lock);
9266 	ASSERT(*linkp != NULL);
9267 	/* advance to the link that points at curthread's record */
9268 	while ((loan = *linkp) != NULL && loan->pmlk_borrower != curthread)
9269 		linkp = &loan->pmlk_next;
9270 	ASSERT(loan != NULL);
9271 	*linkp = loan->pmlk_next;
9272 	mutex_exit(&pm_loan_lock);
9273 	kmem_free(loan, sizeof (*loan));
9274 }
9275 
9276 #if defined(__x86)
9277 
9278 #define	CPR_RXR	0x1	/* CPR_LSTAT bit pm_getchar() waits for */
9279 #define	CPR_TXR	0x20	/* CPR_LSTAT bit pm_putchar() waits for */
9280 #define	CPR_DATAREG	0x3f8	/* port read by pm_getchar(), written by pm_putchar() */
9281 #define	CPR_LSTAT	0x3fd	/* status port polled before each transfer */
9282 #define	CPR_INTRCTL	0x3f9	/* not referenced nearby -- TODO confirm use */
9283 
9284 /* Wait for CPR_RXR in CPR_LSTAT, then return the byte in CPR_DATAREG. */
9285 char
9286 pm_getchar(void)
9287 {
9288 	/* drv_usecwait(10) runs between successive polls of the status port */
9289 	for (; (inb(CPR_LSTAT) & CPR_RXR) != CPR_RXR; drv_usecwait(10))
9290 		continue;
9291 	return (inb(CPR_DATAREG));
9292 }
9293 
9294 /* Wait for CPR_TXR in CPR_LSTAT, then write c to CPR_DATAREG. */
9295 void
9296 pm_putchar(char c)
9297 {
9298 	while (!(inb(CPR_LSTAT) & CPR_TXR))
9299 		drv_usecwait(10);
9300 	outb(CPR_DATAREG, c);
9301 }
9302 
9303 /* Write each character of the NUL-terminated string s with pm_putchar(). */
9304 void
9305 pm_printf(char *s)
9306 {
9307 	for (; *s != '\0'; s++)
9308 		pm_putchar(*s);
9309 }
9310 
9311 #endif
9312 
9313 /* Send a PMR_PPM_SEARCH_LIST request for sp to the root node's ppm. */
9314 int
9315 pm_ppm_searchlist(pm_searchargs_t *sp)
9316 {
9317 	power_req_t search_req;
9318 	/* LINTED */
9319 	int rv, result = 0;
9320 
9321 	search_req.request_type = PMR_PPM_SEARCH_LIST;
9322 	search_req.req.ppm_search_list_req.searchlist = sp;
9323 	ASSERT(DEVI(ddi_root_node())->devi_pm_ppm);
9324 	rv = pm_ctlops((dev_info_t *)DEVI(ddi_root_node())->devi_pm_ppm,
9325 	    ddi_root_node(), DDI_CTLOPS_POWER, &search_req, &result);
9326 	PMD(PMD_SX, ("pm_ppm_searchlist returns %d, result %d\n",
9327 	    rv, result))
9328 	return (result);	/* the ctlop's result word, not its status */
9329 }
9330