xref: /titanic_50/usr/src/uts/common/os/sunpm.c (revision 0e7515250c8395f368aa45fb9acae7c4f8f8b786)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * sunpm.c builds sunpm.o	"power management framework"
28  *	kernel-resident power management code.  Implements power management
29  *	policy
30  *	Assumes: all backwards compat. device components wake up on &
31  *		 the pm_info pointer in dev_info is initially NULL
32  *
33  * PM - (device) Power Management
34  *
35  * Each device may have 0 or more components.  If a device has no components,
36  * then it can't be power managed.  Each component has 2 or more
37  * power states.
38  *
39  * "Backwards Compatible" (bc) devices:
40  * There are two different types of devices from the point of view of this
41  * code.  The original type, left over from the original PM implementation on
42  * the voyager platform are known in this code as "backwards compatible"
43  * devices (PM_ISBC(dip) returns true).
44  * They are recognized by the pm code by the lack of a pm-components property
45  * and a call made by the driver to pm_create_components(9F).
46  * For these devices, component 0 is special, and represents the power state
47  * of the device.  If component 0 is to be set to power level 0 (off), then
48  * the framework must first call into the driver's detach(9E) routine with
49  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
50  * After setting component 0 from 0 to a non-zero power level, a call must be
51  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
52  *
53  * Currently, the only way to get a bc device power managed is via a set of
54  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
55  *
56  * For non-bc devices, the driver describes the components by exporting a
57  * pm-components(9P) property that tells how many components there are,
58  * tells what each component's power state values are, and provides human
59  * readable strings (currently unused) for each component name and power state.
60  * Devices which export pm-components(9P) are automatically power managed
61  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
62  * after parsing power.conf(4)). The exception to this rule is that power
63  * manageable CPU devices may be automatically managed independently of autopm
64  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
65  * ioctls) cpupm. If the CPU devices are not managed independently, then they
66  * are managed by autopm. In either case, for automatically power managed
67  * devices, all components are considered independent of each other, and it is
68  * up to the driver to decide when a transition requires saving or restoring
69  * hardware state.
70  *
71  * Each device component also has a threshold time associated with each power
72  * transition (see power.conf(4)), and a busy/idle state maintained by the
73  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
74  * Components are created idle.
75  *
76  * The PM framework provides several functions:
77  * -implement PM policy as described in power.conf(4)
78  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
79  *  Policies consist of:
80  *    -set threshold values (defaults if none provided by pmconfig)
81  *    -set dependencies among devices
82  *    -enable/disable autopm
83  *    -enable/disable cpupm
84  *    -turn down idle components based on thresholds (if autopm or cpupm is
85  *     enabled) (aka scanning)
86  *    -maintain power states based on dependencies among devices
87  *    -upon request, or when the frame buffer powers off, attempt to turn off
88  *     all components that are idle or become idle over the next (10 sec)
89  *     period in an attempt to get down to an EnergyStar compliant state
90  *    -prevent powering off of a device which exported the
91  *     pm-no-involuntary-power-cycles property without active involvement of
92  *     the device's driver (so no removing power when the device driver is
93  *     not attached)
94  * -provide a mechanism for a device driver to request that a device's component
95  *  be brought back to the power level necessary for the use of the device
96  * -allow a process to directly control the power levels of device components
97  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
98  * -ensure that the console frame buffer is powered up before being referenced
99  *  via prom_printf() or other prom calls that might generate console output
100  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
101  * -provide "backwards compatible" behavior for devices without pm-components
102  *  property
103  *
104  * Scanning:
105  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
106  * component of each managed device to its lowest power based on the threshold
107  * of idleness associated with each transition and the busy/idle state of the
108  * component.
109  *
110  * The actual work of this is done by pm_scan_dev(), which cycles through each
111  * component of a device, checking its idleness against its current threshold,
112  * and calling pm_set_power() as appropriate to change the power level.
113  * This function also indicates when it would next be profitable to scan the
114  * device again, and a new scan is scheduled after that time.
115  *
116  * Dependencies:
117  * It is possible to establish a dependency between the power states of two
118  * otherwise unrelated devices.  This is currently done to ensure that the
119  * cdrom is always up whenever the console framebuffer is up, so that the user
120  * can insert a cdrom and see a popup as a result.
121  *
122  * The dependency terminology used in power.conf(4) is not easy to understand,
123  * so we've adopted a different terminology in the implementation.  We write
124  * of a "keeps up" and a "kept up" device.  A relationship can be established
125  * where one device keeps up another.  That means that if the keepsup device
126  * has any component that is at a non-zero power level, all components of the
127  * "kept up" device must be brought to full power.  This relationship is
128  * asynchronous.  When the keeping device is powered up, a request is queued
129  * to a worker thread to bring up the kept device.  The caller does not wait.
130  * Scan will not turn down a kept up device.
131  *
132  * Direct PM:
133  * A device may be directly power managed by a process.  If a device is
134  * directly pm'd, then it will not be scanned, and dependencies will not be
 * enforced.  If a directly pm'd device's driver requests a power change (via
136  * pm_raise_power(9F)), then the request is blocked and notification is sent
137  * to the controlling process, which must issue the requested power change for
138  * the driver to proceed.
139  *
140  */
141 
142 #include <sys/types.h>
143 #include <sys/errno.h>
144 #include <sys/callb.h>		/* callback registration during CPR */
145 #include <sys/conf.h>		/* driver flags and functions */
146 #include <sys/open.h>		/* OTYP_CHR definition */
147 #include <sys/stat.h>		/* S_IFCHR definition */
148 #include <sys/pathname.h>	/* name -> dev_info xlation */
149 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
150 #include <sys/kmem.h>		/* memory alloc stuff */
151 #include <sys/debug.h>
152 #include <sys/archsystm.h>
153 #include <sys/pm.h>
154 #include <sys/ddi.h>
155 #include <sys/sunddi.h>
156 #include <sys/sunndi.h>
157 #include <sys/sunpm.h>
158 #include <sys/epm.h>
159 #include <sys/vfs.h>
160 #include <sys/mode.h>
161 #include <sys/mkdev.h>
162 #include <sys/promif.h>
163 #include <sys/consdev.h>
164 #include <sys/esunddi.h>
165 #include <sys/modctl.h>
166 #include <sys/fs/ufs_fs.h>
167 #include <sys/note.h>
168 #include <sys/taskq.h>
169 #include <sys/bootconf.h>
170 #include <sys/reboot.h>
171 #include <sys/spl.h>
172 #include <sys/disp.h>
173 #include <sys/sobject.h>
174 #include <sys/sunmdi.h>
175 #include <sys/systm.h>
176 #include <sys/cpuvar.h>
177 #include <sys/cyclic.h>
178 #include <sys/uadmin.h>
179 #include <sys/srn.h>
180 
181 
182 /*
183  * PM LOCKING
184  *	The list of locks:
185  * Global pm mutex locks.
186  *
187  * pm_scan_lock:
188  *		It protects the timeout id of the scan thread, and the value
189  *		of autopm_enabled and cpupm.  This lock is not held
190  *		concurrently with any other PM locks.
191  *
192  * pm_clone_lock:	Protects the clone list and count of poll events
193  *		pending for the pm driver.
194  *		Lock ordering:
195  *			pm_clone_lock -> pm_pscc_interest_rwlock,
196  *			pm_clone_lock -> pm_pscc_direct_rwlock.
197  *
198  * pm_rsvp_lock:
199  *		Used to synchronize the data structures used for processes
200  *		to rendezvous with state change information when doing
201  *		direct PM.
202  *		Lock ordering:
203  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
204  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
205  *			pm_rsvp_lock -> pm_clone_lock.
206  *
207  * ppm_lock:	protects the list of registered ppm drivers
208  *		Lock ordering:
209  *			ppm_lock -> ppm driver unit_lock
210  *
211  * pm_compcnt_lock:
212  *		Protects count of components that are not at their lowest
213  *		power level.
214  *		Lock ordering:
215  *			pm_compcnt_lock -> ppm_lock.
216  *
217  * pm_dep_thread_lock:
218  *		Protects work list for pm_dep_thread.  Not taken concurrently
219  *		with any other pm lock.
220  *
221  * pm_remdrv_lock:
222  *		Serializes the operation of removing noinvol data structure
223  *		entries for a branch of the tree when a driver has been
224  *		removed from the system (modctl_rem_major).
225  *		Lock ordering:
226  *			pm_remdrv_lock -> pm_noinvol_rwlock.
227  *
228  * pm_cfb_lock: (High level spin lock)
229  *		Protects the count of how many components of the console
230  *		frame buffer are off (so we know if we have to bring up the
 *		console as a result of a prom_printf, etc.).
232  *		No other locks are taken while holding this lock.
233  *
234  * pm_loan_lock:
235  *		Protects the lock_loan list.  List is used to record that one
236  *		thread has acquired a power lock but has launched another thread
237  *		to complete its processing.  An entry in the list indicates that
238  *		the worker thread can borrow the lock held by the other thread,
239  *		which must block on the completion of the worker.  Use is
240  *		specific to module loading.
241  *		No other locks are taken while holding this lock.
242  *
243  * Global PM rwlocks
244  *
245  * pm_thresh_rwlock:
246  *		Protects the list of thresholds recorded for future use (when
247  *		devices attach).
248  *		Lock ordering:
249  *			pm_thresh_rwlock -> devi_pm_lock
250  *
251  * pm_noinvol_rwlock:
252  *		Protects list of detached nodes that had noinvol registered.
253  *		No other PM locks are taken while holding pm_noinvol_rwlock.
254  *
255  * pm_pscc_direct_rwlock:
256  *		Protects the list that maps devices being directly power
257  *		managed to the processes that manage them.
258  *		Lock ordering:
259  *			pm_pscc_direct_rwlock -> psce_lock
260  *
 * pm_pscc_interest_rwlock:
262  *		Protects the list that maps state change events to processes
263  *		that want to know about them.
264  *		Lock ordering:
265  *			pm_pscc_interest_rwlock -> psce_lock
266  *
267  * per-dip locks:
268  *
269  * Each node has these per-dip locks, which are only used if the device is
270  * a candidate for power management (e.g. has pm components)
271  *
272  * devi_pm_lock:
273  *		Protects all power management state of the node except for
274  *		power level, which is protected by ndi_devi_enter().
275  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
276  *		Lock ordering:
277  *			devi_pm_lock -> pm_rsvp_lock,
278  *			devi_pm_lock -> pm_dep_thread_lock,
279  *			devi_pm_lock -> pm_noinvol_rwlock,
280  *			devi_pm_lock -> power lock
281  *
282  * power lock (ndi_devi_enter()):
283  *		Since changing power level is possibly a slow operation (30
284  *		seconds to spin up a disk drive), this is locked separately.
285  *		Since a call into the driver to change the power level of one
286  *		component may result in a call back into the framework to change
287  *		the power level of another, this lock allows re-entrancy by
288  *		the same thread (ndi_devi_enter is used for this because
289  *		the USB framework uses ndi_devi_enter in its power entry point,
 *		and use of any other lock would produce a deadlock).
291  *
292  * devi_pm_busy_lock:
293  *		This lock protects the integrity of the busy count.  It is
294  *		only taken by pm_busy_component() and pm_idle_component and
 *		some code that adjusts the busy time after the timer gets set
296  *		up or after a CPR operation.  It is per-dip to keep from
297  *		single-threading all the disk drivers on a system.
298  *		It could be per component instead, but most devices have
299  *		only one component.
300  *		No other PM locks are taken while holding this lock.
301  *
302  */
303 
304 static int stdout_is_framebuffer;
305 static kmutex_t	e_pm_power_lock;
306 static kmutex_t pm_loan_lock;
307 kmutex_t	pm_scan_lock;
308 callb_id_t	pm_cpr_cb_id;
309 callb_id_t	pm_panic_cb_id;
310 callb_id_t	pm_halt_cb_id;
311 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
312 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
313 
314 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
315 clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
316 clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
317 
318 #define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
319 				    pm_default_min_scan)
320 
321 static int pm_busop_set_power(dev_info_t *,
322     void *, pm_bus_power_op_t, void *, void *);
323 static int pm_busop_match_request(dev_info_t *, void *);
324 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
325 static void e_pm_set_max_power(dev_info_t *, int, int);
326 static int e_pm_get_max_power(dev_info_t *, int);
327 
328 /*
 * Dependency Processing is done through a separate thread.
330  */
331 kmutex_t	pm_dep_thread_lock;
332 kcondvar_t	pm_dep_thread_cv;
333 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
334 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
335 
336 /*
337  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
338  * power managing things in single user mode that have been suppressed via
339  * power.conf entries.  Protected by pm_scan_lock.
340  */
341 int		autopm_enabled;
342 
343 /*
344  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
345  * to define the power management behavior of CPU devices separate from
346  * autopm. Protected by pm_scan_lock.
347  */
348 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
349 
350 /*
351  * Defines the default mode of operation for CPU power management,
352  * either the polling implementation, or the event based dispatcher driven
353  * implementation.
354  */
355 pm_cpupm_t	cpupm_default_mode = PM_CPUPM_EVENT;
356 
357 /*
358  * AutoS3 depends on autopm being enabled, and must be enabled by
359  * PM_START_AUTOS3 command.
360  */
361 int		autoS3_enabled;
362 
363 #if !defined(__sparc)
364 /*
365  * on sparc these live in fillsysinfo.c
366  *
367  * If this variable is non-zero, cpr should return "not supported" when
368  * it is queried even though it would normally be supported on this platform.
369  */
370 int cpr_supported_override;
371 
372 /*
373  * Some platforms may need to support CPR even in the absence of
374  * having the correct platform id information.  If this
375  * variable is non-zero, cpr should proceed even in the absence
376  * of otherwise being qualified.
377  */
378 int cpr_platform_enable = 0;
379 
380 #endif
381 
382 /*
383  * pm_S3_enabled indicates that we believe the platform can support S3,
384  * which we get from pmconfig(1M)
385  */
386 int		pm_S3_enabled;
387 
388 /*
389  * This flag is true while processes are stopped for a checkpoint/resume.
390  * Controlling processes of direct pm'd devices are not available to
391  * participate in power level changes, so we bypass them when this is set.
392  */
393 static int	pm_processes_stopped;
394 
395 #ifdef	DEBUG
396 
397 /*
398  * see common/sys/epm.h for PMD_* values
399  */
400 
401 uint_t		pm_debug = 0;
402 
403 /*
404  * If pm_divertdebug is set, then no prom_printf calls will be made by
405  * PMD(), which will prevent debug output from bringing up the console
406  * frame buffer.  Clearing this variable before setting pm_debug will result
407  * in PMD output going to the console.
408  *
409  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
410  * deadlocks and decremented at the end of pm_set_power()
411  */
412 uint_t		pm_divertdebug = 1;
413 volatile uint_t pm_debug_to_console = 0;
414 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
415 
416 void prdeps(char *);
417 #endif
418 
419 /* Globals */
420 
421 /*
422  * List of recorded thresholds and dependencies
423  */
424 pm_thresh_rec_t *pm_thresh_head;
425 krwlock_t pm_thresh_rwlock;
426 
427 pm_pdr_t *pm_dep_head;
428 static int pm_unresolved_deps = 0;
429 static int pm_prop_deps = 0;
430 
431 /*
432  * List of devices that exported no-involuntary-power-cycles property
433  */
434 pm_noinvol_t *pm_noinvol_head;
435 
436 /*
437  * Locks used in noinvol processing
438  */
439 krwlock_t pm_noinvol_rwlock;
440 kmutex_t pm_remdrv_lock;
441 
442 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
443 int pm_system_idle_threshold;
444 int pm_cpu_idle_threshold;
445 
446 /*
447  * By default nexus has 0 threshold, and depends on its children to keep it up
448  */
449 int pm_default_nexus_threshold = 0;
450 
451 /*
452  * Data structures shared with common/io/pm.c
453  */
454 kmutex_t	pm_clone_lock;
455 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
456 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
457 unsigned char	pm_interest[PM_MAX_CLONE];
458 struct pollhead	pm_pollhead;
459 
460 /*
461  * Data structures shared with common/io/srn.c
462  */
463 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
464 void (*srn_signal)(int type, int event);
465 int srn_inuse;				/* stop srn detach */
466 
467 extern int	hz;
468 extern char	*platform_module_list[];
469 
470 /*
471  * Wrappers for use in ddi_walk_devs
472  */
473 
474 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
475 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
476 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
477 static int		pm_discard_dep_walk(dev_info_t *, void *);
478 #ifdef DEBUG
479 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
480 #endif
481 
482 /*
483  * Routines for managing noinvol devices
484  */
485 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
486 void			pm_noinvol_update_node(dev_info_t *,
487 			    pm_bp_noinvol_t *req);
488 
489 kmutex_t pm_rsvp_lock;
490 kmutex_t pm_compcnt_lock;
491 krwlock_t pm_pscc_direct_rwlock;
492 krwlock_t pm_pscc_interest_rwlock;
493 
494 #define	PSC_INTEREST	0	/* belongs to interest psc list */
495 #define	PSC_DIRECT	1	/* belongs to direct psc list */
496 
497 pscc_t *pm_pscc_interest;
498 pscc_t *pm_pscc_direct;
499 
/* Major number of the driver bound to dip, via ddi_driver_major(). */
#define	PM_MAJOR(dip) ddi_driver_major(dip)
/*
 * Evaluates to 0 when dip has no driver major (DDI_MAJOR_T_NONE); otherwise
 * yields NEXUS_DRV() applied to the dev_ops entry for that major.
 * Note that dip is evaluated more than once.
 */
#define	PM_IS_NEXUS(dip) ((PM_MAJOR(dip) == DDI_MAJOR_T_NONE) ? 0 : \
	NEXUS_DRV(devopsp[PM_MAJOR(dip)]))
/* True when a level change goes from zero to a non-zero level. */
#define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
/* True when a level change goes from a non-zero level to zero. */
#define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
505 
/*
 * PM_INCR_NOTLOWEST(dip): under pm_compcnt_lock, bump pm_comps_notlowest
 * for dip.  The count is only touched when dip is not a nexus, or when it
 * has PMC_DEV_THRESH or PMC_COMP_THRESH set in devi_pm_flags.  When the
 * count is about to leave zero, pm_ppm_notify_all_lowest() is called with
 * PM_NOT_ALL_LOWEST first.
 *
 * The PMD() call references `pmf', so the expanding function must provide
 * it (see PMD_FUNC) whenever PMD() actually uses its arguments.
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
/*
 * PM_DECR_NOTLOWEST(dip): counterpart of PM_INCR_NOTLOWEST.  Under
 * pm_compcnt_lock, and subject to the same nexus/threshold-flag test,
 * asserts the count is non-zero, decrements it, and calls
 * pm_ppm_notify_all_lowest() with PM_ALL_LOWEST when it reaches zero.
 * Also references the caller's `pmf' through PMD().
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
531 
532 /*
533  * console frame-buffer power-management is not enabled when
534  * debugging services are present.  to override, set pm_cfb_override
535  * to non-zero.
536  */
537 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
538 kmutex_t pm_cfb_lock;
539 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
540 #ifdef DEBUG
541 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
542 #else
543 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
544 #endif
545 
546 static dev_info_t *cfb_dip = 0;
547 static dev_info_t *cfb_dip_detaching = 0;
548 uint_t cfb_inuse = 0;
549 static ddi_softintr_t pm_soft_id;
550 static clock_t pm_soft_pending;
551 int	pm_scans_disabled = 0;
552 
553 /*
554  * A structure to record the fact that one thread has borrowed a lock held
555  * by another thread.  The context requires that the lender block on the
556  * completion of the borrower.
557  */
typedef struct lock_loan {
	struct lock_loan	*pmlk_next;	/* next entry on the loan list */
	kthread_t		*pmlk_borrower;	/* thread borrowing the lock */
	kthread_t		*pmlk_lender;	/* thread lending the lock */
	/* NOTE(review): presumably the node whose lock is loaned -- confirm */
	dev_info_t		*pmlk_dip;
} lock_loan_t;
static lock_loan_t lock_loan_head;	/* list head is a dummy element */
565 
566 #ifdef	DEBUG
567 #ifdef	PMDDEBUG
568 #define	PMD_FUNC(func, name)	char *(func) = (name);
569 #else	/* !PMDDEBUG */
570 #define	PMD_FUNC(func, name)
571 #endif	/* PMDDEBUG */
572 #else	/* !DEBUG */
573 #define	PMD_FUNC(func, name)
574 #endif	/* DEBUG */
575 
576 
577 /*
578  * Must be called before first device (including pseudo) attach
579  */
580 void
581 pm_init_locks(void)
582 {
583 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
584 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
585 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
586 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
587 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
588 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
589 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
590 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
591 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
592 }
593 
594 static boolean_t
595 pm_cpr_callb(void *arg, int code)
596 {
597 	_NOTE(ARGUNUSED(arg))
598 	static int auto_save;
599 	static pm_cpupm_t cpupm_save;
600 	static int pm_reset_timestamps(dev_info_t *, void *);
601 
602 	switch (code) {
603 	case CB_CODE_CPR_CHKPT:
604 		/*
605 		 * Cancel scan or wait for scan in progress to finish
606 		 * Other threads may be trying to restart the scan, so we
607 		 * have to keep at it unil it sticks
608 		 */
609 		mutex_enter(&pm_scan_lock);
610 		ASSERT(!pm_scans_disabled);
611 		pm_scans_disabled = 1;
612 		auto_save = autopm_enabled;
613 		autopm_enabled = 0;
614 		cpupm_save = cpupm;
615 		cpupm = PM_CPUPM_NOTSET;
616 		mutex_exit(&pm_scan_lock);
617 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
618 		break;
619 
620 	case CB_CODE_CPR_RESUME:
621 		ASSERT(!autopm_enabled);
622 		ASSERT(cpupm == PM_CPUPM_NOTSET);
623 		ASSERT(pm_scans_disabled);
624 		pm_scans_disabled = 0;
625 		/*
626 		 * Call pm_reset_timestamps to reset timestamps of each
627 		 * device to the time when the system is resumed so that their
628 		 * idleness can be re-calculated. That's to avoid devices from
629 		 * being powered down right after resume if the system was in
630 		 * suspended mode long enough.
631 		 */
632 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
633 
634 		autopm_enabled = auto_save;
635 		cpupm = cpupm_save;
636 		/*
637 		 * If there is any auto-pm device, get the scanning
638 		 * going. Otherwise don't bother.
639 		 */
640 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
641 		break;
642 	}
643 	return (B_TRUE);
644 }
645 
646 /*
647  * This callback routine is called when there is a system panic.  This function
648  * exists for prototype matching.
649  */
650 static boolean_t
651 pm_panic_callb(void *arg, int code)
652 {
653 	_NOTE(ARGUNUSED(arg, code))
654 	void pm_cfb_check_and_powerup(void);
655 	PMD(PMD_CFB, ("pm_panic_callb\n"))
656 	pm_cfb_check_and_powerup();
657 	return (B_TRUE);
658 }
659 
660 static boolean_t
661 pm_halt_callb(void *arg, int code)
662 {
663 	_NOTE(ARGUNUSED(arg, code))
664 	return (B_TRUE);
665 }
666 
667 /*
668  * This needs to be called after the root and platform drivers are loaded
669  * and be single-threaded with respect to driver attach/detach
670  */
671 void
672 pm_init(void)
673 {
674 	PMD_FUNC(pmf, "pm_init")
675 	char **mod;
676 	extern pri_t minclsyspri;
677 	static void pm_dep_thread(void);
678 
679 	pm_comps_notlowest = 0;
680 	pm_system_idle_threshold = pm_default_idle_threshold;
681 	pm_cpu_idle_threshold = 0;
682 
683 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
684 	    CB_CL_CPR_PM, "pm_cpr");
685 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
686 	    CB_CL_PANIC, "pm_panic");
687 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
688 	    CB_CL_HALT, "pm_halt");
689 
690 	/*
691 	 * Create a thread to do dependency processing.
692 	 */
693 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
694 	    TS_RUN, minclsyspri);
695 
696 	/*
697 	 * loadrootmodules already loaded these ppm drivers, now get them
698 	 * attached so they can claim the root drivers as they attach
699 	 */
700 	for (mod = platform_module_list; *mod; mod++) {
701 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
702 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
703 			    *mod);
704 		} else {
705 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
706 			    ddi_major_to_name(ddi_name_to_major(*mod))))
707 		}
708 	}
709 }
710 
711 /*
712  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
713  * enabled) when device becomes power managed or after a failed detach and
714  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
715  * a CPR resume to get all the devices scanning again.
716  */
717 void
718 pm_scan_init(dev_info_t *dip)
719 {
720 	PMD_FUNC(pmf, "scan_init")
721 	pm_scan_t	*scanp;
722 
723 	ASSERT(!PM_ISBC(dip));
724 
725 	PM_LOCK_DIP(dip);
726 	scanp = PM_GET_PM_SCAN(dip);
727 	if (!scanp) {
728 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
729 		    pmf, PM_DEVICE(dip)))
730 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
731 		DEVI(dip)->devi_pm_scan = scanp;
732 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
733 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
734 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
735 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
736 	}
737 	PM_UNLOCK_DIP(dip);
738 }
739 
740 /*
741  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
742  */
743 void
744 pm_scan_fini(dev_info_t *dip)
745 {
746 	PMD_FUNC(pmf, "scan_fini")
747 	pm_scan_t	*scanp;
748 
749 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
750 	ASSERT(!PM_ISBC(dip));
751 	PM_LOCK_DIP(dip);
752 	scanp = PM_GET_PM_SCAN(dip);
753 	if (!scanp) {
754 		PM_UNLOCK_DIP(dip);
755 		return;
756 	}
757 
758 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
759 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
760 
761 	kmem_free(scanp, sizeof (pm_scan_t));
762 	DEVI(dip)->devi_pm_scan = NULL;
763 	PM_UNLOCK_DIP(dip);
764 }
765 
766 /*
767  * Given a pointer to a component struct, return the current power level
768  * (struct contains index unless it is a continuous level).
769  * Located here in hopes of getting both this and dev_is_needed into the
770  * cache together
771  */
772 static int
773 cur_power(pm_component_t *cp)
774 {
775 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
776 		return (cp->pmc_cur_pwr);
777 
778 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
779 }
780 
781 static char *
782 pm_decode_direction(int direction)
783 {
784 	switch (direction) {
785 	case PM_LEVEL_UPONLY:
786 		return ("up");
787 
788 	case PM_LEVEL_EXACT:
789 		return ("exact");
790 
791 	case PM_LEVEL_DOWNONLY:
792 		return ("down");
793 
794 	default:
795 		return ("INVALID DIRECTION");
796 	}
797 }
798 
799 char *
800 pm_decode_op(pm_bus_power_op_t op)
801 {
802 	switch (op) {
803 	case BUS_POWER_CHILD_PWRCHG:
804 		return ("CHILD_PWRCHG");
805 	case BUS_POWER_NEXUS_PWRUP:
806 		return ("NEXUS_PWRUP");
807 	case BUS_POWER_PRE_NOTIFICATION:
808 		return ("PRE_NOTIFICATION");
809 	case BUS_POWER_POST_NOTIFICATION:
810 		return ("POST_NOTIFICATION");
811 	case BUS_POWER_HAS_CHANGED:
812 		return ("HAS_CHANGED");
813 	case BUS_POWER_NOINVOL:
814 		return ("NOINVOL");
815 	default:
816 		return ("UNKNOWN OP");
817 	}
818 }
819 
820 /*
821  * Returns true if level is a possible (valid) power level for component
822  */
823 int
824 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
825 {
826 	PMD_FUNC(pmf, "e_pm_valid_power")
827 	pm_component_t *cp = PM_CP(dip, cmpt);
828 	int i;
829 	int *ip = cp->pmc_comp.pmc_lvals;
830 	int limit = cp->pmc_comp.pmc_numlevels;
831 
832 	if (level < 0)
833 		return (0);
834 	for (i = 0; i < limit; i++) {
835 		if (level == *ip++)
836 			return (1);
837 	}
838 #ifdef DEBUG
839 	if (pm_debug & PMD_FAIL) {
840 		ip = cp->pmc_comp.pmc_lvals;
841 
842 		for (i = 0; i < limit; i++)
843 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
844 			    pmf, i, *ip++))
845 	}
846 #endif
847 	return (0);
848 }
849 
850 /*
851  * Returns true if device is pm'd (after calling pm_start if need be)
852  */
853 int
854 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
855 {
856 	pm_info_t *info;
857 	static int pm_start(dev_info_t *dip);
858 
859 	/*
860 	 * Check if the device is power managed if not.
861 	 * To make the common case (device is power managed already)
862 	 * fast, we check without the lock.  If device is not already
863 	 * power managed, then we take the lock and the long route through
864 	 * go get it managed.  Devices never go unmanaged until they
865 	 * detach.
866 	 */
867 	info = PM_GET_PM_INFO(dip);
868 	if (!info) {
869 		if (!DEVI_IS_ATTACHING(dip)) {
870 			return (0);
871 		}
872 		if (pm_start(dip) != DDI_SUCCESS) {
873 			return (0);
874 		}
875 		info = PM_GET_PM_INFO(dip);
876 	}
877 	ASSERT(info);
878 	if (infop != NULL)
879 		*infop = info;
880 	return (1);
881 }
882 
883 int
884 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
885 {
886 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
887 		if (cpp != NULL)
888 			*cpp = PM_CP(dip, cmpt);
889 		return (1);
890 	} else {
891 		return (0);
892 	}
893 }
894 
895 /*
896  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
897  */
898 static int
899 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
900 {
901 	PMD_FUNC(pmf, "din")
902 	pm_component_t *cp;
903 	char *pathbuf;
904 	int result;
905 
906 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
907 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
908 	    !e_pm_valid_power(dip, cmpt, level))
909 		return (DDI_FAILURE);
910 
911 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
912 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
913 	    level, cur_power(cp)))
914 
915 	if (pm_set_power(dip, cmpt, level,  direction,
916 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
917 		if (direction == PM_LEVEL_UPONLY) {
918 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
919 			(void) ddi_pathname(dip, pathbuf);
920 			cmn_err(CE_WARN, "Device %s failed to power up.",
921 			    pathbuf);
922 			kmem_free(pathbuf, MAXPATHLEN);
923 		}
924 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
925 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
926 		    pm_decode_direction(direction), level, result))
927 		return (DDI_FAILURE);
928 	}
929 
930 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
931 	    PM_DEVICE(dip)))
932 	pm_rescan(dip);
933 	return (DDI_SUCCESS);
934 }
935 
/*
 * We can get multiple pm_rescan() threads, if one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t of the device to rescan; the void * signature lets
 * this function double as the timeout(9F) callback that it and pm_scan()
 * register.  All scan state is examined and modified under PM_LOCK_DIP.
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	/*
	 * Nothing to do if scanning is globally disabled, the node is not
	 * scannable, it has no pm info or scan state, or scanning of this
	 * node has been stopped.
	 */
	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is running; ask it to go around once more */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it.  The id is cleared
		 * and the lock dropped around untimeout() (presumably because
		 * the timeout handler, this function, takes the same lock).
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		/*
		 * The lock was dropped above, so another thread may have
		 * scheduled a timeout in the meantime.  Cancel it, and if yet
		 * another one shows up while the lock is dropped again, leave
		 * that one in place and give up.
		 */
		if (scanp->ps_scan_id) {
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			if (scanp->ps_scan_id) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/*
		 * Try again later: after pm_id_ticks if idledown is in
		 * effect, otherwise after the node's minimum scan interval.
		 */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (PM_MIN_SCAN(dip) * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* pm_scan was queued; remember that until it starts running */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
1024 
/*
 * Task queue entry point dispatched by pm_rescan().  arg is the dev_info_t
 * of the device to scan.
 *
 * Runs pm_scan_dev() on the device, repeating for as long as PM_SCAN_AGAIN
 * gets set while a pass is in progress, and then schedules the next
 * pm_rescan() timeout according to the interval pm_scan_dev() returned
 * (LONG_MAX means no further scan is scheduled).  PM_SCANNING guarantees
 * that only one thread at a time is inside the scan loop.
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/*
	 * Scanning is disabled or has been stopped for this node: clear the
	 * request flags so that nobody waits on them, and bail out.
	 */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* let the thread that is already scanning do another pass */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	/* we are the scanner now; the dispatch request has been consumed */
	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	/*
	 * pm_scan_dev() is called without the lock held.  Any request for
	 * another scan that arrives during a pass sets PM_SCAN_AGAIN, which
	 * makes us loop.
	 */
	do {
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot overflow */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* somebody already scheduled a rescan; keep theirs */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			/* nextscan is in seconds; timeout() wants ticks */
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1104 
1105 void
1106 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1107 {
1108 	int components = PM_NUMCMPTS(dip);
1109 	int i;
1110 
1111 	ASSERT(components > 0);
1112 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1113 	for (i = 0; i < components; i++) {
1114 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1115 	}
1116 	PM_UNLOCK_BUSY(dip);
1117 }
1118 
1119 /*
1120  * Returns true if device needs to be kept up because it exported the
1121  * "no-involuntary-power-cycles" property or we're pretending it did (console
1122  * fb case) or it is an ancestor of such a device and has used up the "one
1123  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1124  * upon detach
1125  */
1126 int
1127 pm_noinvol(dev_info_t *dip)
1128 {
1129 	PMD_FUNC(pmf, "noinvol")
1130 
1131 	/*
1132 	 * This doesn't change over the life of a driver, so no locking needed
1133 	 */
1134 	if (PM_IS_CFB(dip)) {
1135 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1136 		    pmf, PM_DEVICE(dip)))
1137 		return (1);
1138 	}
1139 	/*
1140 	 * Not an issue if no such kids
1141 	 */
1142 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1143 #ifdef DEBUG
1144 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1145 			dev_info_t *pdip = dip;
1146 			do {
1147 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1148 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1149 				    DEVI(pdip)->devi_pm_noinvolpm,
1150 				    DEVI(pdip)->devi_pm_volpmd))
1151 				pdip = ddi_get_parent(pdip);
1152 			} while (pdip);
1153 		}
1154 #endif
1155 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1156 		return (0);
1157 	}
1158 
1159 	/*
1160 	 * Since we now maintain the counts correct at every node, we no longer
1161 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1162 	 * without the children getting their counts adjusted.
1163 	 */
1164 
1165 #ifdef	DEBUG
1166 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1167 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1168 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1169 		    PM_DEVICE(dip)))
1170 #endif
1171 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1172 }
1173 
/*
 * This function performs the actual scanning of the device.
 * It attempts to power off the indicated device's components if they have
 * been idle and other restrictions are met.
 * pm_scan_dev calculates and returns when the next scan should happen for
 * this device.
 *
 * The return value is an interval (pm_scan() multiplies it by hz before
 * handing it to timeout()):
 *	pm_default_min_scan	the device is still attaching
 *	LONG_MAX		the device is not to be scanned, or no
 *				component needs another look
 *	1			the parent could not be entered; retry soon
 *	otherwise		the smallest interval after which some
 *				component may be ready for its next step down
 */
time_t
pm_scan_dev(dev_info_t *dip)
{
	PMD_FUNC(pmf, "scan_dev")
	pm_scan_t	*scanp;
	time_t		*timestamp, idletime, now, thresh;
	time_t		timeleft = 0;
#ifdef PMDDEBUG
	int		curpwr;
#endif
	int		i, nxtpwr, pwrndx, unused;
	size_t		size;
	pm_component_t	 *cp;
	dev_info_t	*pdip = ddi_get_parent(dip);
	int		circ;
	static int	cur_threshold(dev_info_t *, int);
	static int	pm_next_lower_power(pm_component_t *, int);
	clock_t		min_scan = pm_default_min_scan;

	/*
	 * skip attaching device
	 */
	if (DEVI_IS_ATTACHING(dip)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
		    pmf, PM_DEVICE(dip), min_scan))
		return (min_scan);
	}

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	min_scan = PM_MIN_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
	    PM_KUC(dip)))

	/* no scan under the following conditions */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
	    (PM_KUC(dip) != 0) ||
	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
		PM_UNLOCK_DIP(dip);
		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
		    "kuc(%d), %s directpm, %s pm_noinvol\n",
		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
		    cpupm, PM_KUC(dip),
		    PM_ISDIRECT(dip) ? "is" : "is not",
		    pm_noinvol(dip) ? "is" : "is not"))
		return (LONG_MAX);
	}
	PM_UNLOCK_DIP(dip);

	/*
	 * The parent is only try-entered; if that fails we do not wait but
	 * ask for another scan after an interval of 1.
	 * NOTE(review): the debug message below names pdip rather than dip
	 * and has no trailing newline.
	 */
	if (!ndi_devi_tryenter(pdip, &circ)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
		    pmf, PM_DEVICE(pdip)))
		return ((time_t)1);
	}
	/* take a consistent snapshot of every component's timestamp */
	now = gethrestime_sec();
	size = PM_NUMCMPTS(dip) * sizeof (time_t);
	timestamp = kmem_alloc(size, KM_SLEEP);
	pm_get_timestamps(dip, timestamp);

	/*
	 * Since we removed support for backwards compatible devices,
	 * (see big comment at top of file)
	 * it is no longer required to deal with component 0 last.
	 */
	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
		/*
		 * If already off (an optimization, perhaps)
		 */
		cp = PM_CP(dip, i);
		pwrndx = cp->pmc_cur_pwr;
#ifdef PMDDEBUG
		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
		    PM_LEVEL_UNKNOWN :
		    cp->pmc_comp.pmc_lvals[pwrndx];
#endif

		if (pwrndx == 0) {
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
			    "lowest\n", pmf, PM_DEVICE(dip), i))
			/* skip device if off or at its lowest */
			continue;
		}

		thresh = cur_threshold(dip, i);		/* comp i threshold */
		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
			/* were busy or newly became busy by another thread */
			/*
			 * Look again after the larger of the threshold and
			 * the minimum scan interval, unless some other
			 * component already wants an earlier scan.
			 */
			if (timeleft == 0)
				timeleft = max(thresh, min_scan);
			else
				timeleft = min(
				    timeleft, max(thresh, min_scan));
			continue;
		}

		idletime = now - timestamp[i];		/* idle time */
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
		    pmf, PM_DEVICE(dip), i, idletime))
		if (idletime >= thresh || PM_IS_PID(dip)) {
			nxtpwr = pm_next_lower_power(cp, pwrndx);
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
			/*
			 * Step the component down one level, non-blocking and
			 * flagged as coming from scan.  It only counts as a
			 * failure if the component did not end up at nxtpwr
			 * some other way.
			 */
			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
			    PM_CURPOWER(dip, i) != nxtpwr) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
				    i, curpwr, nxtpwr))
				/* retry after the minimum scan interval */
				timeleft = min_scan;
				continue;
			} else {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "%d->%d, GOOD curpwr %d\n", pmf,
				    PM_DEVICE(dip), i, curpwr, nxtpwr,
				    cur_power(cp)))

				if (nxtpwr == 0)	/* component went off */
					continue;

				/*
				 * scan to next lower level
				 */
				if (timeleft == 0)
					timeleft = max(
					    1, cur_threshold(dip, i));
				else
					timeleft = min(timeleft,
					    max(1, cur_threshold(dip, i)));
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
				    i, timeleft))
			}
		} else {	/* comp not idle long enough */
			/* come back when the rest of its threshold is up */
			if (timeleft == 0)
				timeleft = thresh - idletime;
			else
				timeleft = min(timeleft, (thresh - idletime));
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
		}
	}
	ndi_devi_exit(pdip, circ);
	kmem_free(timestamp, size);
	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
	    PM_DEVICE(dip), timeleft))

	/*
	 * if components are already at lowest level, timeleft is left 0
	 */
	return ((timeleft == 0) ? LONG_MAX : timeleft);
}
1336 
1337 /*
1338  * pm_scan_stop - cancel scheduled pm_rescan,
1339  *                wait for termination of dispatched pm_scan thread
1340  *                     and active pm_scan_dev thread.
1341  */
1342 void
1343 pm_scan_stop(dev_info_t *dip)
1344 {
1345 	PMD_FUNC(pmf, "scan_stop")
1346 	pm_scan_t	*scanp;
1347 	timeout_id_t	scanid;
1348 
1349 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1350 	PM_LOCK_DIP(dip);
1351 	scanp = PM_GET_PM_SCAN(dip);
1352 	if (!scanp) {
1353 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1354 		    pmf, PM_DEVICE(dip)))
1355 		PM_UNLOCK_DIP(dip);
1356 		return;
1357 	}
1358 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1359 
1360 	/* cancel scheduled scan taskq */
1361 	while (scanp->ps_scan_id) {
1362 		scanid = scanp->ps_scan_id;
1363 		scanp->ps_scan_id = 0;
1364 		PM_UNLOCK_DIP(dip);
1365 		(void) untimeout(scanid);
1366 		PM_LOCK_DIP(dip);
1367 	}
1368 
1369 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1370 		PM_UNLOCK_DIP(dip);
1371 		delay(1);
1372 		PM_LOCK_DIP(dip);
1373 	}
1374 	PM_UNLOCK_DIP(dip);
1375 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1376 }
1377 
1378 int
1379 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1380 {
1381 	_NOTE(ARGUNUSED(arg))
1382 
1383 	if (!PM_GET_PM_SCAN(dip))
1384 		return (DDI_WALK_CONTINUE);
1385 	ASSERT(!PM_ISBC(dip));
1386 	pm_scan_stop(dip);
1387 	return (DDI_WALK_CONTINUE);
1388 }
1389 
1390 /*
1391  * Converts a power level value to its index
1392  */
1393 static int
1394 power_val_to_index(pm_component_t *cp, int val)
1395 {
1396 	int limit, i, *ip;
1397 
1398 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1399 	    val != PM_LEVEL_EXACT);
1400 	/*  convert power value into index (i) */
1401 	limit = cp->pmc_comp.pmc_numlevels;
1402 	ip = cp->pmc_comp.pmc_lvals;
1403 	for (i = 0; i < limit; i++)
1404 		if (val == *ip++)
1405 			return (i);
1406 	return (-1);
1407 }
1408 
1409 /*
1410  * Converts a numeric power level to a printable string
1411  */
1412 static char *
1413 power_val_to_string(pm_component_t *cp, int val)
1414 {
1415 	int index;
1416 
1417 	if (val == PM_LEVEL_UPONLY)
1418 		return ("<UPONLY>");
1419 
1420 	if (val == PM_LEVEL_UNKNOWN ||
1421 	    (index = power_val_to_index(cp, val)) == -1)
1422 		return ("<LEVEL_UNKNOWN>");
1423 
1424 	return (cp->pmc_comp.pmc_lnames[index]);
1425 }
1426 
1427 /*
1428  * Return true if this node has been claimed by a ppm.
1429  */
1430 static int
1431 pm_ppm_claimed(dev_info_t *dip)
1432 {
1433 	return (PPM(dip) != NULL);
1434 }
1435 
1436 /*
1437  * A node which was voluntarily power managed has just used up its "free cycle"
1438  * and need is volpmd field cleared, and the same done to all its descendents
1439  */
1440 static void
1441 pm_clear_volpm_dip(dev_info_t *dip)
1442 {
1443 	PMD_FUNC(pmf, "clear_volpm_dip")
1444 
1445 	if (dip == NULL)
1446 		return;
1447 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1448 	    PM_DEVICE(dip)))
1449 	DEVI(dip)->devi_pm_volpmd = 0;
1450 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1451 		pm_clear_volpm_dip(dip);
1452 	}
1453 }
1454 
1455 /*
1456  * A node which was voluntarily power managed has used up the "free cycles"
1457  * for the subtree that it is the root of.  Scan through the list of detached
1458  * nodes and adjust the counts of any that are descendents of the node.
1459  */
1460 static void
1461 pm_clear_volpm_list(dev_info_t *dip)
1462 {
1463 	PMD_FUNC(pmf, "clear_volpm_list")
1464 	char	*pathbuf;
1465 	size_t	len;
1466 	pm_noinvol_t *ip;
1467 
1468 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1469 	(void) ddi_pathname(dip, pathbuf);
1470 	len = strlen(pathbuf);
1471 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1472 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1473 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1474 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1475 		    ip->ni_path))
1476 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1477 		    ip->ni_path[len] == '/') {
1478 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1479 			    ip->ni_path))
1480 			ip->ni_volpmd = 0;
1481 			ip->ni_wasvolpmd = 0;
1482 		}
1483 	}
1484 	kmem_free(pathbuf, MAXPATHLEN);
1485 	rw_exit(&pm_noinvol_rwlock);
1486 }
1487 
1488 /*
1489  * Powers a device, suspending or resuming the driver if it is a backward
1490  * compatible device, calling into ppm to change power level.
1491  * Called with the component's power lock held.
1492  */
1493 static int
1494 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1495     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1496 {
1497 	PMD_FUNC(pmf, "power_dev")
1498 	power_req_t power_req;
1499 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1500 	int		resume_needed = 0;
1501 	int		suspended = 0;
1502 	int		result;
1503 #ifdef PMDDEBUG
1504 	struct pm_component *cp = PM_CP(dip, comp);
1505 #endif
1506 	int		bc = PM_ISBC(dip);
1507 	int pm_all_components_off(dev_info_t *);
1508 	int		clearvolpmd = 0;
1509 	char		pathbuf[MAXNAMELEN];
1510 #ifdef PMDDEBUG
1511 	char *ppmname, *ppmaddr;
1512 #endif
1513 	/*
1514 	 * If this is comp 0 of a backwards compat device and we are
1515 	 * going to take the power away, we need to detach it with
1516 	 * DDI_PM_SUSPEND command.
1517 	 */
1518 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1519 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1520 			/* We could not suspend before turning cmpt zero off */
1521 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1522 			    pmf, PM_DEVICE(dip)))
1523 			return (DDI_FAILURE);
1524 		} else {
1525 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1526 			suspended++;
1527 		}
1528 	}
1529 	power_req.request_type = PMR_PPM_SET_POWER;
1530 	power_req.req.ppm_set_power_req.who = dip;
1531 	power_req.req.ppm_set_power_req.cmpt = comp;
1532 	power_req.req.ppm_set_power_req.old_level = old_level;
1533 	power_req.req.ppm_set_power_req.new_level = level;
1534 	power_req.req.ppm_set_power_req.canblock = canblock;
1535 	power_req.req.ppm_set_power_req.cookie = NULL;
1536 #ifdef PMDDEBUG
1537 	if (pm_ppm_claimed(dip)) {
1538 		ppmname = PM_NAME(PPM(dip));
1539 		ppmaddr = PM_ADDR(PPM(dip));
1540 
1541 	} else {
1542 		ppmname = "noppm";
1543 		ppmaddr = "0";
1544 	}
1545 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1546 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1547 	    power_val_to_string(cp, old_level), old_level,
1548 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1549 #endif
1550 	/*
1551 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1552 	 * bc device comp 0 is powering on, then we count it as a power cycle
1553 	 * against its voluntary count.
1554 	 */
1555 	if (DEVI(dip)->devi_pm_volpmd &&
1556 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1557 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1558 		clearvolpmd = 1;
1559 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1560 	    &power_req, &result)) == DDI_SUCCESS) {
1561 		/*
1562 		 * Now do involuntary pm accounting;  If we've just cycled power
1563 		 * on a voluntarily pm'd node, and by inference on its entire
1564 		 * subtree, we need to set the subtree (including those nodes
1565 		 * already detached) volpmd counts to 0, and subtract out the
1566 		 * value of the current node's volpmd count from the ancestors
1567 		 */
1568 		if (clearvolpmd) {
1569 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1570 			pm_clear_volpm_dip(dip);
1571 			pm_clear_volpm_list(dip);
1572 			if (volpmd) {
1573 				(void) ddi_pathname(dip, pathbuf);
1574 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1575 				    volpmd, 0, pathbuf, dip);
1576 			}
1577 		}
1578 	} else {
1579 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1580 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1581 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1582 	}
1583 	/*
1584 	 * If some other devices were also powered up (e.g. other cpus in
1585 	 * the same domain) return a pointer to that list
1586 	 */
1587 	if (devlist) {
1588 		*devlist = (pm_ppm_devlist_t *)
1589 		    power_req.req.ppm_set_power_req.cookie;
1590 	}
1591 	/*
1592 	 * We will have to resume the device if the device is backwards compat
1593 	 * device and either of the following is true:
1594 	 * -This is comp 0 and we have successfully powered it up
1595 	 * -This is comp 0 and we have failed to power it down. Resume is
1596 	 *  needed because we have suspended it above
1597 	 */
1598 
1599 	if (bc && comp == 0) {
1600 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1601 		if (power_op_ret == DDI_SUCCESS) {
1602 			if (POWERING_ON(old_level, level)) {
1603 				/*
1604 				 * It must be either suspended or resumed
1605 				 * via pm_power_has_changed path
1606 				 */
1607 				ASSERT((DEVI(dip)->devi_pm_flags &
1608 				    PMC_SUSPENDED) ||
1609 				    (PM_CP(dip, comp)->pmc_flags &
1610 				    PM_PHC_WHILE_SET_POWER));
1611 
1612 					resume_needed = suspended;
1613 			}
1614 		} else {
1615 			if (POWERING_OFF(old_level, level)) {
1616 				/*
1617 				 * It must be either suspended or resumed
1618 				 * via pm_power_has_changed path
1619 				 */
1620 				ASSERT((DEVI(dip)->devi_pm_flags &
1621 				    PMC_SUSPENDED) ||
1622 				    (PM_CP(dip, comp)->pmc_flags &
1623 				    PM_PHC_WHILE_SET_POWER));
1624 
1625 					resume_needed = suspended;
1626 			}
1627 		}
1628 	}
1629 	if (resume_needed) {
1630 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1631 		/* ppm is not interested in DDI_PM_RESUME */
1632 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1633 		    DDI_SUCCESS) {
1634 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1635 		} else
1636 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1637 			    PM_DEVICE(dip));
1638 	}
1639 	return (power_op_ret);
1640 }
1641 
1642 /*
1643  * Return true if we are the owner or a borrower of the devi lock.  See
1644  * pm_lock_power_single() about borrowing the lock.
1645  */
1646 static int
1647 pm_devi_lock_held(dev_info_t *dip)
1648 {
1649 	lock_loan_t *cur;
1650 
1651 	if (DEVI_BUSY_OWNED(dip))
1652 		return (1);
1653 
1654 	/* return false if no locks borrowed */
1655 	if (lock_loan_head.pmlk_next == NULL)
1656 		return (0);
1657 
1658 	mutex_enter(&pm_loan_lock);
1659 	/* see if our thread is registered as a lock borrower. */
1660 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1661 		if (cur->pmlk_borrower == curthread)
1662 			break;
1663 	mutex_exit(&pm_loan_lock);
1664 
1665 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1666 }
1667 
1668 /*
1669  * pm_set_power: adjusts power level of device.	 Assumes device is power
1670  * manageable & component exists.
1671  *
1672  * Cases which require us to bring up devices we keep up ("wekeepups") for
1673  * backwards compatible devices:
1674  *	component 0 is off and we're bringing it up from 0
1675  *		bring up wekeepup first
1676  *	and recursively when component 0 is off and we bring some other
1677  *	component up from 0
1678  * For devices which are not backward compatible, our dependency notion is much
1679  * simpler.  Unless all components are off, then wekeeps must be on.
1680  * We don't treat component 0 differently.
1681  * Canblock tells how to deal with a direct pm'd device.
1682  * Scan arg tells us if we were called from scan, in which case we don't need
1683  * to go back to the root node and walk down to change power.
1684  */
1685 int
1686 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1687     pm_canblock_t canblock, int scan, int *retp)
1688 {
1689 	PMD_FUNC(pmf, "set_power")
1690 	char		*pathbuf;
1691 	pm_bp_child_pwrchg_t bpc;
1692 	pm_sp_misc_t	pspm;
1693 	int		ret = DDI_SUCCESS;
1694 	int		unused = DDI_SUCCESS;
1695 	dev_info_t	*pdip = ddi_get_parent(dip);
1696 
1697 #ifdef DEBUG
1698 	int		diverted = 0;
1699 
1700 	/*
1701 	 * This prevents operations on the console from calling prom_printf and
1702 	 * either deadlocking or bringing up the console because of debug
1703 	 * output
1704 	 */
1705 	if (dip == cfb_dip) {
1706 		diverted++;
1707 		mutex_enter(&pm_debug_lock);
1708 		pm_divertdebug++;
1709 		mutex_exit(&pm_debug_lock);
1710 	}
1711 #endif
1712 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1713 	    direction == PM_LEVEL_EXACT);
1714 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1715 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1716 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1717 	(void) ddi_pathname(dip, pathbuf);
1718 	bpc.bpc_dip = dip;
1719 	bpc.bpc_path = pathbuf;
1720 	bpc.bpc_comp = comp;
1721 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1722 	bpc.bpc_nlevel = level;
1723 	pspm.pspm_direction = direction;
1724 	pspm.pspm_errnop = retp;
1725 	pspm.pspm_canblock = canblock;
1726 	pspm.pspm_scan = scan;
1727 	bpc.bpc_private = &pspm;
1728 
1729 	/*
1730 	 * If a config operation is being done (we've locked the parent) or
1731 	 * we already hold the power lock (we've locked the node)
1732 	 * then we can operate directly on the node because we have already
1733 	 * brought up all the ancestors, otherwise, we have to go back to the
1734 	 * top of the tree.
1735 	 */
1736 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1737 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1738 		    (void *)&bpc, (void *)&unused);
1739 	else
1740 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1741 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1742 #ifdef DEBUG
1743 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1744 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1745 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1746 	}
1747 	if (diverted) {
1748 		mutex_enter(&pm_debug_lock);
1749 		pm_divertdebug--;
1750 		mutex_exit(&pm_debug_lock);
1751 	}
1752 #endif
1753 	kmem_free(pathbuf, MAXPATHLEN);
1754 	return (ret);
1755 }
1756 
1757 /*
1758  * If holddip is set, then if a dip is found we return with the node held.
1759  *
1760  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1761  * (resolve_pathname), but it does not drive attach.
1762  */
1763 dev_info_t *
1764 pm_name_to_dip(char *pathname, int holddip)
1765 {
1766 	struct pathname pn;
1767 	char		*component;
1768 	dev_info_t	*parent, *child;
1769 	int		circ;
1770 
1771 	if ((pathname == NULL) || (*pathname != '/'))
1772 		return (NULL);
1773 
1774 	/* setup pathname and allocate component */
1775 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1776 		return (NULL);
1777 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1778 
1779 	/* start at top, process '/' component */
1780 	parent = child = ddi_root_node();
1781 	ndi_hold_devi(parent);
1782 	pn_skipslash(&pn);
1783 	ASSERT(i_ddi_devi_attached(parent));
1784 
1785 	/* process components of pathname */
1786 	while (pn_pathleft(&pn)) {
1787 		(void) pn_getcomponent(&pn, component);
1788 
1789 		/* enter parent and search for component child */
1790 		ndi_devi_enter(parent, &circ);
1791 		child = ndi_devi_findchild(parent, component);
1792 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1793 			child = NULL;
1794 			ndi_devi_exit(parent, circ);
1795 			ndi_rele_devi(parent);
1796 			goto out;
1797 		}
1798 
1799 		/* attached child found, hold child and release parent */
1800 		ndi_hold_devi(child);
1801 		ndi_devi_exit(parent, circ);
1802 		ndi_rele_devi(parent);
1803 
1804 		/* child becomes parent, and process next component */
1805 		parent = child;
1806 		pn_skipslash(&pn);
1807 
1808 		/* loop with active ndi_devi_hold of child->parent */
1809 	}
1810 
1811 out:
1812 	pn_free(&pn);
1813 	kmem_free(component, MAXNAMELEN);
1814 
1815 	/* if we are not asked to return with hold, drop current hold */
1816 	if (child && !holddip)
1817 		ndi_rele_devi(child);
1818 	return (child);
1819 }
1820 
1821 /*
1822  * Search for a dependency and mark it unsatisfied
1823  */
1824 static void
1825 pm_unsatisfy(char *keeper, char *kept)
1826 {
1827 	PMD_FUNC(pmf, "unsatisfy")
1828 	pm_pdr_t *dp;
1829 
1830 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1831 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1832 		if (!dp->pdr_isprop) {
1833 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1834 			    (dp->pdr_kept_count > 0) &&
1835 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1836 				if (dp->pdr_satisfied) {
1837 					dp->pdr_satisfied = 0;
1838 					pm_unresolved_deps++;
1839 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1840 					    "pm_unresolved_deps now %d\n", pmf,
1841 					    pm_unresolved_deps))
1842 				}
1843 			}
1844 		}
1845 	}
1846 }
1847 
1848 /*
1849  * Device dip is being un power managed, it keeps up count other devices.
1850  * We need to release any hold we have on the kept devices, and also
1851  * mark the dependency no longer satisfied.
1852  */
1853 static void
1854 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1855 {
1856 	PMD_FUNC(pmf, "unkeeps")
1857 	int i, j;
1858 	dev_info_t *kept;
1859 	dev_info_t *dip;
1860 	struct pm_component *cp;
1861 	int keeper_on = 0, circ;
1862 
1863 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1864 	    keeper, (void *)keptpaths))
1865 	/*
1866 	 * Try to grab keeper. Keeper may have gone away by now,
1867 	 * in this case, used the passed in value pwr
1868 	 */
1869 	dip = pm_name_to_dip(keeper, 1);
1870 	for (i = 0; i < count; i++) {
1871 		/* Release power hold */
1872 		kept = pm_name_to_dip(keptpaths[i], 1);
1873 		if (kept) {
1874 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1875 			    PM_DEVICE(kept), i))
1876 			/*
1877 			 * We need to check if we skipped a bringup here
1878 			 * because we could have failed the bringup
1879 			 * (ie DIRECT PM device) and have
1880 			 * not increment the count.
1881 			 */
1882 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1883 				keeper_on = 0;
1884 				PM_LOCK_POWER(dip, &circ);
1885 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1886 					cp = &DEVI(dip)->devi_pm_components[j];
1887 					if (cur_power(cp)) {
1888 						keeper_on++;
1889 						break;
1890 					}
1891 				}
1892 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1893 					pm_rele_power(kept);
1894 					DEVI(kept)->devi_pm_flags
1895 					    &= ~PMC_SKIP_BRINGUP;
1896 				}
1897 				PM_UNLOCK_POWER(dip, circ);
1898 			} else if (pwr) {
1899 				if (PM_SKBU(kept) == 0) {
1900 					pm_rele_power(kept);
1901 					DEVI(kept)->devi_pm_flags
1902 					    &= ~PMC_SKIP_BRINGUP;
1903 				}
1904 			}
1905 			ddi_release_devi(kept);
1906 		}
1907 		/*
1908 		 * mark this dependency not satisfied
1909 		 */
1910 		pm_unsatisfy(keeper, keptpaths[i]);
1911 	}
1912 	if (dip)
1913 		ddi_release_devi(dip);
1914 }
1915 
1916 /*
1917  * Device kept is being un power managed, it is kept up by keeper.
1918  * We need to mark the dependency no longer satisfied.
1919  */
1920 static void
1921 pm_unkepts(char *kept, char *keeper)
1922 {
1923 	PMD_FUNC(pmf, "unkepts")
1924 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1925 	ASSERT(keeper != NULL);
1926 	/*
1927 	 * mark this dependency not satisfied
1928 	 */
1929 	pm_unsatisfy(keeper, kept);
1930 }
1931 
1932 /*
1933  * Removes dependency information and hold on the kepts, if the path is a
1934  * path of a keeper.
1935  */
1936 static void
1937 pm_free_keeper(char *path, int pwr)
1938 {
1939 	pm_pdr_t *dp;
1940 	int i;
1941 	size_t length;
1942 
1943 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1944 		if (strcmp(dp->pdr_keeper, path) != 0)
1945 			continue;
1946 		/*
1947 		 * Remove all our kept holds and the dependency records,
1948 		 * then free up the kept lists.
1949 		 */
1950 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1951 		if (dp->pdr_kept_count)  {
1952 			for (i = 0; i < dp->pdr_kept_count; i++) {
1953 				length = strlen(dp->pdr_kept_paths[i]);
1954 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1955 			}
1956 			kmem_free(dp->pdr_kept_paths,
1957 			    dp->pdr_kept_count * sizeof (char **));
1958 			dp->pdr_kept_paths = NULL;
1959 			dp->pdr_kept_count = 0;
1960 		}
1961 	}
1962 }
1963 
1964 /*
1965  * Removes the device represented by path from the list of kepts, if the
1966  * path is a path of a kept
1967  */
1968 static void
1969 pm_free_kept(char *path)
1970 {
1971 	pm_pdr_t *dp;
1972 	int i;
1973 	int j, count;
1974 	size_t length;
1975 	char **paths;
1976 
1977 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1978 		if (dp->pdr_kept_count == 0)
1979 			continue;
1980 		count = dp->pdr_kept_count;
1981 		/* Remove this device from the kept path lists */
1982 		for (i = 0; i < count; i++) {
1983 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1984 				pm_unkepts(path, dp->pdr_keeper);
1985 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1986 				kmem_free(dp->pdr_kept_paths[i], length);
1987 				dp->pdr_kept_paths[i] = NULL;
1988 				dp->pdr_kept_count--;
1989 			}
1990 		}
1991 		/* Compact the kept paths array */
1992 		if (dp->pdr_kept_count) {
1993 			length = dp->pdr_kept_count * sizeof (char **);
1994 			paths = kmem_zalloc(length, KM_SLEEP);
1995 			j = 0;
1996 			for (i = 0; i < count; i++) {
1997 				if (dp->pdr_kept_paths[i] != NULL) {
1998 					paths[j] = dp->pdr_kept_paths[i];
1999 					j++;
2000 				}
2001 			}
2002 			ASSERT(j == dp->pdr_kept_count);
2003 		}
2004 		/* Now free the old array and point to the new one */
2005 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
2006 		if (dp->pdr_kept_count)
2007 			dp->pdr_kept_paths = paths;
2008 		else
2009 			dp->pdr_kept_paths = NULL;
2010 	}
2011 }
2012 
2013 /*
2014  * Free the dependency information for a device.
2015  */
2016 void
2017 pm_free_keeps(char *path, int pwr)
2018 {
2019 	PMD_FUNC(pmf, "free_keeps")
2020 
2021 #ifdef DEBUG
2022 	int doprdeps = 0;
2023 	void prdeps(char *);
2024 
2025 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2026 	if (pm_debug & PMD_KEEPS) {
2027 		doprdeps = 1;
2028 		prdeps("pm_free_keeps before");
2029 	}
2030 #endif
2031 	/*
2032 	 * First assume we are a keeper and remove all our kepts.
2033 	 */
2034 	pm_free_keeper(path, pwr);
2035 	/*
2036 	 * Now assume we a kept device, and remove all our records.
2037 	 */
2038 	pm_free_kept(path);
2039 #ifdef	DEBUG
2040 	if (doprdeps) {
2041 		prdeps("pm_free_keeps after");
2042 	}
2043 #endif
2044 }
2045 
2046 static int
2047 pm_is_kept(char *path)
2048 {
2049 	pm_pdr_t *dp;
2050 	int i;
2051 
2052 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2053 		if (dp->pdr_kept_count == 0)
2054 			continue;
2055 		for (i = 0; i < dp->pdr_kept_count; i++) {
2056 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2057 				return (1);
2058 		}
2059 	}
2060 	return (0);
2061 }
2062 
2063 static void
2064 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2065 {
2066 	PMD_FUNC(pmf, "hold_rele_power")
2067 	int circ;
2068 
2069 	if ((dip == NULL) ||
2070 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2071 		return;
2072 
2073 	PM_LOCK_POWER(dip, &circ);
2074 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2075 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2076 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2077 
2078 	PM_KUC(dip) += cnt;
2079 
2080 	ASSERT(PM_KUC(dip) >= 0);
2081 	PM_UNLOCK_POWER(dip, circ);
2082 
2083 	if (cnt < 0 && PM_KUC(dip) == 0)
2084 		pm_rescan(dip);
2085 }
2086 
2087 #define	MAX_PPM_HANDLERS	4
2088 
2089 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2090 
2091 struct	ppm_callbacks {
2092 	int (*ppmc_func)(dev_info_t *);
2093 	dev_info_t	*ppmc_dip;
2094 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2095 
2096 
2097 /*
2098  * This routine calls into all the registered ppms to notify them
2099  * that either all components of power-managed devices are at their
2100  * lowest levels or no longer all are at their lowest levels.
2101  */
2102 static void
2103 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2104 {
2105 	struct ppm_callbacks *ppmcp;
2106 	power_req_t power_req;
2107 	int result = 0;
2108 
2109 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2110 	power_req.req.ppm_all_lowest_req.mode = mode;
2111 	mutex_enter(&ppm_lock);
2112 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2113 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2114 		    DDI_CTLOPS_POWER, &power_req, &result);
2115 	mutex_exit(&ppm_lock);
2116 	if (mode == PM_ALL_LOWEST) {
2117 		if (autoS3_enabled) {
2118 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
2119 			    "autos3\n"))
2120 			mutex_enter(&srn_clone_lock);
2121 			if (srn_signal) {
2122 				srn_inuse++;
2123 				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
2124 				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
2125 				srn_inuse--;
2126 			} else {
2127 				PMD(PMD_SX, ("srn_signal NULL\n"))
2128 			}
2129 			mutex_exit(&srn_clone_lock);
2130 		} else {
2131 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
2132 			    "disabled\n"));
2133 		}
2134 	}
2135 }
2136 
2137 static void
2138 pm_set_pm_info(dev_info_t *dip, void *value)
2139 {
2140 	DEVI(dip)->devi_pm_info = value;
2141 }
2142 
2143 pm_rsvp_t *pm_blocked_list;
2144 
2145 /*
2146  * Look up an entry in the blocked list by dip and component
2147  */
2148 static pm_rsvp_t *
2149 pm_rsvp_lookup(dev_info_t *dip, int comp)
2150 {
2151 	pm_rsvp_t *p;
2152 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2153 	for (p = pm_blocked_list; p; p = p->pr_next)
2154 		if (p->pr_dip == dip && p->pr_comp == comp) {
2155 			return (p);
2156 		}
2157 	return (NULL);
2158 }
2159 
2160 /*
2161  * Called when a device which is direct power managed (or the parent or
2162  * dependent of such a device) changes power, or when a pm clone is closed
2163  * that was direct power managing a device.  This call results in pm_blocked()
2164  * (below) returning.
2165  */
2166 void
2167 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2168 {
2169 	PMD_FUNC(pmf, "proceed")
2170 	pm_rsvp_t *found = NULL;
2171 	pm_rsvp_t *p;
2172 
2173 	mutex_enter(&pm_rsvp_lock);
2174 	switch (cmd) {
2175 	/*
2176 	 * we're giving up control, let any pending op continue
2177 	 */
2178 	case PMP_RELEASE:
2179 		for (p = pm_blocked_list; p; p = p->pr_next) {
2180 			if (dip == p->pr_dip) {
2181 				p->pr_retval = PMP_RELEASE;
2182 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2183 				    pmf, PM_DEVICE(dip)))
2184 				cv_signal(&p->pr_cv);
2185 			}
2186 		}
2187 		break;
2188 
2189 	/*
2190 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2191 	 * succeed and a non-matching request for the same device fail
2192 	 */
2193 	case PMP_SETPOWER:
2194 		found = pm_rsvp_lookup(dip, comp);
2195 		if (!found)	/* if driver not waiting */
2196 			break;
2197 		/*
2198 		 * This cannot be pm_lower_power, since that can only happen
2199 		 * during detach or probe
2200 		 */
2201 		if (found->pr_newlevel <= newlevel) {
2202 			found->pr_retval = PMP_SUCCEED;
2203 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2204 			    PM_DEVICE(dip)))
2205 		} else {
2206 			found->pr_retval = PMP_FAIL;
2207 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2208 			    PM_DEVICE(dip)))
2209 		}
2210 		cv_signal(&found->pr_cv);
2211 		break;
2212 
2213 	default:
2214 		panic("pm_proceed unknown cmd %d", cmd);
2215 	}
2216 	mutex_exit(&pm_rsvp_lock);
2217 }
2218 
2219 /*
2220  * This routine dispatches new work to the dependency thread. Caller must
2221  * be prepared to block for memory if necessary.
2222  */
2223 void
2224 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2225     int *res, int cached_pwr)
2226 {
2227 	pm_dep_wk_t	*new_work;
2228 
2229 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2230 	new_work->pdw_type = cmd;
2231 	new_work->pdw_wait = wait;
2232 	new_work->pdw_done = 0;
2233 	new_work->pdw_ret = 0;
2234 	new_work->pdw_pwr = cached_pwr;
2235 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2236 	if (keeper != NULL) {
2237 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2238 		    KM_SLEEP);
2239 		(void) strcpy(new_work->pdw_keeper, keeper);
2240 	}
2241 	if (kept != NULL) {
2242 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2243 		(void) strcpy(new_work->pdw_kept, kept);
2244 	}
2245 	mutex_enter(&pm_dep_thread_lock);
2246 	if (pm_dep_thread_workq == NULL) {
2247 		pm_dep_thread_workq = new_work;
2248 		pm_dep_thread_tail = new_work;
2249 		new_work->pdw_next = NULL;
2250 	} else {
2251 		pm_dep_thread_tail->pdw_next = new_work;
2252 		pm_dep_thread_tail = new_work;
2253 		new_work->pdw_next = NULL;
2254 	}
2255 	cv_signal(&pm_dep_thread_cv);
2256 	/* If caller asked for it, wait till it is done. */
2257 	if (wait)  {
2258 		while (!new_work->pdw_done)
2259 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2260 		/*
2261 		 * Pass return status, if any, back.
2262 		 */
2263 		if (res != NULL)
2264 			*res = new_work->pdw_ret;
2265 		/*
2266 		 * If we asked to wait, it is our job to free the request
2267 		 * structure.
2268 		 */
2269 		if (new_work->pdw_keeper)
2270 			kmem_free(new_work->pdw_keeper,
2271 			    strlen(new_work->pdw_keeper) + 1);
2272 		if (new_work->pdw_kept)
2273 			kmem_free(new_work->pdw_kept,
2274 			    strlen(new_work->pdw_kept) + 1);
2275 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2276 	}
2277 	mutex_exit(&pm_dep_thread_lock);
2278 }
2279 
2280 /*
2281  * Release the pm resource for this device.
2282  */
2283 void
2284 pm_rem_info(dev_info_t *dip)
2285 {
2286 	PMD_FUNC(pmf, "rem_info")
2287 	int		i, count = 0;
2288 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2289 	dev_info_t	*pdip = ddi_get_parent(dip);
2290 	char		*pathbuf;
2291 	int		work_type = PM_DEP_WK_DETACH;
2292 
2293 	ASSERT(info);
2294 
2295 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2296 	if (PM_ISDIRECT(dip)) {
2297 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2298 		ASSERT(info->pmi_clone);
2299 		info->pmi_clone = 0;
2300 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2301 	}
2302 	ASSERT(!PM_GET_PM_SCAN(dip));
2303 
2304 	/*
2305 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2306 	 * Others we check all components.  BC node that has already
2307 	 * called pm_destroy_components() has zero component count.
2308 	 * Parents that get notification are not adjusted because their
2309 	 * kidsupcnt is always 0 (or 1 during configuration).
2310 	 */
2311 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2312 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2313 
2314 	/* node is detached, so we can examine power without locking */
2315 	if (PM_ISBC(dip)) {
2316 		count = (PM_CURPOWER(dip, 0) != 0);
2317 	} else {
2318 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2319 			count += (PM_CURPOWER(dip, i) != 0);
2320 	}
2321 
2322 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2323 		e_pm_hold_rele_power(pdip, -count);
2324 
2325 	/* Schedule a request to clean up dependency records */
2326 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2327 	(void) ddi_pathname(dip, pathbuf);
2328 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2329 	    PM_DEP_NOWAIT, NULL, (count > 0));
2330 	kmem_free(pathbuf, MAXPATHLEN);
2331 
2332 	/*
2333 	 * Adjust the pm_comps_notlowest count since this device is
2334 	 * not being power-managed anymore.
2335 	 */
2336 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2337 		if (PM_CURPOWER(dip, i) != 0)
2338 			PM_DECR_NOTLOWEST(dip);
2339 	}
2340 	/*
2341 	 * Once we clear the info pointer, it looks like it is not power
2342 	 * managed to everybody else.
2343 	 */
2344 	pm_set_pm_info(dip, NULL);
2345 	kmem_free(info, sizeof (pm_info_t));
2346 }
2347 
2348 int
2349 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2350 {
2351 	int components = PM_NUMCMPTS(dip);
2352 	int *bufp;
2353 	size_t size;
2354 	int i;
2355 
2356 	if (components <= 0) {
2357 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2358 		    "can't get normal power values\n", PM_DEVICE(dip));
2359 		return (DDI_FAILURE);
2360 	} else {
2361 		size = components * sizeof (int);
2362 		bufp = kmem_alloc(size, KM_SLEEP);
2363 		for (i = 0; i < components; i++) {
2364 			bufp[i] = pm_get_normal_power(dip, i);
2365 		}
2366 	}
2367 	*length = size;
2368 	*valuep = bufp;
2369 	return (DDI_SUCCESS);
2370 }
2371 
2372 static int
2373 pm_reset_timestamps(dev_info_t *dip, void *arg)
2374 {
2375 	_NOTE(ARGUNUSED(arg))
2376 
2377 	int components;
2378 	int	i;
2379 
2380 	if (!PM_GET_PM_INFO(dip))
2381 		return (DDI_WALK_CONTINUE);
2382 	components = PM_NUMCMPTS(dip);
2383 	ASSERT(components > 0);
2384 	PM_LOCK_BUSY(dip);
2385 	for (i = 0; i < components; i++) {
2386 		struct pm_component *cp;
2387 		/*
2388 		 * If the component was not marked as busy,
2389 		 * reset its timestamp to now.
2390 		 */
2391 		cp = PM_CP(dip, i);
2392 		if (cp->pmc_timestamp)
2393 			cp->pmc_timestamp = gethrestime_sec();
2394 	}
2395 	PM_UNLOCK_BUSY(dip);
2396 	return (DDI_WALK_CONTINUE);
2397 }
2398 
2399 /*
2400  * Convert a power level to an index into the levels array (or
2401  * just PM_LEVEL_UNKNOWN in that special case).
2402  */
2403 static int
2404 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2405 {
2406 	PMD_FUNC(pmf, "level_to_index")
2407 	int i;
2408 	int limit = cp->pmc_comp.pmc_numlevels;
2409 	int *ip = cp->pmc_comp.pmc_lvals;
2410 
2411 	if (level == PM_LEVEL_UNKNOWN)
2412 		return (level);
2413 
2414 	for (i = 0; i < limit; i++) {
2415 		if (level == *ip++) {
2416 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2417 			    pmf, PM_DEVICE(dip),
2418 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2419 			return (i);
2420 		}
2421 	}
2422 	panic("pm_level_to_index: level %d not found for device "
2423 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2424 	/*NOTREACHED*/
2425 }
2426 
2427 /*
2428  * Internal function to set current power level
2429  */
2430 static void
2431 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2432 {
2433 	PMD_FUNC(pmf, "set_cur_pwr")
2434 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2435 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2436 
2437 	/*
2438 	 * Nothing to adjust if current & new levels are the same.
2439 	 */
2440 	if (curpwr != PM_LEVEL_UNKNOWN &&
2441 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2442 		return;
2443 
2444 	/*
2445 	 * Keep the count for comps doing transition to/from lowest
2446 	 * level.
2447 	 */
2448 	if (curpwr == 0) {
2449 		PM_INCR_NOTLOWEST(dip);
2450 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2451 		PM_DECR_NOTLOWEST(dip);
2452 	}
2453 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2454 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2455 }
2456 
2457 /*
2458  * This is the default method of setting the power of a device if no ppm
2459  * driver has claimed it.
2460  */
2461 int
2462 pm_power(dev_info_t *dip, int comp, int level)
2463 {
2464 	PMD_FUNC(pmf, "power")
2465 	struct dev_ops	*ops;
2466 	int		(*fn)(dev_info_t *, int, int);
2467 	struct pm_component *cp = PM_CP(dip, comp);
2468 	int retval;
2469 	pm_info_t *info = PM_GET_PM_INFO(dip);
2470 	static int pm_phc_impl(dev_info_t *, int, int, int);
2471 
2472 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2473 	    PM_DEVICE(dip), comp, level))
2474 	if (!(ops = ddi_get_driver(dip))) {
2475 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2476 		    PM_DEVICE(dip)))
2477 		return (DDI_FAILURE);
2478 	}
2479 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2480 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2481 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2482 		    (!fn ? " devo_power NULL" : "")))
2483 		return (DDI_FAILURE);
2484 	}
2485 	cp->pmc_flags |= PM_POWER_OP;
2486 	retval = (*fn)(dip, comp, level);
2487 	cp->pmc_flags &= ~PM_POWER_OP;
2488 	if (retval == DDI_SUCCESS) {
2489 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2490 		return (DDI_SUCCESS);
2491 	}
2492 
2493 	/*
2494 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2495 	 * updated only the power level of the component.  If our attempt to
2496 	 * set the device new to a power level above has failed we sync the
2497 	 * total power state via phc code now.
2498 	 */
2499 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2500 		int phc_lvl =
2501 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2502 
2503 		ASSERT(info);
2504 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2505 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2506 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2507 	}
2508 
2509 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2510 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2511 	    level, power_val_to_string(cp, level)));
2512 	return (DDI_FAILURE);
2513 }
2514 
2515 int
2516 pm_unmanage(dev_info_t *dip)
2517 {
2518 	PMD_FUNC(pmf, "unmanage")
2519 	power_req_t power_req;
2520 	int result, retval = 0;
2521 
2522 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2523 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2524 	    PM_DEVICE(dip)))
2525 	power_req.request_type = PMR_PPM_UNMANAGE;
2526 	power_req.req.ppm_config_req.who = dip;
2527 	if (pm_ppm_claimed(dip))
2528 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2529 		    &power_req, &result);
2530 #ifdef DEBUG
2531 	else
2532 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2533 		    &power_req, &result);
2534 #endif
2535 	ASSERT(retval == DDI_SUCCESS);
2536 	pm_rem_info(dip);
2537 	return (retval);
2538 }
2539 
2540 int
2541 pm_raise_power(dev_info_t *dip, int comp, int level)
2542 {
2543 	if (level < 0)
2544 		return (DDI_FAILURE);
2545 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2546 	    !e_pm_valid_power(dip, comp, level))
2547 		return (DDI_FAILURE);
2548 
2549 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2550 }
2551 
2552 int
2553 pm_lower_power(dev_info_t *dip, int comp, int level)
2554 {
2555 	PMD_FUNC(pmf, "pm_lower_power")
2556 
2557 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2558 	    !e_pm_valid_power(dip, comp, level)) {
2559 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2560 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2561 		return (DDI_FAILURE);
2562 	}
2563 
2564 	if (!DEVI_IS_DETACHING(dip)) {
2565 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2566 		    pmf, PM_DEVICE(dip)))
2567 		return (DDI_FAILURE);
2568 	}
2569 
2570 	/*
2571 	 * If we don't care about saving power, or we're treating this node
2572 	 * specially, then this is a no-op
2573 	 */
2574 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2575 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2576 		    pmf, PM_DEVICE(dip),
2577 		    !autopm_enabled ? "!autopm_enabled " : "",
2578 		    !PM_POLLING_CPUPM ? "!cpupm_polling " : "",
2579 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2580 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2581 		return (DDI_SUCCESS);
2582 	}
2583 
2584 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2585 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2586 		    PM_DEVICE(dip)))
2587 		return (DDI_FAILURE);
2588 	}
2589 	return (DDI_SUCCESS);
2590 }
2591 
2592 /*
2593  * Find the entries struct for a given dip in the blocked list, return it locked
2594  */
2595 static psce_t *
2596 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2597 {
2598 	pscc_t *p;
2599 	psce_t *psce;
2600 
2601 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2602 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2603 		if (p->pscc_dip == dip) {
2604 			*psccp = p;
2605 			psce = p->pscc_entries;
2606 			mutex_enter(&psce->psce_lock);
2607 			ASSERT(psce);
2608 			rw_exit(&pm_pscc_direct_rwlock);
2609 			return (psce);
2610 		}
2611 	}
2612 	rw_exit(&pm_pscc_direct_rwlock);
2613 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2614 	/*NOTREACHED*/
2615 }
2616 
2617 /*
2618  * Write an entry indicating a power level change (to be passed to a process
2619  * later) in the given psce.
2620  * If we were called in the path that brings up the console fb in the
2621  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2622  * we create a record that has a size of -1, a physaddr of NULL, and that
2623  * has the overflow flag set.
2624  */
2625 static int
2626 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2627     int old, int which, pm_canblock_t canblock)
2628 {
2629 	char	buf[MAXNAMELEN];
2630 	pm_state_change_t *p;
2631 	size_t	size;
2632 	caddr_t physpath = NULL;
2633 	int	overrun = 0;
2634 
2635 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2636 	(void) ddi_pathname(dip, buf);
2637 	size = strlen(buf) + 1;
2638 	p = psce->psce_in;
2639 	if (canblock == PM_CANBLOCK_BYPASS) {
2640 		physpath = kmem_alloc(size, KM_NOSLEEP);
2641 		if (physpath == NULL) {
2642 			/*
2643 			 * mark current entry as overrun
2644 			 */
2645 			p->flags |= PSC_EVENT_LOST;
2646 			size = (size_t)-1;
2647 		}
2648 	} else
2649 		physpath = kmem_alloc(size, KM_SLEEP);
2650 	if (p->size) {	/* overflow; mark the next entry */
2651 		if (p->size != (size_t)-1)
2652 			kmem_free(p->physpath, p->size);
2653 		ASSERT(psce->psce_out == p);
2654 		if (p == psce->psce_last) {
2655 			psce->psce_first->flags |= PSC_EVENT_LOST;
2656 			psce->psce_out = psce->psce_first;
2657 		} else {
2658 			(p + 1)->flags |= PSC_EVENT_LOST;
2659 			psce->psce_out = (p + 1);
2660 		}
2661 		overrun++;
2662 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2663 		p->flags |= PSC_EVENT_LOST;
2664 		p->size = 0;
2665 		p->physpath = NULL;
2666 	}
2667 	if (which == PSC_INTEREST) {
2668 		mutex_enter(&pm_compcnt_lock);
2669 		if (pm_comps_notlowest == 0)
2670 			p->flags |= PSC_ALL_LOWEST;
2671 		else
2672 			p->flags &= ~PSC_ALL_LOWEST;
2673 		mutex_exit(&pm_compcnt_lock);
2674 	}
2675 	p->event = event;
2676 	p->timestamp = gethrestime_sec();
2677 	p->component = comp;
2678 	p->old_level = old;
2679 	p->new_level = new;
2680 	p->physpath = physpath;
2681 	p->size = size;
2682 	if (physpath != NULL)
2683 		(void) strcpy(p->physpath, buf);
2684 	if (p == psce->psce_last)
2685 		psce->psce_in = psce->psce_first;
2686 	else
2687 		psce->psce_in = ++p;
2688 	mutex_exit(&psce->psce_lock);
2689 	return (overrun);
2690 }
2691 
2692 /*
2693  * Find the next entry on the interest list.  We keep a pointer to the item we
2694  * last returned in the user's cooke.  Returns a locked entries struct.
2695  */
2696 static psce_t *
2697 psc_interest(void **cookie, pscc_t **psccp)
2698 {
2699 	pscc_t *pscc;
2700 	pscc_t **cookiep = (pscc_t **)cookie;
2701 
2702 	if (*cookiep == NULL)
2703 		pscc = pm_pscc_interest;
2704 	else
2705 		pscc = (*cookiep)->pscc_next;
2706 	if (pscc) {
2707 		*cookiep = pscc;
2708 		*psccp = pscc;
2709 		mutex_enter(&pscc->pscc_entries->psce_lock);
2710 		return (pscc->pscc_entries);
2711 	} else {
2712 		return (NULL);
2713 	}
2714 }
2715 
2716 /*
2717  * Create an entry for a process to pick up indicating a power level change.
2718  */
2719 static void
2720 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2721     int newlevel, int oldlevel, pm_canblock_t canblock)
2722 {
2723 	PMD_FUNC(pmf, "enqueue_notify")
2724 	pscc_t	*pscc;
2725 	psce_t	*psce;
2726 	void		*cookie = NULL;
2727 	int	overrun;
2728 
2729 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2730 	switch (cmd) {
2731 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2732 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2733 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2734 		psce = pm_psc_dip_to_direct(dip, &pscc);
2735 		ASSERT(psce);
2736 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2737 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2738 		    pm_poll_cnt[pscc->pscc_clone]))
2739 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2740 		    PSC_DIRECT, canblock);
2741 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2742 		mutex_enter(&pm_clone_lock);
2743 		if (!overrun)
2744 			pm_poll_cnt[pscc->pscc_clone]++;
2745 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2746 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2747 		mutex_exit(&pm_clone_lock);
2748 		break;
2749 	case PSC_HAS_CHANGED:
2750 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2751 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2752 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2753 			psce = pm_psc_dip_to_direct(dip, &pscc);
2754 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2755 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2756 			    pm_poll_cnt[pscc->pscc_clone]))
2757 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2758 			    oldlevel, PSC_DIRECT, canblock);
2759 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2760 			mutex_enter(&pm_clone_lock);
2761 			if (!overrun)
2762 				pm_poll_cnt[pscc->pscc_clone]++;
2763 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2764 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2765 			mutex_exit(&pm_clone_lock);
2766 		}
2767 		mutex_enter(&pm_clone_lock);
2768 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2769 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2770 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2771 			    oldlevel, PSC_INTEREST, canblock);
2772 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2773 		}
2774 		rw_exit(&pm_pscc_interest_rwlock);
2775 		mutex_exit(&pm_clone_lock);
2776 		break;
2777 #ifdef DEBUG
2778 	default:
2779 		ASSERT(0);
2780 #endif
2781 	}
2782 }
2783 
2784 static void
2785 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2786 {
2787 	if (listp) {
2788 		pm_ppm_devlist_t *p, *next = NULL;
2789 
2790 		for (p = *listp; p; p = next) {
2791 			next = p->ppd_next;
2792 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2793 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2794 			    canblock);
2795 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2796 		}
2797 		*listp = NULL;
2798 	}
2799 }
2800 
2801 /*
2802  * Try to get the power locks of the parent node and target (child)
2803  * node.  Return true if successful (with both locks held) or false
2804  * (with no locks held).
2805  */
2806 static int
2807 pm_try_parent_child_locks(dev_info_t *pdip,
2808     dev_info_t *dip, int *pcircp, int *circp)
2809 {
2810 	if (ndi_devi_tryenter(pdip, pcircp))
2811 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2812 			return (1);
2813 		} else {
2814 			ndi_devi_exit(pdip, *pcircp);
2815 		}
2816 	return (0);
2817 }
2818 
2819 /*
2820  * Determine if the power lock owner is blocked by current thread.
      *
      * The owner is obtained by sending a PMR_PPM_POWER_LOCK_OWNER request for
      * 'dip' to the ppm recorded in the node's devi_pm_ppm field.  The system
      * panics if that request does not return DDI_SUCCESS.
      *
2821  * returns :
2822  * 	1 - If the thread owning the effective power lock (the first lock on
2823  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2824  *          a mutex held by the current thread.
2825  *
2826  *	0 - otherwise
2827  *
2828  * Note : This function is called by pm_power_has_changed to determine whether
2829  * it is executing in parallel with pm_set_power.
2830  */
2831 static int
2832 pm_blocked_by_us(dev_info_t *dip)
2833 {
2834 	power_req_t power_req;
2835 	kthread_t *owner;
2836 	int result;
2837 	kmutex_t *mp;
2838 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2839 
      	/* Ask the ppm which thread owns the power lock for dip. */
2840 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2841 	power_req.req.ppm_power_lock_owner_req.who = dip;
2842 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2843 	    DDI_SUCCESS) {
2844 		/*
2845 		 * It is assumed that if the device is claimed by ppm, ppm
2846 		 * will always implement this request type and it'll always
2847 		 * return success. We panic here, if it fails.
2848 		 */
2849 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2850 		    PM_DEVICE(dip));
2851 		/*NOTREACHED*/
2852 	}
2853 
      	/*
      	 * We are the blocker only if there is an owner, it is in TS_SLEEP
      	 * on a synchronization object of type SOBJ_MUTEX, and the mutex it
      	 * waits on (its t_wchan) is owned by the current thread.
      	 */
2854 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2855 	    owner->t_state == TS_SLEEP &&
2856 	    owner->t_sobj_ops &&
2857 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2858 	    (mp = (kmutex_t *)owner->t_wchan) &&
2859 	    mutex_owner(mp) == curthread)
2860 		return (1);
2861 
2862 	return (0);
2863 }
2864 
2865 /*
2866  * Report a child's power level change to a parent that wants notification.
2867  */
2868 static void
2869 pm_notify_parent(dev_info_t *dip,
2870     dev_info_t *pdip, int comp, int old_level, int level)
2871 {
2872 	char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2873 	pm_bp_has_changed_t change;
2874 	pm_sp_misc_t misc;
2875 	int rv = DDI_SUCCESS;		/* filled in by parent, not examined */
2876 
2877 	misc.pspm_canblock = PM_CANBLOCK_BLOCK;
2878 	misc.pspm_scan = 0;
2879 	change.bphc_private = &misc;
2880 	change.bphc_dip = dip;
2881 	change.bphc_comp = comp;
2882 	change.bphc_olevel = old_level;
2883 	change.bphc_nlevel = level;
2884 	change.bphc_path = ddi_pathname(dip, path);
2885 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL, BUS_POWER_HAS_CHANGED,
2886 	    (void *)&change, (void *)&rv);
2887 	kmem_free(path, MAXPATHLEN);
2888 }
2889 
2890 /*
2891  * Resume a BC device when this change powers its component 0 up from 0.
      * Returns DDI_SUCCESS if no resume was needed, else devi_attach()'s result.
2892  */
2893 static int
2894 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2895 {
2896 	int rv;
2897 
2898 	if (!PM_ISBC(dip) || comp != 0 || old_level != 0 || level == 0)
2899 		return (DDI_SUCCESS);
2900 	ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2901 	/* ppm is not interested in DDI_PM_RESUME */
2902 	rv = devi_attach(dip, DDI_PM_RESUME);
2903 	if (rv != DDI_SUCCESS) {
2904 		cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2905 		    PM_NAME(dip), PM_ADDR(dip));
2906 	}
2907 	/* XXX The device is marked resumed even if the resume failed. */
2908 	DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2909 	return (rv);
2910 }
2911 
2912 /*
2913  * Cheap test, made outside the lock, of whether it is worth enqueueing an
2914  * entry for a watching process.  On a nonzero return the caller takes the
2915  * lock and does the full protocol.
2916  */
2917 static int
2918 pm_watchers()
2919 {
2920 	/* No watchers are reported while pm_processes_stopped is set. */
2921 	return (pm_processes_stopped ? 0 :
2922 	    (pm_pscc_direct || pm_pscc_interest));
2923 }
2924 
2925 /*
2926  * A driver is reporting that the power of one of its device's components
2927  * has changed.  Update the power state accordingly.
      *
      *	dip	device reporting the change
      *	comp	component number within dip
      *	level	power level the component is now at; must not be negative
      *
      * Returns DDI_FAILURE if level is negative or if any of e_pm_valid_info(),
      * e_pm_valid_comp() or e_pm_valid_power() rejects the arguments.  On the
      * deadlock path for a component with PM_POWER_OP set, the change is
      * recorded in the component and DDI_SUCCESS is returned; every other path
      * returns whatever pm_phc_impl() returns.
2928  */
2929 int
2930 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2931 {
2932 	PMD_FUNC(pmf, "pm_power_has_changed")
2933 	int ret;
2934 	dev_info_t *pdip = ddi_get_parent(dip);
2935 	struct pm_component *cp;
2936 	int blocked, circ, pcirc, old_level;
2937 	static int pm_phc_impl(dev_info_t *, int, int, int);
2938 
2939 	if (level < 0) {
2940 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2941 		    PM_DEVICE(dip), level))
2942 		return (DDI_FAILURE);
2943 	}
2944 
2945 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2946 	    PM_DEVICE(dip), comp, level))
2947 
2948 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2949 	    !e_pm_valid_power(dip, comp, level))
2950 		return (DDI_FAILURE);
2951 
2952 	/*
2953 	 * A driver thread calling pm_power_has_changed and another thread
2954 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2955 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2956 	 * this specific deadlock.  If we can't get the lock immediately
2957 	 * and we are deadlocked, just update the component's level, do
2958 	 * notifications, and return.  We intend to update the total power
2959 	 * state later (if the other thread fails to set power to the
2960 	 * desired level).  If we were called because of a power change on a
2961 	 * component that isn't involved in a set_power op, update all state
2962 	 * immediately.
2963 	 */
2964 	cp = PM_CP(dip, comp);
2965 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
      		/*
      		 * Case 1: the lock owner is blocked on a mutex we hold and
      		 * this component has PM_POWER_OP set.  Notify watchers and
      		 * the parent, resume if needed, then record the new level
      		 * in the component without taking either lock.
      		 */
2966 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2967 		    (cp->pmc_flags & PM_POWER_OP)) {
2968 			if (pm_watchers()) {
2969 				mutex_enter(&pm_rsvp_lock);
2970 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2971 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2972 				mutex_exit(&pm_rsvp_lock);
2973 			}
2974 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2975 				pm_notify_parent(dip,
2976 				    pdip, comp, cur_power(cp), level);
2977 			(void) pm_check_and_resume(dip,
2978 			    comp, cur_power(cp), level);
2979 
2980 			/*
2981 			 * Stash the old power index, update curpwr, and flag
2982 			 * that the total power state needs to be synched.
2983 			 */
2984 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2985 			/*
2986 			 * Several pm_power_has_changed calls could arrive
2987 			 * while the set power path remains blocked.  Keep the
2988 			 * oldest old power and the newest new power of any
2989 			 * sequence of phc calls which arrive during deadlock.
2990 			 */
2991 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2992 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2993 			cp->pmc_cur_pwr =
2994 			    pm_level_to_index(dip, cp, level);
2995 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
2996 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2997 			return (DDI_SUCCESS);
2998 		} else
      			/*
      			 * Case 2: blocked by us, but PM_POWER_OP is not set on
      			 * this component.  Only the parent lock is tried; the
      			 * update goes through pm_phc_impl() either way, and
      			 * the parent's hold/release accounting is skipped when
      			 * the parent lock cannot be taken.
      			 */
2999 			if (blocked) {	/* blocked, but different cmpt? */
3000 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
3001 					cmn_err(CE_NOTE,
3002 					    "!pm: parent kuc not updated due "
3003 					    "to possible deadlock.\n");
3004 					return (pm_phc_impl(dip,
3005 					    comp, level, 1));
3006 				}
3007 				old_level = cur_power(cp);
3008 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3009 				    (!PM_ISBC(dip) || comp == 0) &&
3010 				    POWERING_ON(old_level, level))
3011 					pm_hold_power(pdip);
3012 				ret = pm_phc_impl(dip, comp, level, 1);
3013 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3014 					if ((!PM_ISBC(dip) ||
3015 					    comp == 0) && level == 0 &&
3016 					    old_level != PM_LEVEL_UNKNOWN)
3017 						pm_rele_power(pdip);
3018 				}
3019 				ndi_devi_exit(pdip, pcirc);
3020 				/* child lock not held: deadlock */
3021 				return (ret);
3022 			}
      		/* Case 3: not blocked by us; wait one tick and retry. */
3023 		delay(1);
3024 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
3025 	}
3026 
3027 	/* non-deadlock case */
      	/*
      	 * Both locks are held here.  A parent that does not want
      	 * notification is held before a power-up is recorded, and released
      	 * after the child is recorded at level 0 from a known old level.
      	 * For BC devices this accounting is done for component 0 only.
      	 */
3028 	old_level = cur_power(cp);
3029 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3030 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
3031 		pm_hold_power(pdip);
3032 	ret = pm_phc_impl(dip, comp, level, 1);
3033 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3034 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
3035 		    old_level != PM_LEVEL_UNKNOWN)
3036 			pm_rele_power(pdip);
3037 	}
3038 	PM_UNLOCK_POWER(dip, circ);
3039 	ndi_devi_exit(pdip, pcirc);
3040 	return (ret);
3041 }
3042 
3043 /*
3044  * Account for power changes to a component of the console frame buffer.
3045  * "Full" means the component's normal power; an unknown old level is
3046  * treated as full.  Leaving full power adds one to the fb device's
3047  * "components off" count, returning to full power subtracts one, and
3048  * any other transition leaves the count unchanged.
3049  *
3050  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3051  */
3052 static int
3053 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3054 {
3055 	struct pm_component *cp = PM_CP(dip, cmpt);
3056 	int was_full, now_full;
3057 
3058 	was_full = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3059 	now_full = (new == cp->pmc_norm_pwr);
3060 
3061 	/* Only a change in "fullness" moves the count. */
3062 	if (was_full == now_full)
3063 		return (0);
3064 	return (was_full ? 1 : -1);
3065 }
3066 
3067 /*
3068  * Add 'incr' to the count of console frame buffer components < full power.
3069  */
3070 static void
3071 update_comps_off(int incr, dev_info_t *dip)
3072 {
3073 	mutex_enter(&pm_cfb_lock);	/* count is updated under pm_cfb_lock */
3074 	pm_cfb_comps_off += incr;
3075 	ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3076 	mutex_exit(&pm_cfb_lock);
3077 }
3078 
3079 /*
3080  * Update the power state in the framework (via the ppm).  The 'notify'
3081  * argument tells whether to notify watchers.  Power lock is already held.
      *
      * Returns DDI_SUCCESS when the component is already at 'level' (nothing
      * is done) or after the state has been updated, and DDI_FAILURE when the
      * PMR_PPM_POWER_CHANGE_NOTIFY request to the ppm returns DDI_FAILURE.
      *
      * NOTE(review): pm_power_has_changed() also calls this from its "blocked,
      * but different cmpt" path, which its own comment describes as running
      * without the child lock held; confirm the "already held" statement.
3082  */
3083 static int
3084 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3085 {
3086 	PMD_FUNC(pmf, "phc_impl")
3087 	power_req_t power_req;
3088 	int i, dodeps = 0;
3089 	dev_info_t *pdip = ddi_get_parent(dip);
3090 	int result;
3091 	int old_level;
3092 	struct pm_component *cp;
3093 	int incr = 0;
3094 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3095 	int work_type = 0;
3096 	char *pathbuf;
3097 
3098 	/* Must use "official" power level for this test. */
      	/*
      	 * pmc_phc_pwr and pmc_cur_pwr hold indices into pmc_lvals, so the
      	 * chosen one is translated to a level unless it is unknown.
      	 */
3099 	cp = PM_CP(dip, comp);
3100 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3101 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3102 	if (old_level != PM_LEVEL_UNKNOWN)
3103 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3104 
3105 	if (level == old_level) {
3106 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3107 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3108 		return (DDI_SUCCESS);
3109 	}
3110 
3111 	/*
3112 	 * Tell ppm about this.
3113 	 */
3114 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3115 	power_req.req.ppm_notify_level_req.who = dip;
3116 	power_req.req.ppm_notify_level_req.cmpt = comp;
3117 	power_req.req.ppm_notify_level_req.new_level = level;
3118 	power_req.req.ppm_notify_level_req.old_level = old_level;
3119 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3120 	    &result) == DDI_FAILURE) {
3121 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3122 		    pmf, PM_DEVICE(dip), level))
3123 		return (DDI_FAILURE);
3124 	}
3125 
      	/* Keep the console frame buffer "components off" count in step. */
3126 	if (PM_IS_CFB(dip)) {
3127 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3128 
3129 		if (incr) {
3130 			update_comps_off(incr, dip);
3131 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3132 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3133 			    comp, old_level, level, pm_cfb_comps_off))
3134 		}
3135 	}
3136 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3137 	result = DDI_SUCCESS;	/* not read again in this function */
3138 
3139 	if (notify) {
3140 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3141 			pm_notify_parent(dip, pdip, comp, old_level, level);
3142 		(void) pm_check_and_resume(dip, comp, old_level, level);
3143 	}
3144 
3145 	/*
3146 	 * Decrement the dependency kidsup count if we turn a device
3147 	 * off.
      	 * dodeps stays set only if cur_power() is zero for every component.
3148 	 */
3149 	if (POWERING_OFF(old_level, level)) {
3150 		dodeps = 1;
3151 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3152 			cp = PM_CP(dip, i);
3153 			if (cur_power(cp)) {
3154 				dodeps = 0;
3155 				break;
3156 			}
3157 		}
3158 		if (dodeps)
3159 			work_type = PM_DEP_WK_POWER_OFF;
3160 	}
3161 
3162 	/*
3163 	 * Increment if we turn it on. Check to see
3164 	 * if other comps are already on, if so,
3165 	 * dont increment.
3166 	 */
3167 	if (POWERING_ON(old_level, level)) {
3168 		dodeps = 1;
3169 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3170 			cp = PM_CP(dip, i);
3171 			if (comp == i)
3172 				continue;
3173 			/* -1 also treated as 0 in this case */
3174 			if (cur_power(cp) > 0) {
3175 				dodeps = 0;
3176 				break;
3177 			}
3178 		}
3179 		if (dodeps)
3180 			work_type = PM_DEP_WK_POWER_ON;
3181 	}
3182 
      	/*
      	 * Hand the dependency work to the dep thread, keyed by device path.
      	 * NOTE(review): pathbuf is freed right after a PM_DEP_NOWAIT
      	 * dispatch; presumably the callee copies the path - confirm.
      	 */
3183 	if (dodeps) {
3184 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3185 		(void) ddi_pathname(dip, pathbuf);
3186 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3187 		    PM_DEP_NOWAIT, NULL, 0);
3188 		kmem_free(pathbuf, MAXPATHLEN);
3189 	}
3190 
      	/*
      	 * level != old_level always holds here, because equal levels
      	 * returned early above.
      	 */
3191 	if (notify && (level != old_level) && pm_watchers()) {
3192 		mutex_enter(&pm_rsvp_lock);
3193 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3194 		    PM_CANBLOCK_BLOCK);
3195 		mutex_exit(&pm_rsvp_lock);
3196 	}
3197 
3198 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3199 	pm_rescan(dip);
3200 	return (DDI_SUCCESS);
3201 }
3202 
3203 /*
3204  * This function is called at startup time to notify pm of the existence
3205  * of any platform power managers for this platform.  As a result of
3206  * this registration, each function provided will be called each time
3207  * a device node is attached, until one returns true, and it must claim the
3208  * device node (by returning non-zero) if it wants to be involved in the
3209  * node's power management.  If it does claim the node, then it will
3210  * subsequently be notified of attach and detach events.
3211  *
      *	func	claim function to record in the first free ppm_callbacks slot
      *	dip	node recorded alongside func as the ppm's dip
      *
      * Returns DDI_FAILURE if none of the MAX_PPM_HANDLERS slots is free, and
      * DDI_SUCCESS otherwise.  After registering, the ancestors of dip are
      * walked: each one that is the root node or has pm info is offered to the
      * ppms via pm_ppm_claim(), and for each claimed node that has pm info the
      * ppm is sent a PMR_PPM_POWER_CHANGE_NOTIFY for every component whose
      * current power is known.
3212  */
3213 
3214 int
3215 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3216 {
3217 	PMD_FUNC(pmf, "register_ppm")
3218 	struct ppm_callbacks *ppmcp;
3219 	pm_component_t *cp;
3220 	int i, pwr, result, circ;
3221 	power_req_t power_req;
3222 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3223 	void pm_ppm_claim(dev_info_t *);
3224 
      	/* Take the first unused callback slot, under ppm_lock. */
3225 	mutex_enter(&ppm_lock);
3226 	ppmcp = ppm_callbacks;
3227 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3228 		if (ppmcp->ppmc_func == NULL) {
3229 			ppmcp->ppmc_func = func;
3230 			ppmcp->ppmc_dip = dip;
3231 			break;
3232 		}
3233 	}
3234 	mutex_exit(&ppm_lock);
3235 
      	/* The loop ran to completion, so no slot was free. */
3236 	if (i >= MAX_PPM_HANDLERS)
3237 		return (DDI_FAILURE);
      	/* From here on 'dip' walks up the tree and 'i' indexes components. */
3238 	while ((dip = ddi_get_parent(dip)) != NULL) {
3239 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3240 			continue;
3241 		pm_ppm_claim(dip);
3242 		/* don't bother with the not power-manageable nodes */
3243 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3244 			/*
3245 			 * Tell ppm about this.
3246 			 */
3247 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3248 			p->old_level = PM_LEVEL_UNKNOWN;
3249 			p->who = dip;
3250 			PM_LOCK_POWER(dip, &circ);
3251 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3252 				cp = PM_CP(dip, i);
3253 				pwr = cp->pmc_cur_pwr;
3254 				if (pwr != PM_LEVEL_UNKNOWN) {
3255 					p->cmpt = i;
3256 					p->new_level = cur_power(cp);
3257 					p->old_level = PM_LEVEL_UNKNOWN;
      					/*
      					 * A failure is only logged.
      					 * NOTE(review): the message prints
      					 * pwr (pmc_cur_pwr), while the
      					 * request carries cur_power(cp).
      					 */
3258 					if (pm_ctlops(PPM(dip), dip,
3259 					    DDI_CTLOPS_POWER, &power_req,
3260 					    &result) == DDI_FAILURE) {
3261 						PMD(PMD_FAIL, ("%s: pc "
3262 						    "%s@%s(%s#%d) to %d "
3263 						    "fails\n", pmf,
3264 						    PM_DEVICE(dip), pwr))
3265 					}
3266 				}
3267 			}
3268 			PM_UNLOCK_POWER(dip, circ);
3269 		}
3270 	}
3271 	return (DDI_SUCCESS);
3272 }
3273 
3274 /*
3275  * Offer the node to each registered ppm in turn; the first one to claim it
3276  * is recorded in the devinfo struct.  The sets of devices claimed by each
3277  * ppm are assumed to be disjoint.  A node that already has a ppm is skipped.
3278  */
3279 void
3280 pm_ppm_claim(dev_info_t *dip)
3281 {
3282 	struct ppm_callbacks *cb;
3283 
3284 	if (PPM(dip))
3285 		return;
3286 	mutex_enter(&ppm_lock);
3287 	cb = ppm_callbacks;
3288 	while (cb->ppmc_func) {
3289 		if ((*cb->ppmc_func)(dip)) {
3290 			DEVI(dip)->devi_pm_ppm =
3291 			    (struct dev_info *)cb->ppmc_dip;
3292 			break;
3293 		}
3294 		cb++;
3295 	}
3296 	mutex_exit(&ppm_lock);
3297 }
3298 
3299 /*
3300  * Node is being detached so stop autopm until we see if it succeeds, in which
3301  * case pm_stop will be called.  For backwards compatible devices we bring the
3302  * device up to full power on the assumption the detach will succeed.
      *
      * Does nothing for a node without pm info.  Otherwise PM_DETACHING is set
      * in the node's pm state, scanning is stopped for non-BC nodes, and the
      * console frame buffer and BC devices are brought to normal power.  For
      * the console frame buffer, cfb_dip is also moved to cfb_dip_detaching
      * once cfb_inuse is clear.
3303  */
3304 void
3305 pm_detaching(dev_info_t *dip)
3306 {
3307 	PMD_FUNC(pmf, "detaching")
3308 	pm_info_t *info = PM_GET_PM_INFO(dip);
3309 	int iscons;
3310 
3311 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3312 	    PM_NUMCMPTS(dip)))
3313 	if (info == NULL)
3314 		return;
3315 	ASSERT(DEVI_IS_DETACHING(dip));
3316 	PM_LOCK_DIP(dip);
3317 	info->pmi_dev_pm_state |= PM_DETACHING;
3318 	PM_UNLOCK_DIP(dip);
3319 	if (!PM_ISBC(dip))
3320 		pm_scan_stop(dip);
3321 
3322 	/*
3323 	 * console and old-style devices get brought up when detaching.
3324 	 */
3325 	iscons = PM_IS_CFB(dip);
3326 	if (iscons || PM_ISBC(dip)) {
      		/* The result of the power-up attempt is ignored. */
3327 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3328 		if (iscons) {
      			/*
      			 * Poll cfb_inuse, dropping pm_cfb_lock around each
      			 * one-tick delay.
      			 */
3329 			mutex_enter(&pm_cfb_lock);
3330 			while (cfb_inuse) {
3331 				mutex_exit(&pm_cfb_lock);
3332 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3333 				delay(1);
3334 				mutex_enter(&pm_cfb_lock);
3335 			}
3336 			ASSERT(cfb_dip_detaching == NULL);
3337 			ASSERT(cfb_dip);
3338 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3339 			cfb_dip = NULL;
3340 			mutex_exit(&pm_cfb_lock);
3341 		}
3342 	}
3343 }
3344 
3345 /*
3346  * Node failed to detach.  If it used to be autopm'd, make it so again.
      *
      * Does nothing for a node without pm info.  Clears PM_DETACHING and, if
      * PM_ALLNORM_DEFERRED was set, tries to bring the device to normal power
      * (unless it is already there) before clearing that flag as well.  For
      * non-BC nodes scanning is re-initialized if the node is scanable, and a
      * rescan is requested.
      *
      * NOTE(review): pmi_dev_pm_state is modified here without PM_LOCK_DIP,
      * whereas pm_detaching() sets PM_DETACHING under it; confirm that is safe.
3347  */
3348 void
3349 pm_detach_failed(dev_info_t *dip)
3350 {
3351 	PMD_FUNC(pmf, "detach_failed")
3352 	pm_info_t *info = PM_GET_PM_INFO(dip);
3353 	int pm_all_at_normal(dev_info_t *);
3354 
3355 	if (info == NULL)
3356 		return;
3357 	ASSERT(DEVI_IS_DETACHING(dip));
3358 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3359 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3360 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3361 			/* Make sure the operation is still needed */
3362 			if (!pm_all_at_normal(dip)) {
      				/* A failure here is only logged. */
3363 				if (pm_all_to_normal(dip,
3364 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3365 					PMD(PMD_ERROR, ("%s: could not bring "
3366 					    "%s@%s(%s#%d) to normal\n", pmf,
3367 					    PM_DEVICE(dip)))
3368 				}
3369 			}
3370 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3371 		}
3372 	}
3373 	if (!PM_ISBC(dip)) {
      		/* The scanable test and scan init are done under pm_scan_lock. */
3374 		mutex_enter(&pm_scan_lock);
3375 		if (PM_SCANABLE(dip))
3376 			pm_scan_init(dip);
3377 		mutex_exit(&pm_scan_lock);
3378 		pm_rescan(dip);
3379 	}
3380 }
3381 
3382 /*
      * Generic Backwards Compatible component.  e_pm_default_components() copies
      * bc_comp into each component it sets up; bc_names supplies the "off" and
      * "on" strings that the template's last initializer points at.
      * NOTE(review): the bc_comp initializer is positional; check the pm_comp_t
      * definition for which field each value lands in.
      */
3383 static char *bc_names[] = {"off", "on"};
3384 
3385 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3386 
3387 static void
3388 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3389 {
3390 	/* Two levels: index 0 is level 0 and index 1 is 'norm'. */
3391 	cp->pmc_comp.pmc_numlevels = 2;
3392 	cp->pmc_comp.pmc_lvals[0] = 0;
3393 	cp->pmc_comp.pmc_lvals[1] = norm;
3394 
3395 	e_pm_set_cur_pwr(dip, cp, norm);	/* current power becomes 'norm' */
3396 }
3397 
3398 static void
3399 e_pm_default_components(dev_info_t *dip, int cmpts)
3400 {
3401 	pm_component_t *comps = DEVI(dip)->devi_pm_components;
3402 	int n;
3403 
3404 	/* Start each component from the generic BC template. */
3405 	for (n = 0; n < cmpts; n++) {
3406 		pm_comp_t *pmc = &comps[n].pmc_comp;
3407 
3408 		*pmc = bc_comp;		/* struct assignment */
3409 		pmc->pmc_numlevels = 2;
3410 		pmc->pmc_lvals = kmem_zalloc(2 * sizeof (int), KM_SLEEP);
3411 		pmc->pmc_thresh = kmem_alloc(2 * sizeof (int), KM_SLEEP);
3412 		pmc->pmc_thresh[0] = INT_MAX;
3413 		pmc->pmc_thresh[1] = INT_MAX;
3414 	}
3415 }
3416 
3417 /*
3418  * Called from functions that require components to exist already to allow
3419  * for their creation by parsing the pm-components property.
3420  * Device will not be power managed as a result of this call
3421  * No locking needed because we're single threaded by the ndi_devi_enter
3422  * done while attaching, and the device isn't visible until after it has
3423  * attached
      *
      *	dip	device whose components are to be set up
      *	style	PM_STYLE_NEW makes a missing pm-components property a failure
      *
      * Returns DDI_SUCCESS if the components were already done or have now been
      * filled in.  Returns DDI_FAILURE if a previous attempt was flagged as
      * failed, if pm_autoconfig() fails with 'error' set or style is
      * PM_STYLE_NEW, if the device has no components, or if a component's
      * normal power cannot be obtained.
3424  */
3425 int
3426 pm_premanage(dev_info_t *dip, int style)
3427 {
3428 	PMD_FUNC(pmf, "premanage")
3429 	pm_comp_t	*pcp, *compp;
3430 	int		cmpts, i, norm, error;
      	/* p is reassigned before it is used in the pm-components branch. */
3431 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3432 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3433 
3434 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3435 	/*
3436 	 * If this dip has already been processed, don't mess with it
3437 	 */
3438 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3439 		return (DDI_SUCCESS);
3440 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3441 		return (DDI_FAILURE);
3442 	}
3443 	/*
3444 	 * Look up pm-components property and create components accordingly
3445 	 * If that fails, fall back to backwards compatibility
3446 	 */
3447 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3448 		/*
3449 		 * If error is set, the property existed but was not well formed
3450 		 */
3451 		if (error || (style == PM_STYLE_NEW)) {
3452 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3453 			return (DDI_FAILURE);
3454 		}
3455 		/*
3456 		 * If they don't have the pm-components property, then we
3457 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3458 		 * behavior driver must have called pm_create_components, and
3459 		 * we need to flesh out dummy components
3460 		 */
3461 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3462 			/*
3463 			 * Not really failure, but we don't want the
3464 			 * caller to treat it as success
3465 			 */
3466 			return (DDI_FAILURE);
3467 		}
3468 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3469 		e_pm_default_components(dip, cmpts);
3470 		for (i = 0; i < cmpts; i++) {
3471 			/*
3472 			 * if normal power not set yet, we don't really know
3473 			 * what *ANY* of the power values are.  If normal
3474 			 * power is set, then we assume for this backwards
3475 			 * compatible case that the values are 0, normal power.
3476 			 */
      			/*
      			 * NOTE(review): on this failure return PMC_BC stays
      			 * set and the arrays allocated by
      			 * e_pm_default_components() are not freed here;
      			 * confirm the callers clean up.
      			 */
3477 			norm = pm_get_normal_power(dip, i);
3478 			if (norm == (uint_t)-1) {
3479 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3480 				    PM_DEVICE(dip), i))
3481 				return (DDI_FAILURE);
3482 			}
3483 			/*
3484 			 * Components of BC devices start at their normal power,
3485 			 * so count them to be not at their lowest power.
3486 			 */
3487 			PM_INCR_NOTLOWEST(dip);
3488 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3489 		}
3490 	} else {
3491 		/*
3492 		 * e_pm_create_components was called from pm_autoconfig(), it
3493 		 * creates components with no descriptions (or known levels)
3494 		 */
3495 		cmpts = PM_NUMCMPTS(dip);
3496 		ASSERT(cmpts != 0);
3497 		pcp = compp;
3498 		p = DEVI(dip)->devi_pm_components;
      		/*
      		 * Copy each parsed description into its component and mark
      		 * the component's current power as unknown.
      		 */
3499 		for (i = 0; i < cmpts; i++, p++) {
3500 			p->pmc_comp = *pcp++;   /* struct assignment */
3501 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3502 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3503 		}
3504 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3505 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3506 			    PMC_CPU_THRESH);
3507 		else
3508 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3509 			    PMC_DEF_THRESH);
      		/* The array from pm_autoconfig() has been copied; free it. */
3510 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3511 	}
3512 	return (DDI_SUCCESS);
3513 }
3514 
3515 /*
3516  * Called during or after the device's attach to let us know it is ready
3517  * to play autopm.   Look up the pm model and manage the device accordingly.
3518  * Returns system call errno value.
      * NOTE(review): the code below returns DDI_FAILURE when pm_premanage()
      * fails and 0 otherwise; confirm what callers expect.
3519  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3520  * a little cleaner
3521  *
3522  * Called with dip lock held, return with dip lock unheld.
      * NOTE(review): no lock is released in this function, and pm_premanage()
      * asserts !PM_IAM_LOCKING_DIP(dip); confirm the statement above is current.
3523  */
3524 
3525 int
3526 e_pm_manage(dev_info_t *dip, int style)
3527 {
3528 	PMD_FUNC(pmf, "e_manage")
3529 	pm_info_t	*info;
3530 	dev_info_t	*pdip = ddi_get_parent(dip);
3531 	int	pm_thresh_specd(dev_info_t *);
3532 	int	count;
3533 	char	*pathbuf;
3534 
3535 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3536 		return (DDI_FAILURE);
3537 	}
3538 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3539 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3540 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3541 
3542 	/*
3543 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3544 	 * out at their normal power, so they are "up", others start out
3545 	 * unknown, which is effectively "up".  Parents which want notification
3546 	 * get kidsupcnt of 0 always.
      	 * The count is 1 for a BC node, else the node's component count.
3547 	 */
3548 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3549 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3550 		e_pm_hold_rele_power(pdip, count);
3551 
3552 	pm_set_pm_info(dip, info);
3553 	/*
3554 	 * Apply any recorded thresholds
3555 	 */
3556 	(void) pm_thresh_specd(dip);
3557 
3558 	/*
3559 	 * Do dependency processing.
      	 * The device's own path is passed for both path arguments.
3560 	 */
3561 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3562 	(void) ddi_pathname(dip, pathbuf);
3563 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3564 	    PM_DEP_NOWAIT, NULL, 0);
3565 	kmem_free(pathbuf, MAXPATHLEN);
3566 
      	/*
      	 * Non-BC nodes that are scanable get scanning set up under
      	 * pm_scan_lock; the rescan is requested after the lock is dropped.
      	 */
3567 	if (!PM_ISBC(dip)) {
3568 		mutex_enter(&pm_scan_lock);
3569 		if (PM_SCANABLE(dip)) {
3570 			pm_scan_init(dip);
3571 			mutex_exit(&pm_scan_lock);
3572 			pm_rescan(dip);
3573 		} else {
3574 			mutex_exit(&pm_scan_lock);
3575 		}
3576 	}
3577 	return (0);
3578 }
3579 
3580 /*
3581  * This is the obsolete exported interface for a driver to find out its
3582  * "normal" (max) power.  Returns the normal power of component 'comp', or
3583  * DDI_FAILURE if 'comp' is not a valid component number for 'dip'.
3584  *
3585  * We only get components destroyed while no power management is
3586  * going on (and the device is detached), so we don't need a mutex here
3587  */
3588 int
3589 pm_get_normal_power(dev_info_t *dip, int comp)
3590 {
3591 	if (comp < 0 || comp >= PM_NUMCMPTS(dip))
3592 		return (DDI_FAILURE);
3593 	return (PM_CP(dip, comp)->pmc_norm_pwr);
3594 }
3595 
3596 /*
3597  * Store the current power level of component 'comp' in *levelp.  Returns
3598  * DDI_SUCCESS, or DDI_FAILURE (leaving *levelp alone) for a bad 'comp'.
3599  */
3600 int
3601 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3602 {
3603 	if (comp < 0 || comp >= PM_NUMCMPTS(dip))
3604 		return (DDI_FAILURE);
3605 	*levelp = PM_CURPOWER(dip, comp);
3606 	return (DDI_SUCCESS);
3607 }
3608 
3609 /*
3610  * Returns current threshold of indicated component.  That is the single
3611  * threshold of a backwards compatible node, one of the default thresholds
3612  * (chosen by the node's pm flags) while the component's power is unknown,
3613  * or otherwise the threshold recorded for the component's current level.
3614  */
3615 static int
3616 cur_threshold(dev_info_t *dip, int comp)
3617 {
3618 	pm_component_t *cp = PM_CP(dip, comp);
3619 	int idx;
3620 
3621 	/* backwards compatible nodes only have one threshold */
3622 	if (PM_ISBC(dip))
3623 		return (cp->pmc_comp.pmc_thresh[1]);
3624 
3625 	idx = cp->pmc_cur_pwr;
3626 	if (idx != PM_LEVEL_UNKNOWN) {
3627 		ASSERT(cp->pmc_comp.pmc_thresh);
3628 		return (cp->pmc_comp.pmc_thresh[idx]);
3629 	}
3630 
3631 	/* Level not known yet: pick the default that matches the flags. */
3632 	if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3633 		return (pm_default_nexus_threshold);
3634 	if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3635 		return (pm_cpu_idle_threshold);
3636 	return (pm_system_idle_threshold);
3637 }
3638 
3639 /*
3640  * Compute next lower component power level given power index.
3641  *
3642  * An unknown index yields the level stored at index 0; otherwise the level
3643  * one index below 'pwrndx' is returned.
3644  */
3645 static int
3646 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3647 {
3648 	if (pwrndx == PM_LEVEL_UNKNOWN)
3649 		return (cp->pmc_comp.pmc_lvals[0]);
3650 
3651 	/* pwrndx is a by-value copy, so stepping it down stays local. */
3652 	pwrndx--;
3653 	ASSERT(pwrndx >= 0);
3654 	return (cp->pmc_comp.pmc_lvals[pwrndx]);
3655 }
3656 
3657 /*
3658  * Update the maxpower (normal) power of a component.  Note that the
3659  * component's power level is only changed if its current power level
3660  * is higher than the new max power.  If the power change fails, the old
3661  * max power is put back and DDI_FAILURE is returned.
3662  */
3663 int
3664 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3665 {
3666 	PMD_FUNC(pmf, "update_maxpower")
3667 	int prev_max;
3668 	int rv;
3669 
3670 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3671 	    !e_pm_valid_power(dip, comp, level)) {
3672 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3673 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3674 		return (DDI_FAILURE);
3675 	}
3676 	prev_max = e_pm_get_max_power(dip, comp);
3677 	e_pm_set_max_power(dip, comp, level);
3678 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3679 	    PM_CANBLOCK_BLOCK, 0, &rv) == DDI_SUCCESS)
3680 		return (DDI_SUCCESS);
3681 	/* The power change failed: restore the previous maximum. */
3682 	e_pm_set_max_power(dip, comp, prev_max);
3683 	PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3684 	    PM_DEVICE(dip)))
3685 	return (DDI_FAILURE);
3686 }
3687 
3688 /*
3689  * Bring all components of device to normal power.  Every component is
3690  * tried; DDI_FAILURE means the norm powers were unavailable or a set failed.
3691  */
3692 int
3693 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3694 {
3695 	PMD_FUNC(pmf, "all_to_normal")
3696 	int		*norm_pwrs;
3697 	size_t		norm_sz;
3698 	int		c, ncomps, result;
3699 	int		nfailed = 0;
3700 
3701 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3702 	ASSERT(PM_GET_PM_INFO(dip));
3703 	if (pm_get_norm_pwrs(dip, &norm_pwrs, &norm_sz) != DDI_SUCCESS) {
3704 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3705 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3706 		return (DDI_FAILURE);
3707 	}
3708 	ncomps = PM_NUMCMPTS(dip);
3709 	for (c = 0; c < ncomps; c++) {
3710 		if (pm_set_power(dip, c, norm_pwrs[c], PM_LEVEL_UPONLY,
3711 		    canblock, 0, &result) == DDI_SUCCESS)
3712 			continue;
3713 		nfailed++;
3714 		PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3715 		    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3716 		    PM_DEVICE(dip), c, norm_pwrs[c], result))
3717 	}
3718 	kmem_free(norm_pwrs, norm_sz);
3719 	if (nfailed == 0)
3720 		return (DDI_SUCCESS);
3721 	PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3722 	    "to full power\n", pmf, nfailed, PM_DEVICE(dip)))
3723 	return (DDI_FAILURE);
3724 }
3725 
3726 /*
3727  * Returns true if all components of device are at normal power, and 0 as
      * soon as one component is found below its normal power.
3728  * NOTE(review): a pm_get_norm_pwrs() failure returns DDI_FAILURE, not 0;
3729  * confirm that callers testing the result as a boolean expect that.
3730  */
3731 int
3732 pm_all_at_normal(dev_info_t *dip)
3733 {
3734 	PMD_FUNC(pmf, "all_at_normal")
3735 	int		*norm_pwrs;
3736 	size_t		norm_sz;
3737 	int		c, all_normal = 1;
3738 
3739 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3740 	if (pm_get_norm_pwrs(dip, &norm_pwrs, &norm_sz) != DDI_SUCCESS) {
3741 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3742 		return (DDI_FAILURE);
3743 	}
3744 	for (c = 0; c < PM_NUMCMPTS(dip); c++) {
3745 		int current = PM_CURPOWER(dip, c);
3746 		if (current >= norm_pwrs[c])
3747 			continue;
3748 		PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3749 		    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), c,
3750 		    norm_pwrs[c], current))
3751 		all_normal = 0;
3752 		break;
3753 	}
3754 	kmem_free(norm_pwrs, norm_sz);
3755 	return (all_normal);
3756 }
3757 
3758 static void
3759 bring_wekeeps_up(char *keeper)
3760 {
3761 	PMD_FUNC(pmf, "bring_wekeeps_up")
3762 	int k;
3763 	pm_pdr_t *rec;
3764 	pm_info_t *kept_info;
3765 	char *kept_path;
3766 	dev_info_t *kept;
3767 	static void bring_pmdep_up(dev_info_t *, int);
3768 
3769 	/* Nothing is brought up once panicstr is set. */
3770 	if (panicstr)
3771 		return;
3772 
3773 	/*
3774 	 * We process the request even if the keeper detaches because
3775 	 * detach processing expects this to increment kidsupcnt of kept.
3776 	 */
3777 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3778 	for (rec = pm_dep_head; rec; rec = rec->pdr_next) {
3779 		/* Only dependency records whose keeper path matches. */
3780 		if (strcmp(rec->pdr_keeper, keeper) != 0)
3781 			continue;
3782 		for (k = 0; k < rec->pdr_kept_count; k++) {
3783 			kept_path = rec->pdr_kept_paths[k];
3784 			if (kept_path == NULL)
3785 				continue;
3786 			ASSERT(kept_path[0] != '\0');
3787 			kept = pm_name_to_dip(kept_path, 1);
3788 			if (kept == NULL)
3789 				continue;
3790 			/*
3791 			 * Skip a node that has no pm info, and don't mess
3792 			 * with one that is being detached: it isn't safe
3793 			 * to call its power entry point.  Either way the
3794 			 * node obtained above is released.
3795 			 */
3796 			kept_info = PM_GET_PM_INFO(kept);
3797 			if (kept_info != NULL &&
3798 			    !(kept_info->pmi_dev_pm_state & PM_DETACHING))
3799 				bring_pmdep_up(kept, 1);
3800 			ddi_release_devi(kept);
3801 		}
3802 	}
3803 }
3807 
3808 /*
3809  * Bring up the 'kept' device passed as argument.  When 'hold' is nonzero a
3810  * power hold is placed on it first.
3811  */
3812 static void
3813 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3814 {
3815 	PMD_FUNC(pmf, "bring_pmdep_up")
3816 	int at_normal = 0;
3817 
3818 	/* If the kept device has been unmanaged, do nothing. */
3819 	if (!PM_GET_PM_INFO(kept_dip))
3820 		return;
3821 
3822 	/* Just ignore DIRECT PM device till they are released. */
3823 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip)) {
3824 		at_normal = pm_all_at_normal(kept_dip);
3825 		if (!at_normal) {
3826 			PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT "
3827 			    "%s@%s(%s#%d) controlling process did something "
3828 			    "else\n", pmf, PM_DEVICE(kept_dip)))
3829 			DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3830 			return;
3831 		}
3832 	}
3833 	/* if we got here the keeper had a transition from OFF->ON */
3834 	if (hold)
3835 		pm_hold_power(kept_dip);
3836 	if (!at_normal)
3837 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3838 }
3839 
3840 /*
3841  * A bunch of stuff that belongs only to the next routine (or two)
3842  */
3843 
3844 static const char namestr[] = "NAME=";
3845 static const int nameln = sizeof (namestr) - 1;
3846 static const char pmcompstr[] = "pm-components";
3847 
3848 struct pm_comp_pkg {
3849 	pm_comp_t		*comp;
3850 	struct pm_comp_pkg	*next;
3851 };
3852 
/* True if ch is an ASCII decimal digit.  Note: evaluates ch more than once. */
#define	isdigit(ch)	('0' <= (ch) && (ch) <= '9')

/* True if ch is an ASCII hex digit.  Note: evaluates ch more than once. */
#define	isxdigit(ch)	(isdigit(ch) || ('a' <= (ch) && (ch) <= 'f') || \
			('A' <= (ch) && (ch) <= 'F'))
3857 
/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * Completes the component 'compp' from the per-level scratch arrays of
 * pm_autoconfig(): 'level' levels whose values are in lvals[], whose names
 * are in lnames[] and whose name sizes (including the NUL) are in lszs[],
 * with 'size' giving the size of the name buffer to allocate.  Uses the
 * caller's locals compp, size, level, lvals, lnames, lszs, tp and j.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	/* per-level arrays, one slot per level */ \
	compp->pmc_numlevels = level; \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	/* a single buffer holds the level names back to back */ \
	compp->pmc_lnames_sz = size; \
	compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	tp = compp->pmc_lname_buf; \
	for (j = 0; j < level; j++) { \
		compp->pmc_lvals[j] = lvals[j]; \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		/* copy string out of prop array into buffer */ \
		compp->pmc_lnames[j] = tp; \
		(void) strcpy(tp, lnames[j]); \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3881 
3882 /*
3883  * Create (empty) component data structures.
3884  */
3885 static void
3886 e_pm_create_components(dev_info_t *dip, int num_components)
3887 {
3888 	struct pm_component *compp, *ocompp;
3889 	int i, size = 0;
3890 
3891 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3892 	ASSERT(!DEVI(dip)->devi_pm_components);
3893 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3894 	size = sizeof (struct pm_component) * num_components;
3895 
3896 	compp = kmem_zalloc(size, KM_SLEEP);
3897 	ocompp = compp;
3898 	DEVI(dip)->devi_pm_comp_size = size;
3899 	DEVI(dip)->devi_pm_num_components = num_components;
3900 	PM_LOCK_BUSY(dip);
3901 	for (i = 0; i < num_components;  i++) {
3902 		compp->pmc_timestamp = gethrestime_sec();
3903 		compp->pmc_norm_pwr = (uint_t)-1;
3904 		compp++;
3905 	}
3906 	PM_UNLOCK_BUSY(dip);
3907 	DEVI(dip)->devi_pm_components = ocompp;
3908 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3909 }
3910 
/*
 * Parse hex or decimal value from char string
 *
 * cp:		string that should start with a number; a "0x" or "0X"
 *		prefix selects hex, anything else is parsed as decimal
 *		(a leading '0' does NOT mean octal)
 * valp:	where the parsed value is stored on success
 *
 * Returns a pointer to the first character after the number, or NULL if
 * cp does not start with a digit or the value does not fit in an int.
 * *valp is not modified when NULL is returned.
 *
 * For compatibility with the previous implementation, "0x" followed by no
 * hex digits is accepted and parses as 0.
 *
 * The digits are accumulated in place; no intermediate buffer is used, so
 * the length of the digit string is not limited.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	int base = 10;
	int value = 0;
	int ch, digit;

	if (*cp < '0' || *cp > '9')
		return (NULL);

	if (cp[0] == '0' && (cp[1] == 'x' || cp[1] == 'X')) {
		base = 16;
		cp += 2;
	}

	for (;;) {
		ch = *cp;
		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			break;

		/* refuse to overflow: value * base + digit must fit an int */
		if (value > (INT_MAX - digit) / base)
			return (NULL);
		value = value * base + digit;
		cp++;
	}

	*valp = value;
	return (cp);
}
3973 
3974 /*
3975  * Set max (previously documented as "normal") power.
3976  */
3977 static void
3978 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3979 {
3980 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3981 }
3982 
3983 /*
3984  * Get max (previously documented as "normal") power.
3985  */
3986 static int
3987 e_pm_get_max_power(dev_info_t *dip, int component_number)
3988 {
3989 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
3990 }
3991 
3992 /*
3993  * Internal routine for destroying components
3994  * It is called even when there might not be any, so it must be forgiving.
3995  */
3996 static void
3997 e_pm_destroy_components(dev_info_t *dip)
3998 {
3999 	int i;
4000 	struct pm_component *cp;
4001 
4002 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4003 	if (PM_NUMCMPTS(dip) == 0)
4004 		return;
4005 	cp = DEVI(dip)->devi_pm_components;
4006 	ASSERT(cp);
4007 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
4008 		int nlevels = cp->pmc_comp.pmc_numlevels;
4009 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
4010 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
4011 		/*
4012 		 * For BC nodes, the rest is static in bc_comp, so skip it
4013 		 */
4014 		if (PM_ISBC(dip))
4015 			continue;
4016 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4017 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4018 		kmem_free(cp->pmc_comp.pmc_lname_buf,
4019 		    cp->pmc_comp.pmc_lnames_sz);
4020 	}
4021 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4022 	DEVI(dip)->devi_pm_components = NULL;
4023 	DEVI(dip)->devi_pm_num_components = 0;
4024 	DEVI(dip)->devi_pm_flags &=
4025 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4026 }
4027 
4028 /*
4029  * Read the pm-components property (if there is one) and use it to set up
4030  * components.  Returns a pointer to an array of component structures if
4031  * pm-components found and successfully parsed, else returns NULL.
4032  * Sets error return *errp to true to indicate a failure (as opposed to no
4033  * property being present).
4034  */
4035 pm_comp_t *
4036 pm_autoconfig(dev_info_t *dip, int *errp)
4037 {
4038 	PMD_FUNC(pmf, "autoconfig")
4039 	uint_t nelems;
4040 	char **pp;
4041 	pm_comp_t *compp = NULL;
4042 	int i, j, level, components = 0;
4043 	size_t size = 0;
4044 	struct pm_comp_pkg *p, *ptail;
4045 	struct pm_comp_pkg *phead = NULL;
4046 	int *lvals = NULL;
4047 	int *lszs = NULL;
4048 	int *np = NULL;
4049 	int npi = 0;
4050 	char **lnames = NULL;
4051 	char *cp, *tp;
4052 	pm_comp_t *ret = NULL;
4053 
4054 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4055 	*errp = 0;	/* assume success */
4056 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4057 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4058 		return (NULL);
4059 	}
4060 
4061 	if (nelems < 3) {	/* need at least one name and two levels */
4062 		goto errout;
4063 	}
4064 
4065 	/*
4066 	 * pm_create_components is no longer allowed
4067 	 */
4068 	if (PM_NUMCMPTS(dip) != 0) {
4069 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4070 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4071 		goto errout;
4072 	}
4073 
4074 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4075 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4076 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4077 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4078 
4079 	level = 0;
4080 	phead = NULL;
4081 	for (i = 0; i < nelems; i++) {
4082 		cp = pp[i];
4083 		if (!isdigit(*cp)) {	/*  must be name */
4084 			if (strncmp(cp, namestr, nameln) != 0) {
4085 				goto errout;
4086 			}
4087 			if (i != 0) {
4088 				if (level == 0) {	/* no level spec'd */
4089 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4090 					    pmf))
4091 					goto errout;
4092 				}
4093 				np[npi++] = lvals[level - 1];
4094 				/* finish up previous component levels */
4095 				FINISH_COMP;
4096 			}
4097 			cp += nameln;
4098 			if (!*cp) {
4099 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4100 				goto errout;
4101 			}
4102 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4103 			if (phead == NULL) {
4104 				phead = ptail = p;
4105 			} else {
4106 				ptail->next = p;
4107 				ptail = p;
4108 			}
4109 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4110 			    KM_SLEEP);
4111 			compp->pmc_name_sz = strlen(cp) + 1;
4112 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4113 			    KM_SLEEP);
4114 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4115 			components++;
4116 			level = 0;
4117 		} else {	/* better be power level <num>=<name> */
4118 #ifdef DEBUG
4119 			tp = cp;
4120 #endif
4121 			if (i == 0 ||
4122 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4123 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4124 				goto errout;
4125 			}
4126 #ifdef DEBUG
4127 			tp = cp;
4128 #endif
4129 			if (*cp++ != '=' || !*cp) {
4130 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4131 				goto errout;
4132 			}
4133 
4134 			lszs[level] = strlen(cp) + 1;
4135 			size += lszs[level];
4136 			lnames[level] = cp;	/* points into prop string */
4137 			level++;
4138 		}
4139 	}
4140 	np[npi++] = lvals[level - 1];
4141 	if (level == 0) {	/* ended with a name */
4142 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4143 		goto errout;
4144 	}
4145 	FINISH_COMP;
4146 
4147 
4148 	/*
4149 	 * Now we have a list of components--we have to return instead an
4150 	 * array of them, but we can just copy the top level and leave
4151 	 * the rest as is
4152 	 */
4153 	(void) e_pm_create_components(dip, components);
4154 	for (i = 0; i < components; i++)
4155 		e_pm_set_max_power(dip, i, np[i]);
4156 
4157 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4158 	for (i = 0, p = phead; i < components; i++) {
4159 		ASSERT(p);
4160 		/*
4161 		 * Now sanity-check values:  levels must be monotonically
4162 		 * increasing
4163 		 */
4164 		if (p->comp->pmc_numlevels < 2) {
4165 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4166 			    "levels\n", pmf,
4167 			    p->comp->pmc_name, PM_DEVICE(dip),
4168 			    p->comp->pmc_numlevels))
4169 			goto errout;
4170 		}
4171 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4172 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4173 			    (p->comp->pmc_lvals[j] <=
4174 			    p->comp->pmc_lvals[j - 1]))) {
4175 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4176 				    "not mono. incr, %d follows %d\n", pmf,
4177 				    p->comp->pmc_name, PM_DEVICE(dip),
4178 				    p->comp->pmc_lvals[j],
4179 				    p->comp->pmc_lvals[j - 1]))
4180 				goto errout;
4181 			}
4182 		}
4183 		ret[i] = *p->comp;	/* struct assignment */
4184 		for (j = 0; j < i; j++) {
4185 			/*
4186 			 * Test for unique component names
4187 			 */
4188 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4189 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4190 				    "unique\n", pmf, ret[j].pmc_name,
4191 				    PM_DEVICE(dip)))
4192 				goto errout;
4193 			}
4194 		}
4195 		ptail = p;
4196 		p = p->next;
4197 		phead = p;	/* errout depends on phead making sense */
4198 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4199 		kmem_free(ptail, sizeof (*ptail));
4200 	}
4201 out:
4202 	ddi_prop_free(pp);
4203 	if (lvals)
4204 		kmem_free(lvals, nelems * sizeof (int));
4205 	if (lszs)
4206 		kmem_free(lszs, nelems * sizeof (int));
4207 	if (lnames)
4208 		kmem_free(lnames, nelems * sizeof (char *));
4209 	if (np)
4210 		kmem_free(np, nelems * sizeof (int));
4211 	return (ret);
4212 
4213 errout:
4214 	e_pm_destroy_components(dip);
4215 	*errp = 1;	/* signal failure */
4216 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4217 	for (i = 0; i < nelems - 1; i++)
4218 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4219 	if (nelems != 0)
4220 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4221 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4222 	for (p = phead; p; ) {
4223 		pm_comp_t *pp;
4224 		int n;
4225 
4226 		ptail = p;
4227 		/*
4228 		 * Free component data structures
4229 		 */
4230 		pp = p->comp;
4231 		n = pp->pmc_numlevels;
4232 		if (pp->pmc_name_sz) {
4233 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4234 		}
4235 		if (pp->pmc_lnames_sz) {
4236 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4237 		}
4238 		if (pp->pmc_lnames) {
4239 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4240 		}
4241 		if (pp->pmc_thresh) {
4242 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4243 		}
4244 		if (pp->pmc_lvals) {
4245 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4246 		}
4247 		p = ptail->next;
4248 		kmem_free(ptail, sizeof (*ptail));
4249 	}
4250 	if (ret != NULL)
4251 		kmem_free(ret, components * sizeof (pm_comp_t));
4252 	ret = NULL;
4253 	goto out;
4254 }
4255 
4256 /*
4257  * Set threshold values for a devices components by dividing the target
4258  * threshold (base) by the number of transitions and assign each transition
4259  * that threshold.  This will get the entire device down in the target time if
4260  * all components are idle and even if there are dependencies among components.
4261  *
4262  * Devices may well get powered all the way down before the target time, but
4263  * at least the EPA will be happy.
4264  */
4265 void
4266 pm_set_device_threshold(dev_info_t *dip, int base, int flag)
4267 {
4268 	PMD_FUNC(pmf, "set_device_threshold")
4269 	int target_threshold = (base * 95) / 100;
4270 	int level, comp;		/* loop counters */
4271 	int transitions = 0;
4272 	int ncomp = PM_NUMCMPTS(dip);
4273 	int thresh;
4274 	int remainder;
4275 	pm_comp_t *pmc;
4276 	int i, circ;
4277 
4278 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4279 	PM_LOCK_DIP(dip);
4280 	/*
4281 	 * First we handle the easy one.  If we're setting the default
4282 	 * threshold for a node with children, then we set it to the
4283 	 * default nexus threshold (currently 0) and mark it as default
4284 	 * nexus threshold instead
4285 	 */
4286 	if (PM_IS_NEXUS(dip)) {
4287 		if (flag == PMC_DEF_THRESH) {
4288 			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
4289 			    PM_DEVICE(dip)))
4290 			thresh = pm_default_nexus_threshold;
4291 			for (comp = 0; comp < ncomp; comp++) {
4292 				pmc = &PM_CP(dip, comp)->pmc_comp;
4293 				for (level = 1; level < pmc->pmc_numlevels;
4294 				    level++) {
4295 					pmc->pmc_thresh[level] = thresh;
4296 				}
4297 			}
4298 			DEVI(dip)->devi_pm_dev_thresh =
4299 			    pm_default_nexus_threshold;
4300 			/*
4301 			 * If the nexus node is being reconfigured back to
4302 			 * the default threshold, adjust the notlowest count.
4303 			 */
4304 			if (DEVI(dip)->devi_pm_flags &
4305 			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
4306 				PM_LOCK_POWER(dip, &circ);
4307 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4308 					if (PM_CURPOWER(dip, i) == 0)
4309 						continue;
4310 					mutex_enter(&pm_compcnt_lock);
4311 					ASSERT(pm_comps_notlowest);
4312 					pm_comps_notlowest--;
4313 					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
4314 					    "notlowest to %d\n", pmf,
4315 					    PM_DEVICE(dip), pm_comps_notlowest))
4316 					if (pm_comps_notlowest == 0)
4317 						pm_ppm_notify_all_lowest(dip,
4318 						    PM_ALL_LOWEST);
4319 					mutex_exit(&pm_compcnt_lock);
4320 				}
4321 				PM_UNLOCK_POWER(dip, circ);
4322 			}
4323 			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4324 			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
4325 			PM_UNLOCK_DIP(dip);
4326 			return;
4327 		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
4328 			/*
4329 			 * If the nexus node is being configured for a
4330 			 * non-default threshold, include that node in
4331 			 * the notlowest accounting.
4332 			 */
4333 			PM_LOCK_POWER(dip, &circ);
4334 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
4335 				if (PM_CURPOWER(dip, i) == 0)
4336 					continue;
4337 				mutex_enter(&pm_compcnt_lock);
4338 				if (pm_comps_notlowest == 0)
4339 					pm_ppm_notify_all_lowest(dip,
4340 					    PM_NOT_ALL_LOWEST);
4341 				pm_comps_notlowest++;
4342 				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
4343 				    "notlowest to %d\n", pmf,
4344 				    PM_DEVICE(dip), pm_comps_notlowest))
4345 				mutex_exit(&pm_compcnt_lock);
4346 			}
4347 			PM_UNLOCK_POWER(dip, circ);
4348 		}
4349 	}
4350 	/*
4351 	 * Compute the total number of transitions for all components
4352 	 * of the device.  Distribute the threshold evenly over them
4353 	 */
4354 	for (comp = 0; comp < ncomp; comp++) {
4355 		pmc = &PM_CP(dip, comp)->pmc_comp;
4356 		ASSERT(pmc->pmc_numlevels > 1);
4357 		transitions += pmc->pmc_numlevels - 1;
4358 	}
4359 	ASSERT(transitions);
4360 	thresh = target_threshold / transitions;
4361 
4362 	for (comp = 0; comp < ncomp; comp++) {
4363 		pmc = &PM_CP(dip, comp)->pmc_comp;
4364 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4365 			pmc->pmc_thresh[level] = thresh;
4366 		}
4367 	}
4368 
4369 #ifdef DEBUG
4370 	for (comp = 0; comp < ncomp; comp++) {
4371 		pmc = &PM_CP(dip, comp)->pmc_comp;
4372 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4373 			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
4374 			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
4375 			    comp, level, pmc->pmc_thresh[level]))
4376 		}
4377 	}
4378 #endif
4379 	/*
4380 	 * Distribute any remainder till they are all gone
4381 	 */
4382 	remainder = target_threshold - thresh * transitions;
4383 	level = 1;
4384 #ifdef DEBUG
4385 	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
4386 	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
4387 	    transitions))
4388 #endif
4389 	while (remainder > 0) {
4390 		comp = 0;
4391 		while (remainder && (comp < ncomp)) {
4392 			pmc = &PM_CP(dip, comp)->pmc_comp;
4393 			if (level < pmc->pmc_numlevels) {
4394 				pmc->pmc_thresh[level] += 1;
4395 				remainder--;
4396 			}
4397 			comp++;
4398 		}
4399 		level++;
4400 	}
4401 #ifdef DEBUG
4402 	for (comp = 0; comp < ncomp; comp++) {
4403 		pmc = &PM_CP(dip, comp)->pmc_comp;
4404 		for (level = 1; level < pmc->pmc_numlevels; level++) {
4405 			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
4406 			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
4407 			    comp, level, pmc->pmc_thresh[level]))
4408 		}
4409 	}
4410 #endif
4411 	ASSERT(PM_IAM_LOCKING_DIP(dip));
4412 	DEVI(dip)->devi_pm_dev_thresh = base;
4413 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
4414 	DEVI(dip)->devi_pm_flags |= flag;
4415 	PM_UNLOCK_DIP(dip);
4416 }
4417 
4418 /*
4419  * Called when there is no old-style platform power management driver
4420  */
4421 static int
4422 ddi_no_platform_power(power_req_t *req)
4423 {
4424 	_NOTE(ARGUNUSED(req))
4425 	return (DDI_FAILURE);
4426 }
4427 
4428 /*
4429  * This function calls the entry point supplied by the platform-specific
4430  * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
4431  * The use of global for getting the  function name from platform-specific
4432  * pm driver is not ideal, but it is simple and efficient.
4433  * The previous property lookup was being done in the idle loop on swift
4434  * systems without pmc chips and hurt deskbench performance as well as
4435  * violating scheduler locking rules
4436  */
4437 int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4438 
4439 /*
4440  * Old obsolete interface for a device to request a power change (but only
4441  * an increase in power)
4442  */
4443 int
4444 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4445 {
4446 	return (pm_raise_power(dip, cmpt, level));
4447 }
4448 
4449 /*
4450  * The old obsolete interface to platform power management.  Only used by
4451  * Gypsy platform and APM on X86.
4452  */
4453 int
4454 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4455 {
4456 	power_req_t	request;
4457 
4458 	request.request_type = PMR_SET_POWER;
4459 	request.req.set_power_req.who = dip;
4460 	request.req.set_power_req.cmpt = pm_cmpt;
4461 	request.req.set_power_req.level = pm_level;
4462 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4463 }
4464 
4465 /*
4466  * A driver can invoke this from its detach routine when DDI_SUSPEND is
4467  * passed.  Returns true if subsequent processing could result in power being
4468  * removed from the device.  The arg is not currently used because it is
4469  * implicit in the operation of cpr/DR.
4470  */
4471 int
4472 ddi_removing_power(dev_info_t *dip)
4473 {
4474 	_NOTE(ARGUNUSED(dip))
4475 	return (pm_powering_down);
4476 }
4477 
4478 /*
4479  * Returns true if a device indicates that its parent handles suspend/resume
4480  * processing for it.
4481  */
4482 int
4483 e_ddi_parental_suspend_resume(dev_info_t *dip)
4484 {
4485 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4486 }
4487 
4488 /*
4489  * Called for devices which indicate that their parent does suspend/resume
4490  * handling for them
4491  */
4492 int
4493 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4494 {
4495 	power_req_t	request;
4496 	request.request_type = PMR_SUSPEND;
4497 	request.req.suspend_req.who = dip;
4498 	request.req.suspend_req.cmd = cmd;
4499 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4500 }
4501 
4502 /*
4503  * Called for devices which indicate that their parent does suspend/resume
4504  * handling for them
4505  */
4506 int
4507 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4508 {
4509 	power_req_t	request;
4510 	request.request_type = PMR_RESUME;
4511 	request.req.resume_req.who = dip;
4512 	request.req.resume_req.cmd = cmd;
4513 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4514 }
4515 
4516 /*
4517  * Old obsolete exported interface for drivers to create components.
4518  * This is now handled by exporting the pm-components property.
4519  */
4520 int
4521 pm_create_components(dev_info_t *dip, int num_components)
4522 {
4523 	PMD_FUNC(pmf, "pm_create_components")
4524 
4525 	if (num_components < 1)
4526 		return (DDI_FAILURE);
4527 
4528 	if (!DEVI_IS_ATTACHING(dip)) {
4529 		return (DDI_FAILURE);
4530 	}
4531 
4532 	/* don't need to lock dip because attach is single threaded */
4533 	if (DEVI(dip)->devi_pm_components) {
4534 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4535 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4536 		return (DDI_FAILURE);
4537 	}
4538 	e_pm_create_components(dip, num_components);
4539 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4540 	e_pm_default_components(dip, num_components);
4541 	return (DDI_SUCCESS);
4542 }
4543 
4544 /*
4545  * Obsolete interface previously called by drivers to destroy their components
4546  * at detach time.  This is now done automatically.  However, we need to keep
4547  * this for the old drivers.
4548  */
4549 void
4550 pm_destroy_components(dev_info_t *dip)
4551 {
4552 	PMD_FUNC(pmf, "pm_destroy_components")
4553 	dev_info_t *pdip = ddi_get_parent(dip);
4554 
4555 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4556 	    PM_DEVICE(dip)))
4557 	ASSERT(DEVI_IS_DETACHING(dip));
4558 #ifdef DEBUG
4559 	if (!PM_ISBC(dip))
4560 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4561 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4562 		    PM_ADDR(dip));
4563 #endif
4564 	/*
4565 	 * We ignore this unless this is an old-style driver, except for
4566 	 * printing the message above
4567 	 */
4568 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4569 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4570 		    PM_DEVICE(dip)))
4571 		return;
4572 	}
4573 	ASSERT(PM_GET_PM_INFO(dip));
4574 
4575 	/*
4576 	 * pm_unmanage will clear info pointer later, after dealing with
4577 	 * dependencies
4578 	 */
4579 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4580 	/*
4581 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4582 	 * Parents that get notification are not adjusted because their
4583 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4584 	 */
4585 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4586 		pm_rele_power(pdip);
4587 #ifdef DEBUG
4588 	else {
4589 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4590 		    pmf, PM_DEVICE(dip)))
4591 	}
4592 #endif
4593 	e_pm_destroy_components(dip);
4594 	/*
4595 	 * Forget we ever knew anything about the components of this  device
4596 	 */
4597 	DEVI(dip)->devi_pm_flags &=
4598 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4599 }
4600 
4601 /*
4602  * Exported interface for a driver to set a component busy.
4603  */
4604 int
4605 pm_busy_component(dev_info_t *dip, int cmpt)
4606 {
4607 	struct pm_component *cp;
4608 
4609 	ASSERT(dip != NULL);
4610 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4611 		return (DDI_FAILURE);
4612 	PM_LOCK_BUSY(dip);
4613 	cp->pmc_busycount++;
4614 	cp->pmc_timestamp = 0;
4615 	PM_UNLOCK_BUSY(dip);
4616 	return (DDI_SUCCESS);
4617 }
4618 
4619 /*
4620  * Exported interface for a driver to set a component idle.
4621  */
4622 int
4623 pm_idle_component(dev_info_t *dip, int cmpt)
4624 {
4625 	PMD_FUNC(pmf, "pm_idle_component")
4626 	struct pm_component *cp;
4627 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4628 
4629 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4630 		return (DDI_FAILURE);
4631 
4632 	PM_LOCK_BUSY(dip);
4633 	if (cp->pmc_busycount) {
4634 		if (--(cp->pmc_busycount) == 0)
4635 			cp->pmc_timestamp = gethrestime_sec();
4636 	} else {
4637 		cp->pmc_timestamp = gethrestime_sec();
4638 	}
4639 
4640 	PM_UNLOCK_BUSY(dip);
4641 
4642 	/*
4643 	 * if device becomes idle during idle down period, try scan it down
4644 	 */
4645 	if (scanp && PM_IS_PID(dip)) {
4646 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4647 		    PM_DEVICE(dip)))
4648 		pm_rescan(dip);
4649 		return (DDI_SUCCESS);
4650 	}
4651 
4652 	/*
4653 	 * handle scan not running with nexus threshold == 0
4654 	 */
4655 
4656 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4657 		pm_rescan(dip);
4658 	}
4659 
4660 	return (DDI_SUCCESS);
4661 }
4662 
4663 /*
4664  * This is the old  obsolete interface called by drivers to set their normal
4665  * power.  Thus we can't fix its behavior or return a value.
4666  * This functionality is replaced by the pm-component property.
4667  * We'll only get components destroyed while no power management is
4668  * going on (and the device is detached), so we don't need a mutex here
4669  */
4670 void
4671 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4672 {
4673 	PMD_FUNC(pmf, "set_normal_power")
4674 #ifdef DEBUG
4675 	if (!PM_ISBC(dip))
4676 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4677 		    "(driver exporting pm-components property) ignored",
4678 		    PM_NAME(dip), PM_ADDR(dip));
4679 #endif
4680 	if (PM_ISBC(dip)) {
4681 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4682 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4683 		e_pm_set_max_power(dip, comp, level);
4684 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4685 	}
4686 }
4687 
4688 /*
4689  * Called on a successfully detached driver to free pm resources
4690  */
4691 static void
4692 pm_stop(dev_info_t *dip)
4693 {
4694 	PMD_FUNC(pmf, "stop")
4695 	dev_info_t *pdip = ddi_get_parent(dip);
4696 
4697 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4698 	/* stopping scan, destroy scan data structure */
4699 	if (!PM_ISBC(dip)) {
4700 		pm_scan_stop(dip);
4701 		pm_scan_fini(dip);
4702 	}
4703 
4704 	if (PM_GET_PM_INFO(dip) != NULL) {
4705 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4706 			/*
4707 			 * Old style driver may have called
4708 			 * pm_destroy_components already, but just in case ...
4709 			 */
4710 			e_pm_destroy_components(dip);
4711 		} else {
4712 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4713 			    pmf, PM_DEVICE(dip)))
4714 		}
4715 	} else {
4716 		if (PM_NUMCMPTS(dip))
4717 			e_pm_destroy_components(dip);
4718 		else {
4719 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4720 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4721 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4722 					pm_rele_power(pdip);
4723 				} else if (pdip &&
4724 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4725 					(void) mdi_power(pdip,
4726 					    MDI_PM_RELE_POWER,
4727 					    (void *)dip, NULL, 0);
4728 				}
4729 			}
4730 		}
4731 	}
4732 }
4733 
4734 /*
4735  * The node is the subject of a reparse pm props ioctl. Throw away the old
4736  * info and start over.
4737  */
4738 int
4739 e_new_pm_props(dev_info_t *dip)
4740 {
4741 	if (PM_GET_PM_INFO(dip) != NULL) {
4742 		pm_stop(dip);
4743 
4744 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4745 			return (DDI_FAILURE);
4746 		}
4747 	}
4748 	e_pm_props(dip);
4749 	return (DDI_SUCCESS);
4750 }
4751 
4752 /*
4753  * Device has been attached, so process its pm properties
4754  */
4755 void
4756 e_pm_props(dev_info_t *dip)
4757 {
4758 	char *pp;
4759 	int len;
4760 	int flags = 0;
4761 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4762 
4763 	/*
4764 	 * It doesn't matter if we do this more than once, we should always
4765 	 * get the same answers, and if not, then the last one in is the
4766 	 * best one.
4767 	 */
4768 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4769 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4770 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4771 			flags = PMC_NEEDS_SR;
4772 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4773 			flags = PMC_NO_SR;
4774 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4775 			flags = PMC_PARENTAL_SR;
4776 		} else {
4777 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4778 			    "%s property value '%s'", PM_NAME(dip),
4779 			    PM_ADDR(dip), "pm-hardware-state", pp);
4780 		}
4781 		kmem_free(pp, len);
4782 	}
4783 	/*
4784 	 * This next segment (PMC_WANTS_NOTIFY) is in
4785 	 * support of nexus drivers which will want to be involved in
4786 	 * (or at least notified of) their child node's power level transitions.
4787 	 * "pm-want-child-notification?" is defined by the parent.
4788 	 */
4789 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4790 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4791 		flags |= PMC_WANTS_NOTIFY;
4792 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4793 	    dip, propflag, "pm-want-child-notification?"));
4794 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4795 	    "no-involuntary-power-cycles"))
4796 		flags |= PMC_NO_INVOL;
4797 	/*
4798 	 * Is the device a CPU device?
4799 	 */
4800 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4801 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4802 		if (strcmp(pp, "CPU") == 0) {
4803 			flags |= PMC_CPU_DEVICE;
4804 		} else {
4805 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4806 			    "%s property value '%s'", PM_NAME(dip),
4807 			    PM_ADDR(dip), "pm-class", pp);
4808 		}
4809 		kmem_free(pp, len);
4810 	}
4811 	/* devfs single threads us */
4812 	DEVI(dip)->devi_pm_flags |= flags;
4813 }
4814 
4815 /*
4816  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4817  * driver which has claimed a node.
4818  * Sets old_power in arg struct.
4819  */
4820 static int
4821 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4822     ddi_ctl_enum_t ctlop, void *arg, void *result)
4823 {
4824 	_NOTE(ARGUNUSED(dip))
4825 	PMD_FUNC(pmf, "ctlops")
4826 	power_req_t *reqp = (power_req_t *)arg;
4827 	int retval;
4828 	dev_info_t *target_dip;
4829 	int new_level, old_level, cmpt;
4830 #ifdef PMDDEBUG
4831 	char *format;
4832 #endif
4833 
4834 	/*
4835 	 * The interface for doing the actual power level changes is now
4836 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4837 	 * different platform-specific power control drivers.
4838 	 *
4839 	 * This driver implements the "default" version of this interface.
4840 	 * If no ppm driver has been installed then this interface is called
4841 	 * instead.
4842 	 */
4843 	ASSERT(dip == NULL);
4844 	switch (ctlop) {
4845 	case DDI_CTLOPS_POWER:
4846 		switch (reqp->request_type) {
4847 		case PMR_PPM_SET_POWER:
4848 		{
4849 			target_dip = reqp->req.ppm_set_power_req.who;
4850 			ASSERT(target_dip == rdip);
4851 			new_level = reqp->req.ppm_set_power_req.new_level;
4852 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4853 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4854 			old_level = PM_CURPOWER(target_dip, cmpt);
4855 			reqp->req.ppm_set_power_req.old_level = old_level;
4856 			retval = pm_power(target_dip, cmpt, new_level);
4857 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4858 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4859 			    old_level, new_level, (retval == DDI_SUCCESS ?
4860 			    "chd" : "no chg")))
4861 			return (retval);
4862 		}
4863 
4864 		case PMR_PPM_PRE_DETACH:
4865 		case PMR_PPM_POST_DETACH:
4866 		case PMR_PPM_PRE_ATTACH:
4867 		case PMR_PPM_POST_ATTACH:
4868 		case PMR_PPM_PRE_PROBE:
4869 		case PMR_PPM_POST_PROBE:
4870 		case PMR_PPM_PRE_RESUME:
4871 		case PMR_PPM_INIT_CHILD:
4872 		case PMR_PPM_UNINIT_CHILD:
4873 #ifdef PMDDEBUG
4874 			switch (reqp->request_type) {
4875 				case PMR_PPM_PRE_DETACH:
4876 					format = "%s: PMR_PPM_PRE_DETACH "
4877 					    "%s@%s(%s#%d)\n";
4878 					break;
4879 				case PMR_PPM_POST_DETACH:
4880 					format = "%s: PMR_PPM_POST_DETACH "
4881 					    "%s@%s(%s#%d) rets %d\n";
4882 					break;
4883 				case PMR_PPM_PRE_ATTACH:
4884 					format = "%s: PMR_PPM_PRE_ATTACH "
4885 					    "%s@%s(%s#%d)\n";
4886 					break;
4887 				case PMR_PPM_POST_ATTACH:
4888 					format = "%s: PMR_PPM_POST_ATTACH "
4889 					    "%s@%s(%s#%d) rets %d\n";
4890 					break;
4891 				case PMR_PPM_PRE_PROBE:
4892 					format = "%s: PMR_PPM_PRE_PROBE "
4893 					    "%s@%s(%s#%d)\n";
4894 					break;
4895 				case PMR_PPM_POST_PROBE:
4896 					format = "%s: PMR_PPM_POST_PROBE "
4897 					    "%s@%s(%s#%d) rets %d\n";
4898 					break;
4899 				case PMR_PPM_PRE_RESUME:
4900 					format = "%s: PMR_PPM_PRE_RESUME "
4901 					    "%s@%s(%s#%d) rets %d\n";
4902 					break;
4903 				case PMR_PPM_INIT_CHILD:
4904 					format = "%s: PMR_PPM_INIT_CHILD "
4905 					    "%s@%s(%s#%d)\n";
4906 					break;
4907 				case PMR_PPM_UNINIT_CHILD:
4908 					format = "%s: PMR_PPM_UNINIT_CHILD "
4909 					    "%s@%s(%s#%d)\n";
4910 					break;
4911 				default:
4912 					break;
4913 			}
4914 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4915 			    reqp->req.ppm_config_req.result))
4916 #endif
4917 			return (DDI_SUCCESS);
4918 
4919 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4920 			/*
4921 			 * Nothing for us to do
4922 			 */
4923 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4924 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4925 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4926 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4927 			    reqp->req.ppm_notify_level_req.cmpt,
4928 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4929 			    reqp->req.ppm_notify_level_req.cmpt),
4930 			    reqp->req.ppm_notify_level_req.new_level))
4931 			return (DDI_SUCCESS);
4932 
4933 		case PMR_PPM_UNMANAGE:
4934 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4935 			    pmf, PM_DEVICE(rdip)))
4936 			return (DDI_SUCCESS);
4937 
4938 		case PMR_PPM_LOCK_POWER:
4939 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4940 			    reqp->req.ppm_lock_power_req.circp);
4941 			return (DDI_SUCCESS);
4942 
4943 		case PMR_PPM_UNLOCK_POWER:
4944 			pm_unlock_power_single(
4945 			    reqp->req.ppm_unlock_power_req.who,
4946 			    reqp->req.ppm_unlock_power_req.circ);
4947 			return (DDI_SUCCESS);
4948 
4949 		case PMR_PPM_TRY_LOCK_POWER:
4950 			*(int *)result = pm_try_locking_power_single(
4951 			    reqp->req.ppm_lock_power_req.who,
4952 			    reqp->req.ppm_lock_power_req.circp);
4953 			return (DDI_SUCCESS);
4954 
4955 		case PMR_PPM_POWER_LOCK_OWNER:
4956 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4957 			ASSERT(target_dip == rdip);
4958 			reqp->req.ppm_power_lock_owner_req.owner =
4959 			    DEVI(rdip)->devi_busy_thread;
4960 			return (DDI_SUCCESS);
4961 		default:
4962 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4963 			return (DDI_FAILURE);
4964 		}
4965 
4966 	default:
4967 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4968 		return (DDI_FAILURE);
4969 	}
4970 }
4971 
4972 /*
4973  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4974  * power_ops struct for this functionality instead?
4975  * However, we only ever do this on a ppm driver.
4976  */
4977 int
4978 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4979 {
4980 	int (*fp)();
4981 
4982 	/* if no ppm handler, call the default routine */
4983 	if (d == NULL) {
4984 		return (pm_default_ctlops(d, r, op, a, v));
4985 	}
4986 	if (!d || !r)
4987 		return (DDI_FAILURE);
4988 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4989 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4990 
4991 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4992 	return ((*fp)(d, r, op, a, v));
4993 }
4994 
4995 /*
4996  * Called on a node when attach completes or the driver makes its first pm
4997  * call (whichever comes first).
4998  * In the attach case, device may not be power manageable at all.
4999  * Don't need to lock the dip because we're single threaded by the devfs code
5000  */
5001 static int
5002 pm_start(dev_info_t *dip)
5003 {
5004 	PMD_FUNC(pmf, "start")
5005 	int ret;
5006 	dev_info_t *pdip = ddi_get_parent(dip);
5007 	int e_pm_manage(dev_info_t *, int);
5008 	void pm_noinvol_specd(dev_info_t *dip);
5009 
5010 	e_pm_props(dip);
5011 	pm_noinvol_specd(dip);
5012 	/*
5013 	 * If this dip has already been processed, don't mess with it
5014 	 * (but decrement the speculative count we did above, as whatever
5015 	 * code put it under pm already will have dealt with it)
5016 	 */
5017 	if (PM_GET_PM_INFO(dip)) {
5018 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5019 		    pmf, PM_DEVICE(dip)))
5020 		return (0);
5021 	}
5022 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5023 
5024 	if (PM_GET_PM_INFO(dip) == NULL) {
5025 		/*
5026 		 * keep the kidsupcount increment as is
5027 		 */
5028 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5029 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5030 			pm_hold_power(pdip);
5031 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5032 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5033 			    (void *)dip, NULL, 0);
5034 		}
5035 
5036 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5037 		    "left up\n", pmf, PM_DEVICE(dip)))
5038 	}
5039 
5040 	return (ret);
5041 }
5042 
5043 /*
5044  * Keep a list of recorded thresholds.  For now we just keep a list and
5045  * search it linearly.  We don't expect too many entries.  Can always hash it
5046  * later if we need to.
5047  */
5048 void
5049 pm_record_thresh(pm_thresh_rec_t *rp)
5050 {
5051 	pm_thresh_rec_t *pptr, *ptr;
5052 
5053 	ASSERT(*rp->ptr_physpath);
5054 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5055 	for (pptr = NULL, ptr = pm_thresh_head;
5056 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5057 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5058 			/* replace this one */
5059 			rp->ptr_next = ptr->ptr_next;
5060 			if (pptr) {
5061 				pptr->ptr_next = rp;
5062 			} else {
5063 				pm_thresh_head = rp;
5064 			}
5065 			rw_exit(&pm_thresh_rwlock);
5066 			kmem_free(ptr, ptr->ptr_size);
5067 			return;
5068 		}
5069 		continue;
5070 	}
5071 	/*
5072 	 * There was not a match in the list, insert this one in front
5073 	 */
5074 	if (pm_thresh_head) {
5075 		rp->ptr_next = pm_thresh_head;
5076 		pm_thresh_head = rp;
5077 	} else {
5078 		rp->ptr_next = NULL;
5079 		pm_thresh_head = rp;
5080 	}
5081 	rw_exit(&pm_thresh_rwlock);
5082 }
5083 
5084 /*
5085  * Create a new dependency record and hang a new dependency entry off of it
5086  */
5087 pm_pdr_t *
5088 newpdr(char *kept, char *keeps, int isprop)
5089 {
5090 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5091 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5092 	p->pdr_size = size;
5093 	p->pdr_isprop = isprop;
5094 	p->pdr_kept_paths = NULL;
5095 	p->pdr_kept_count = 0;
5096 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5097 	(void) strcpy(p->pdr_kept, kept);
5098 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5099 	(void) strcpy(p->pdr_keeper, keeps);
5100 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5101 	    (intptr_t)p + size);
5102 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5103 	    (intptr_t)p + size);
5104 	return (p);
5105 }
5106 
5107 /*
5108  * Keep a list of recorded dependencies.  We only keep the
5109  * keeper -> kept list for simplification. At this point We do not
5110  * care about whether the devices are attached or not yet,
5111  * this would be done in pm_keeper() and pm_kept().
5112  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5113  * and it is up to the user to issue the ioctl again if they want it
5114  * (e.g. pmconfig)
5115  * Returns true if dependency already exists in the list.
5116  */
5117 int
5118 pm_record_keeper(char *kept, char *keeper, int isprop)
5119 {
5120 	PMD_FUNC(pmf, "record_keeper")
5121 	pm_pdr_t *npdr, *ppdr, *pdr;
5122 
5123 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5124 	ASSERT(kept && keeper);
5125 #ifdef DEBUG
5126 	if (pm_debug & PMD_KEEPS)
5127 		prdeps("pm_record_keeper entry");
5128 #endif
5129 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5130 	    ppdr = pdr, pdr = pdr->pdr_next) {
5131 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5132 		    pdr->pdr_keeper))
5133 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5134 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5135 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5136 			return (1);
5137 		}
5138 	}
5139 	/*
5140 	 * We did not find any match, so we have to make an entry
5141 	 */
5142 	npdr = newpdr(kept, keeper, isprop);
5143 	if (ppdr) {
5144 		ASSERT(ppdr->pdr_next == NULL);
5145 		ppdr->pdr_next = npdr;
5146 	} else {
5147 		ASSERT(pm_dep_head == NULL);
5148 		pm_dep_head = npdr;
5149 	}
5150 #ifdef DEBUG
5151 	if (pm_debug & PMD_KEEPS)
5152 		prdeps("pm_record_keeper after new record");
5153 #endif
5154 	if (!isprop)
5155 		pm_unresolved_deps++;
5156 	else
5157 		pm_prop_deps++;
5158 	return (0);
5159 }
5160 
5161 /*
5162  * Look up this device in the set of devices we've seen ioctls for
5163  * to see if we are holding a threshold spec for it.  If so, make it so.
5164  * At ioctl time, we were given the physical path of the device.
5165  */
5166 int
5167 pm_thresh_specd(dev_info_t *dip)
5168 {
5169 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5170 	char *path = 0;
5171 	char pathbuf[MAXNAMELEN];
5172 	pm_thresh_rec_t *rp;
5173 
5174 	path = ddi_pathname(dip, pathbuf);
5175 
5176 	rw_enter(&pm_thresh_rwlock, RW_READER);
5177 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5178 		if (strcmp(rp->ptr_physpath, path) != 0)
5179 			continue;
5180 		pm_apply_recorded_thresh(dip, rp);
5181 		rw_exit(&pm_thresh_rwlock);
5182 		return (1);
5183 	}
5184 	rw_exit(&pm_thresh_rwlock);
5185 	return (0);
5186 }
5187 
5188 static int
5189 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5190 {
5191 	PMD_FUNC(pmf, "set_keeping")
5192 	pm_info_t *kept_info;
5193 	int j, up = 0, circ;
5194 	void prdeps(char *);
5195 
5196 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5197 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5198 #ifdef DEBUG
5199 	if (pm_debug & PMD_KEEPS)
5200 		prdeps("Before PAD\n");
5201 #endif
5202 	ASSERT(keeper != kept);
5203 	if (PM_GET_PM_INFO(keeper) == NULL) {
5204 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5205 		    "%s@%s(%s#%d), but the latter is not power managed",
5206 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5207 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5208 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5209 		return (0);
5210 	}
5211 	kept_info = PM_GET_PM_INFO(kept);
5212 	ASSERT(kept_info);
5213 	PM_LOCK_POWER(keeper, &circ);
5214 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5215 		if (PM_CURPOWER(keeper, j)) {
5216 			up++;
5217 			break;
5218 		}
5219 	}
5220 	if (up) {
5221 		/* Bringup and maintain a hold on the kept */
5222 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5223 		    PM_DEVICE(kept)))
5224 		bring_pmdep_up(kept, 1);
5225 	}
5226 	PM_UNLOCK_POWER(keeper, circ);
5227 #ifdef DEBUG
5228 	if (pm_debug & PMD_KEEPS)
5229 		prdeps("After PAD\n");
5230 #endif
5231 	return (1);
5232 }
5233 
5234 /*
5235  * Should this device keep up another device?
5236  * Look up this device in the set of devices we've seen ioctls for
5237  * to see if we are holding a dependency spec for it.  If so, make it so.
5238  * Because we require the kept device to be attached already in order to
5239  * make the list entry (and hold it), we only need to look for keepers.
5240  * At ioctl time, we were given the physical path of the device.
5241  */
5242 int
5243 pm_keeper(char *keeper)
5244 {
5245 	PMD_FUNC(pmf, "keeper")
5246 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5247 	dev_info_t *dip;
5248 	pm_pdr_t *dp;
5249 	dev_info_t *kept = NULL;
5250 	int ret = 0;
5251 	int i;
5252 
5253 	if (!pm_unresolved_deps && !pm_prop_deps)
5254 		return (0);
5255 	ASSERT(keeper != NULL);
5256 	dip = pm_name_to_dip(keeper, 1);
5257 	if (dip == NULL)
5258 		return (0);
5259 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5260 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5261 		if (!dp->pdr_isprop) {
5262 			if (!pm_unresolved_deps)
5263 				continue;
5264 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5265 			if (dp->pdr_satisfied) {
5266 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5267 				continue;
5268 			}
5269 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5270 				ret += pm_apply_recorded_dep(dip, dp);
5271 			}
5272 		} else {
5273 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5274 				continue;
5275 			for (i = 0; i < dp->pdr_kept_count; i++) {
5276 				if (dp->pdr_kept_paths[i] == NULL)
5277 					continue;
5278 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5279 				if (kept == NULL)
5280 					continue;
5281 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5282 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5283 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5284 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5285 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5286 				    dp->pdr_kept_count))
5287 				if (kept != dip) {
5288 					ret += pm_set_keeping(dip, kept);
5289 				}
5290 				ddi_release_devi(kept);
5291 			}
5292 
5293 		}
5294 	}
5295 	ddi_release_devi(dip);
5296 	return (ret);
5297 }
5298 
5299 /*
5300  * Should this device be kept up by another device?
5301  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5302  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5303  * kept device lists.
5304  */
5305 static int
5306 pm_kept(char *keptp)
5307 {
5308 	PMD_FUNC(pmf, "kept")
5309 	pm_pdr_t *dp;
5310 	int found = 0;
5311 	int ret = 0;
5312 	dev_info_t *keeper;
5313 	dev_info_t *kept;
5314 	size_t length;
5315 	int i;
5316 	char **paths;
5317 	char *path;
5318 
5319 	ASSERT(keptp != NULL);
5320 	kept = pm_name_to_dip(keptp, 1);
5321 	if (kept == NULL)
5322 		return (0);
5323 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5324 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5325 		if (dp->pdr_isprop) {
5326 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5327 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5328 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5329 				/*
5330 				 * Dont allow self dependency.
5331 				 */
5332 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5333 					continue;
5334 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5335 				if (keeper == NULL)
5336 					continue;
5337 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5338 				    "%p\n", pmf, (void *)kept))
5339 #ifdef DEBUG
5340 				if (pm_debug & PMD_DEP)
5341 					prdeps("Before Adding from pm_kept\n");
5342 #endif
5343 				/*
5344 				 * Add ourselves to the dip list.
5345 				 */
5346 				if (dp->pdr_kept_count == 0) {
5347 					length = strlen(keptp) + 1;
5348 					path =
5349 					    kmem_alloc(length, KM_SLEEP);
5350 					paths = kmem_alloc(sizeof (char **),
5351 					    KM_SLEEP);
5352 					(void) strcpy(path, keptp);
5353 					paths[0] = path;
5354 					dp->pdr_kept_paths = paths;
5355 					dp->pdr_kept_count++;
5356 				} else {
5357 					/* Check to see if already on list */
5358 					for (i = 0; i < dp->pdr_kept_count;
5359 					    i++) {
5360 						if (strcmp(keptp,
5361 						    dp->pdr_kept_paths[i])
5362 						    == 0) {
5363 							found++;
5364 							break;
5365 						}
5366 					}
5367 					if (found) {
5368 						ddi_release_devi(keeper);
5369 						continue;
5370 					}
5371 					length = dp->pdr_kept_count *
5372 					    sizeof (char **);
5373 					paths = kmem_alloc(
5374 					    length + sizeof (char **),
5375 					    KM_SLEEP);
5376 					if (dp->pdr_kept_count) {
5377 						bcopy(dp->pdr_kept_paths,
5378 						    paths, length);
5379 						kmem_free(dp->pdr_kept_paths,
5380 						    length);
5381 					}
5382 					dp->pdr_kept_paths = paths;
5383 					length = strlen(keptp) + 1;
5384 					path =
5385 					    kmem_alloc(length, KM_SLEEP);
5386 					(void) strcpy(path, keptp);
5387 					dp->pdr_kept_paths[i] = path;
5388 					dp->pdr_kept_count++;
5389 				}
5390 #ifdef DEBUG
5391 				if (pm_debug & PMD_DEP)
5392 					prdeps("After from pm_kept\n");
5393 #endif
5394 				if (keeper) {
5395 					ret += pm_set_keeping(keeper, kept);
5396 					ddi_release_devi(keeper);
5397 				}
5398 			}
5399 		} else {
5400 			/*
5401 			 * pm_keeper would be called later to do
5402 			 * the actual pm_set_keeping.
5403 			 */
5404 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5405 			    pmf, (void *)kept))
5406 #ifdef DEBUG
5407 			if (pm_debug & PMD_DEP)
5408 				prdeps("Before Adding from pm_kept\n");
5409 #endif
5410 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5411 				if (dp->pdr_kept_paths == NULL) {
5412 					length = strlen(keptp) + 1;
5413 					path =
5414 					    kmem_alloc(length, KM_SLEEP);
5415 					paths = kmem_alloc(sizeof (char **),
5416 					    KM_SLEEP);
5417 					(void) strcpy(path, keptp);
5418 					paths[0] = path;
5419 					dp->pdr_kept_paths = paths;
5420 					dp->pdr_kept_count++;
5421 				}
5422 			}
5423 #ifdef DEBUG
5424 			if (pm_debug & PMD_DEP)
5425 				prdeps("After from pm_kept\n");
5426 #endif
5427 		}
5428 	}
5429 	ddi_release_devi(kept);
5430 	return (ret);
5431 }
5432 
5433 /*
5434  * Apply a recorded dependency.  dp specifies the dependency, and
5435  * keeper is already known to be the device that keeps up the other (kept) one.
5436  * We have to the whole tree for the "kept" device, then apply
5437  * the dependency (which may already be applied).
5438  */
5439 int
5440 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5441 {
5442 	PMD_FUNC(pmf, "apply_recorded_dep")
5443 	dev_info_t *kept = NULL;
5444 	int ret = 0;
5445 	char *keptp = NULL;
5446 
5447 	/*
5448 	 * Device to Device dependency can only be 1 to 1.
5449 	 */
5450 	if (dp->pdr_kept_paths == NULL)
5451 		return (0);
5452 	keptp = dp->pdr_kept_paths[0];
5453 	if (keptp == NULL)
5454 		return (0);
5455 	ASSERT(*keptp != '\0');
5456 	kept = pm_name_to_dip(keptp, 1);
5457 	if (kept == NULL)
5458 		return (0);
5459 	if (kept) {
5460 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5461 		    dp->pdr_keeper, keptp))
5462 		if (pm_set_keeping(keeper, kept)) {
5463 			ASSERT(dp->pdr_satisfied == 0);
5464 			dp->pdr_satisfied = 1;
5465 			ASSERT(pm_unresolved_deps);
5466 			pm_unresolved_deps--;
5467 			ret++;
5468 		}
5469 	}
5470 	ddi_release_devi(kept);
5471 
5472 	return (ret);
5473 }
5474 
5475 /*
5476  * Called from common/io/pm.c
5477  */
5478 int
5479 pm_cur_power(pm_component_t *cp)
5480 {
5481 	return (cur_power(cp));
5482 }
5483 
5484 /*
5485  * External interface to sanity-check a power level.
5486  */
5487 int
5488 pm_valid_power(dev_info_t *dip, int comp, int level)
5489 {
5490 	PMD_FUNC(pmf, "valid_power")
5491 
5492 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5493 		return (e_pm_valid_power(dip, comp, level));
5494 	else {
5495 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5496 		    pmf, comp, PM_NUMCMPTS(dip), level))
5497 		return (0);
5498 	}
5499 }
5500 
5501 /*
5502  * Called when a device that is direct power managed needs to change state.
5503  * This routine arranges to block the request until the process managing
5504  * the device makes the change (or some other incompatible change) or
5505  * the process closes /dev/pm.
5506  */
5507 static int
5508 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5509 {
5510 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5511 	int ret = 0;
5512 	void pm_dequeue_blocked(pm_rsvp_t *);
5513 	void pm_enqueue_blocked(pm_rsvp_t *);
5514 
5515 	ASSERT(!pm_processes_stopped);
5516 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5517 	new->pr_dip = dip;
5518 	new->pr_comp = comp;
5519 	new->pr_newlevel = newpower;
5520 	new->pr_oldlevel = oldpower;
5521 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5522 	mutex_enter(&pm_rsvp_lock);
5523 	pm_enqueue_blocked(new);
5524 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5525 	    PM_CANBLOCK_BLOCK);
5526 	PM_UNLOCK_DIP(dip);
5527 	/*
5528 	 * truss may make the cv_wait_sig return prematurely
5529 	 */
5530 	while (ret == 0) {
5531 		/*
5532 		 * Normally there will be no user context involved, but if
5533 		 * there is (e.g. we are here via an ioctl call to a driver)
5534 		 * then we should allow the process to abort the request,
5535 		 * or we get an unkillable process if the same thread does
5536 		 * PM_DIRECT_PM and pm_raise_power
5537 		 */
5538 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5539 			ret = PMP_FAIL;
5540 		} else {
5541 			ret = new->pr_retval;
5542 		}
5543 	}
5544 	pm_dequeue_blocked(new);
5545 	mutex_exit(&pm_rsvp_lock);
5546 	cv_destroy(&new->pr_cv);
5547 	kmem_free(new, sizeof (*new));
5548 	return (ret);
5549 }
5550 
5551 /*
5552  * Returns true if the process is interested in power level changes (has issued
5553  * PM_GET_STATE_CHANGE ioctl).
5554  */
5555 int
5556 pm_interest_registered(int clone)
5557 {
5558 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5559 	return (pm_interest[clone]);
5560 }
5561 
5562 /*
5563  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5564  * watch all state transitions (dip == NULL).  Set up data
5565  * structs to communicate with process about state changes.
5566  */
5567 void
5568 pm_register_watcher(int clone, dev_info_t *dip)
5569 {
5570 	pscc_t	*p;
5571 	psce_t	*psce;
5572 	static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5573 
5574 	/*
5575 	 * We definitely need a control struct, then we have to search to see
5576 	 * there is already an entries struct (in the dip != NULL case).
5577 	 */
5578 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5579 	pscc->pscc_clone = clone;
5580 	pscc->pscc_dip = dip;
5581 
5582 	if (dip) {
5583 		int found = 0;
5584 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5585 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5586 			/*
5587 			 * Already an entry for this clone, so just use it
5588 			 * for the new one (for the case where a single
5589 			 * process is watching multiple devices)
5590 			 */
5591 			if (p->pscc_clone == clone) {
5592 				pscc->pscc_entries = p->pscc_entries;
5593 				pscc->pscc_entries->psce_references++;
5594 				found++;
5595 				break;
5596 			}
5597 		}
5598 		if (!found) {		/* create a new one */
5599 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5600 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5601 			psce->psce_first =
5602 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5603 			    KM_SLEEP);
5604 			psce->psce_in = psce->psce_out = psce->psce_first;
5605 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5606 			psce->psce_references = 1;
5607 			pscc->pscc_entries = psce;
5608 		}
5609 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5610 		rw_exit(&pm_pscc_direct_rwlock);
5611 	} else {
5612 		ASSERT(!pm_interest_registered(clone));
5613 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5614 #ifdef DEBUG
5615 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5616 			/*
5617 			 * Should not be an entry for this clone!
5618 			 */
5619 			ASSERT(p->pscc_clone != clone);
5620 		}
5621 #endif
5622 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5623 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5624 		    PSCCOUNT, KM_SLEEP);
5625 		psce->psce_in = psce->psce_out = psce->psce_first;
5626 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5627 		psce->psce_references = 1;
5628 		pscc->pscc_entries = psce;
5629 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5630 		pm_interest[clone] = 1;
5631 		rw_exit(&pm_pscc_interest_rwlock);
5632 	}
5633 }
5634 
5635 /*
5636  * Remove the given entry from the blocked list
5637  */
5638 void
5639 pm_dequeue_blocked(pm_rsvp_t *p)
5640 {
5641 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5642 	if (pm_blocked_list == p) {
5643 		ASSERT(p->pr_prev == NULL);
5644 		if (p->pr_next != NULL)
5645 			p->pr_next->pr_prev = NULL;
5646 		pm_blocked_list = p->pr_next;
5647 	} else {
5648 		ASSERT(p->pr_prev != NULL);
5649 		p->pr_prev->pr_next = p->pr_next;
5650 		if (p->pr_next != NULL)
5651 			p->pr_next->pr_prev = p->pr_prev;
5652 	}
5653 }
5654 
5655 /*
5656  * Remove the given control struct from the given list
5657  */
5658 static void
5659 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5660 {
5661 	if (*list == p) {
5662 		ASSERT(p->pscc_prev == NULL);
5663 		if (p->pscc_next != NULL)
5664 			p->pscc_next->pscc_prev = NULL;
5665 		*list = p->pscc_next;
5666 	} else {
5667 		ASSERT(p->pscc_prev != NULL);
5668 		p->pscc_prev->pscc_next = p->pscc_next;
5669 		if (p->pscc_next != NULL)
5670 			p->pscc_next->pscc_prev = p->pscc_prev;
5671 	}
5672 }
5673 
5674 /*
5675  * Stick the control struct specified on the front of the list
5676  */
5677 static void
5678 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5679 {
5680 	pscc_t *h;	/* entry at head of list */
5681 	if ((h = *list) == NULL) {
5682 		*list = p;
5683 		ASSERT(p->pscc_next == NULL);
5684 		ASSERT(p->pscc_prev == NULL);
5685 	} else {
5686 		p->pscc_next = h;
5687 		ASSERT(h->pscc_prev == NULL);
5688 		h->pscc_prev = p;
5689 		ASSERT(p->pscc_prev == NULL);
5690 		*list = p;
5691 	}
5692 }
5693 
5694 /*
5695  * If dip is NULL, process is closing "clone" clean up all its registrations.
5696  * Otherwise only clean up those for dip because process is just giving up
5697  * control of a direct device.
5698  */
5699 void
5700 pm_deregister_watcher(int clone, dev_info_t *dip)
5701 {
5702 	pscc_t	*p, *pn;
5703 	psce_t	*psce;
5704 	int found = 0;
5705 
5706 	if (dip == NULL) {
5707 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5708 		for (p = pm_pscc_interest; p; p = pn) {
5709 			pn = p->pscc_next;
5710 			if (p->pscc_clone == clone) {
5711 				pm_dequeue_pscc(p, &pm_pscc_interest);
5712 				psce = p->pscc_entries;
5713 				ASSERT(psce->psce_references == 1);
5714 				mutex_destroy(&psce->psce_lock);
5715 				kmem_free(psce->psce_first,
5716 				    sizeof (pm_state_change_t) * PSCCOUNT);
5717 				kmem_free(psce, sizeof (*psce));
5718 				kmem_free(p, sizeof (*p));
5719 			}
5720 		}
5721 		pm_interest[clone] = 0;
5722 		rw_exit(&pm_pscc_interest_rwlock);
5723 	}
5724 	found = 0;
5725 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5726 	for (p = pm_pscc_direct; p; p = pn) {
5727 		pn = p->pscc_next;
5728 		if ((dip && p->pscc_dip == dip) ||
5729 		    (dip == NULL && clone == p->pscc_clone)) {
5730 			ASSERT(clone == p->pscc_clone);
5731 			found++;
5732 			/*
5733 			 * Remove from control list
5734 			 */
5735 			pm_dequeue_pscc(p, &pm_pscc_direct);
5736 			/*
5737 			 * If we're the last reference, free the
5738 			 * entries struct.
5739 			 */
5740 			psce = p->pscc_entries;
5741 			ASSERT(psce);
5742 			if (psce->psce_references == 1) {
5743 				kmem_free(psce->psce_first,
5744 				    PSCCOUNT * sizeof (pm_state_change_t));
5745 				kmem_free(psce, sizeof (*psce));
5746 			} else {
5747 				psce->psce_references--;
5748 			}
5749 			kmem_free(p, sizeof (*p));
5750 		}
5751 	}
5752 	ASSERT(dip == NULL || found);
5753 	rw_exit(&pm_pscc_direct_rwlock);
5754 }
5755 
5756 /*
5757  * Search the indicated list for an entry that matches clone, and return a
5758  * pointer to it.  To be interesting, the entry must have something ready to
5759  * be passed up to the controlling process.
5760  * The returned entry will be locked upon return from this call.
5761  */
5762 static psce_t *
5763 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5764 {
5765 	pscc_t	*p;
5766 	psce_t	*psce;
5767 	rw_enter(lock, RW_READER);
5768 	for (p = *list; p; p = p->pscc_next) {
5769 		if (clone == p->pscc_clone) {
5770 			psce = p->pscc_entries;
5771 			mutex_enter(&psce->psce_lock);
5772 			if (psce->psce_out->size) {
5773 				rw_exit(lock);
5774 				return (psce);
5775 			} else {
5776 				mutex_exit(&psce->psce_lock);
5777 			}
5778 		}
5779 	}
5780 	rw_exit(lock);
5781 	return (NULL);
5782 }
5783 
5784 /*
5785  * Find an entry for a particular clone in the direct list.
5786  */
5787 psce_t *
5788 pm_psc_clone_to_direct(int clone)
5789 {
5790 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5791 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5792 	    &pm_pscc_direct_rwlock));
5793 }
5794 
5795 /*
5796  * Find an entry for a particular clone in the interest list.
5797  */
5798 psce_t *
5799 pm_psc_clone_to_interest(int clone)
5800 {
5801 	static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5802 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5803 	    &pm_pscc_interest_rwlock));
5804 }
5805 
5806 /*
5807  * Put the given entry at the head of the blocked list
5808  */
5809 void
5810 pm_enqueue_blocked(pm_rsvp_t *p)
5811 {
5812 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5813 	ASSERT(p->pr_next == NULL);
5814 	ASSERT(p->pr_prev == NULL);
5815 	if (pm_blocked_list != NULL) {
5816 		p->pr_next = pm_blocked_list;
5817 		ASSERT(pm_blocked_list->pr_prev == NULL);
5818 		pm_blocked_list->pr_prev = p;
5819 		pm_blocked_list = p;
5820 	} else {
5821 		pm_blocked_list = p;
5822 	}
5823 }
5824 
5825 /*
5826  * Sets every power managed device back to its default threshold
5827  */
5828 void
5829 pm_all_to_default_thresholds(void)
5830 {
5831 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5832 	    (void *) &pm_system_idle_threshold);
5833 }
5834 
5835 static int
5836 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5837 {
5838 	int thr = (int)(*(int *)arg);
5839 
5840 	if (!PM_GET_PM_INFO(dip))
5841 		return (DDI_WALK_CONTINUE);
5842 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5843 	return (DDI_WALK_CONTINUE);
5844 }
5845 
5846 /*
5847  * Returns the current threshold value (in seconds) for the indicated component
5848  */
5849 int
5850 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5851 {
5852 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5853 		return (DDI_FAILURE);
5854 	} else {
5855 		*threshp = cur_threshold(dip, comp);
5856 		return (DDI_SUCCESS);
5857 	}
5858 }
5859 
5860 /*
5861  * To be called when changing the power level of a component of a device.
5862  * On some platforms, changing power on one device may require that power
5863  * be changed on other, related devices in the same transaction.  Thus, we
5864  * always pass this request to the platform power manager so that all the
5865  * affected devices will be locked.
5866  */
5867 void
5868 pm_lock_power(dev_info_t *dip, int *circp)
5869 {
5870 	power_req_t power_req;
5871 	int result;
5872 
5873 	power_req.request_type = PMR_PPM_LOCK_POWER;
5874 	power_req.req.ppm_lock_power_req.who = dip;
5875 	power_req.req.ppm_lock_power_req.circp = circp;
5876 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5877 }
5878 
5879 /*
5880  * Release the lock (or locks) acquired to change the power of a device.
5881  * See comments for pm_lock_power.
5882  */
5883 void
5884 pm_unlock_power(dev_info_t *dip, int circ)
5885 {
5886 	power_req_t power_req;
5887 	int result;
5888 
5889 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5890 	power_req.req.ppm_unlock_power_req.who = dip;
5891 	power_req.req.ppm_unlock_power_req.circ = circ;
5892 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5893 }
5894 
5895 
5896 /*
5897  * Attempt (without blocking) to acquire the lock(s) needed to change the
5898  * power of a component of a device.  See comments for pm_lock_power.
5899  *
5900  * Return: 1 if lock(s) acquired, 0 if not.
5901  */
5902 int
5903 pm_try_locking_power(dev_info_t *dip, int *circp)
5904 {
5905 	power_req_t power_req;
5906 	int result;
5907 
5908 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5909 	power_req.req.ppm_lock_power_req.who = dip;
5910 	power_req.req.ppm_lock_power_req.circp = circp;
5911 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5912 	return (result);
5913 }
5914 
5915 
5916 /*
5917  * Lock power state of a device.
5918  *
5919  * The implementation handles a special case where another thread may have
5920  * acquired the lock and created/launched this thread to do the work.  If
5921  * the lock cannot be acquired immediately, we check to see if this thread
5922  * is registered as a borrower of the lock.  If so, we may proceed without
5923  * the lock.  This assumes that the lending thread blocks on the completion
5924  * of this thread.
5925  *
5926  * Note 1: for use by ppm only.
5927  *
5928  * Note 2: On failing to get the lock immediately, we search lock_loan list
5929  * for curthread (as borrower of the lock).  On a hit, we check that the
5930  * lending thread already owns the lock we want.  It is safe to compare
5931  * devi_busy_thread and thread id of the lender because in the == case (the
5932  * only one we care about) we know that the owner is blocked.  Similarly,
5933  * If we find that curthread isn't registered as a lock borrower, it is safe
5934  * to use the blocking call (ndi_devi_enter) because we know that if we
5935  * weren't already listed as a borrower (upstream on the call stack) we won't
5936  * become one.
5937  */
5938 void
5939 pm_lock_power_single(dev_info_t *dip, int *circp)
5940 {
5941 	lock_loan_t *cur;
5942 
5943 	/* if the lock is available, we are done. */
5944 	if (ndi_devi_tryenter(dip, circp))
5945 		return;
5946 
5947 	mutex_enter(&pm_loan_lock);
5948 	/* see if our thread is registered as a lock borrower. */
5949 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5950 		if (cur->pmlk_borrower == curthread)
5951 			break;
5952 	mutex_exit(&pm_loan_lock);
5953 
5954 	/* if this thread not already registered, it is safe to block */
5955 	if (cur == NULL)
5956 		ndi_devi_enter(dip, circp);
5957 	else {
5958 		/* registered: does lender own the lock we want? */
5959 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5960 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5961 			cur->pmlk_dip = dip;
5962 		} else /* no: just block for it */
5963 			ndi_devi_enter(dip, circp);
5964 
5965 	}
5966 }
5967 
5968 /*
5969  * Drop the lock on the device's power state.  See comment for
5970  * pm_lock_power_single() for special implementation considerations.
5971  *
5972  * Note: for use by ppm only.
5973  */
5974 void
5975 pm_unlock_power_single(dev_info_t *dip, int circ)
5976 {
5977 	lock_loan_t *cur;
5978 
5979 	/* optimization: mutex not needed to check empty list */
5980 	if (lock_loan_head.pmlk_next == NULL) {
5981 		ndi_devi_exit(dip, circ);
5982 		return;
5983 	}
5984 
5985 	mutex_enter(&pm_loan_lock);
5986 	/* see if our thread is registered as a lock borrower. */
5987 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5988 		if (cur->pmlk_borrower == curthread)
5989 			break;
5990 	mutex_exit(&pm_loan_lock);
5991 
5992 	if (cur == NULL || cur->pmlk_dip != dip)
5993 		/* we acquired the lock directly, so return it */
5994 		ndi_devi_exit(dip, circ);
5995 }
5996 
5997 /*
5998  * Try to take the lock for changing the power level of a component.
5999  *
6000  * Note: for use by ppm only.
6001  */
6002 int
6003 pm_try_locking_power_single(dev_info_t *dip, int *circp)
6004 {
6005 	return (ndi_devi_tryenter(dip, circp));
6006 }
6007 
6008 #ifdef	DEBUG
6009 /*
6010  * The following are used only to print out data structures for debugging
6011  */
6012 void
6013 prdeps(char *msg)
6014 {
6015 
6016 	pm_pdr_t *rp;
6017 	int i;
6018 
6019 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6020 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6021 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6022 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6023 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6024 		    (void *)rp->pdr_next);
6025 		if (rp->pdr_kept_count != 0) {
6026 			pm_log("kept list = ");
6027 			i = 0;
6028 			while (i < rp->pdr_kept_count) {
6029 				pm_log("%s ", rp->pdr_kept_paths[i]);
6030 				i++;
6031 			}
6032 			pm_log("\n");
6033 		}
6034 	}
6035 }
6036 
6037 void
6038 pr_noinvol(char *hdr)
6039 {
6040 	pm_noinvol_t *ip;
6041 
6042 	pm_log("%s\n", hdr);
6043 	rw_enter(&pm_noinvol_rwlock, RW_READER);
6044 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6045 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6046 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6047 	rw_exit(&pm_noinvol_rwlock);
6048 }
6049 #endif
6050 
6051 /*
6052  * Attempt to apply the thresholds indicated by rp to the node specified by
6053  * dip.
6054  */
6055 void
6056 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6057 {
6058 	PMD_FUNC(pmf, "apply_recorded_thresh")
6059 	int i, j;
6060 	int comps = PM_NUMCMPTS(dip);
6061 	struct pm_component *cp;
6062 	pm_pte_t *ep;
6063 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
6064 
6065 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
6066 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
6067 	PM_LOCK_DIP(dip);
6068 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
6069 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
6070 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
6071 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
6072 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
6073 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
6074 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
6075 		PM_UNLOCK_DIP(dip);
6076 		return;
6077 	}
6078 
6079 	ep = rp->ptr_entries;
6080 	/*
6081 	 * Here we do the special case of a device threshold
6082 	 */
6083 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6084 		ASSERT(ep && ep->pte_numthresh == 1);
6085 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6086 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6087 		PM_UNLOCK_DIP(dip);
6088 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6089 		if (PM_SCANABLE(dip))
6090 			pm_rescan(dip);
6091 		return;
6092 	}
6093 	for (i = 0; i < comps; i++) {
6094 		cp = PM_CP(dip, i);
6095 		for (j = 0; j < ep->pte_numthresh; j++) {
6096 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6097 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6098 			    i, ep->pte_thresh[j]))
6099 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6100 		}
6101 		ep++;
6102 	}
6103 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6104 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6105 	PM_UNLOCK_DIP(dip);
6106 
6107 	if (PM_SCANABLE(dip))
6108 		pm_rescan(dip);
6109 }
6110 
6111 /*
6112  * Returns true if the threshold specified by rp could be applied to dip
6113  * (that is, the number of components and transitions are the same)
6114  */
6115 int
6116 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6117 {
6118 	PMD_FUNC(pmf, "valid_thresh")
6119 	int comps, i;
6120 	pm_component_t *cp;
6121 	pm_pte_t *ep;
6122 
6123 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6124 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6125 		    rp->ptr_physpath))
6126 		return (0);
6127 	}
6128 	/*
6129 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6130 	 * an entry with numcomps == 0, (since we don't know how many
6131 	 * components there are in advance).  This is always a valid
6132 	 * spec.
6133 	 */
6134 	if (rp->ptr_numcomps == 0) {
6135 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6136 		return (1);
6137 	}
6138 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6139 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6140 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6141 		return (0);
6142 	}
6143 	ep = rp->ptr_entries;
6144 	for (i = 0; i < comps; i++) {
6145 		cp = PM_CP(dip, i);
6146 		if ((ep + i)->pte_numthresh !=
6147 		    cp->pmc_comp.pmc_numlevels - 1) {
6148 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6149 			    pmf, rp->ptr_physpath, i,
6150 			    cp->pmc_comp.pmc_numlevels - 1,
6151 			    (ep + i)->pte_numthresh))
6152 			return (0);
6153 		}
6154 	}
6155 	return (1);
6156 }
6157 
6158 /*
6159  * Remove any recorded threshold for device physpath
6160  * We know there will be at most one.
6161  */
6162 void
6163 pm_unrecord_threshold(char *physpath)
6164 {
6165 	pm_thresh_rec_t *pptr, *ptr;
6166 
6167 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6168 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6169 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6170 			if (pptr) {
6171 				pptr->ptr_next = ptr->ptr_next;
6172 			} else {
6173 				ASSERT(pm_thresh_head == ptr);
6174 				pm_thresh_head = ptr->ptr_next;
6175 			}
6176 			kmem_free(ptr, ptr->ptr_size);
6177 			break;
6178 		}
6179 		pptr = ptr;
6180 	}
6181 	rw_exit(&pm_thresh_rwlock);
6182 }
6183 
6184 /*
6185  * Discard all recorded thresholds.  We are returning to the default pm state.
6186  */
6187 void
6188 pm_discard_thresholds(void)
6189 {
6190 	pm_thresh_rec_t *rp;
6191 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6192 	while (pm_thresh_head) {
6193 		rp = pm_thresh_head;
6194 		pm_thresh_head = rp->ptr_next;
6195 		kmem_free(rp, rp->ptr_size);
6196 	}
6197 	rw_exit(&pm_thresh_rwlock);
6198 }
6199 
6200 /*
6201  * Discard all recorded dependencies.  We are returning to the default pm state.
6202  */
6203 void
6204 pm_discard_dependencies(void)
6205 {
6206 	pm_pdr_t *rp;
6207 	int i;
6208 	size_t length;
6209 
6210 #ifdef DEBUG
6211 	if (pm_debug & PMD_DEP)
6212 		prdeps("Before discard\n");
6213 #endif
6214 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6215 
6216 #ifdef DEBUG
6217 	if (pm_debug & PMD_DEP)
6218 		prdeps("After discard\n");
6219 #endif
6220 	while (pm_dep_head) {
6221 		rp = pm_dep_head;
6222 		if (!rp->pdr_isprop) {
6223 			ASSERT(rp->pdr_satisfied == 0);
6224 			ASSERT(pm_unresolved_deps);
6225 			pm_unresolved_deps--;
6226 		} else {
6227 			ASSERT(pm_prop_deps);
6228 			pm_prop_deps--;
6229 		}
6230 		pm_dep_head = rp->pdr_next;
6231 		if (rp->pdr_kept_count)  {
6232 			for (i = 0; i < rp->pdr_kept_count; i++) {
6233 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6234 				kmem_free(rp->pdr_kept_paths[i], length);
6235 			}
6236 			kmem_free(rp->pdr_kept_paths,
6237 			    rp->pdr_kept_count * sizeof (char **));
6238 		}
6239 		kmem_free(rp, rp->pdr_size);
6240 	}
6241 }
6242 
6243 
6244 static int
6245 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6246 {
6247 	_NOTE(ARGUNUSED(arg))
6248 	char *pathbuf;
6249 
6250 	if (PM_GET_PM_INFO(dip) == NULL)
6251 		return (DDI_WALK_CONTINUE);
6252 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6253 	(void) ddi_pathname(dip, pathbuf);
6254 	pm_free_keeper(pathbuf, 0);
6255 	kmem_free(pathbuf, MAXPATHLEN);
6256 	return (DDI_WALK_CONTINUE);
6257 }
6258 
6259 static int
6260 pm_kept_walk(dev_info_t *dip, void *arg)
6261 {
6262 	_NOTE(ARGUNUSED(arg))
6263 	char *pathbuf;
6264 
6265 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6266 	(void) ddi_pathname(dip, pathbuf);
6267 	(void) pm_kept(pathbuf);
6268 	kmem_free(pathbuf, MAXPATHLEN);
6269 
6270 	return (DDI_WALK_CONTINUE);
6271 }
6272 
6273 static int
6274 pm_keeper_walk(dev_info_t *dip, void *arg)
6275 {
6276 	_NOTE(ARGUNUSED(arg))
6277 	char *pathbuf;
6278 
6279 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6280 	(void) ddi_pathname(dip, pathbuf);
6281 	(void) pm_keeper(pathbuf);
6282 	kmem_free(pathbuf, MAXPATHLEN);
6283 
6284 	return (DDI_WALK_CONTINUE);
6285 }
6286 
6287 static char *
6288 pdw_type_decode(int type)
6289 {
6290 	switch (type) {
6291 	case PM_DEP_WK_POWER_ON:
6292 		return ("power on");
6293 	case PM_DEP_WK_POWER_OFF:
6294 		return ("power off");
6295 	case PM_DEP_WK_DETACH:
6296 		return ("detach");
6297 	case PM_DEP_WK_REMOVE_DEP:
6298 		return ("remove dep");
6299 	case PM_DEP_WK_BRINGUP_SELF:
6300 		return ("bringup self");
6301 	case PM_DEP_WK_RECORD_KEEPER:
6302 		return ("add dependent");
6303 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6304 		return ("add dependent property");
6305 	case PM_DEP_WK_KEPT:
6306 		return ("kept");
6307 	case PM_DEP_WK_KEEPER:
6308 		return ("keeper");
6309 	case PM_DEP_WK_ATTACH:
6310 		return ("attach");
6311 	case PM_DEP_WK_CHECK_KEPT:
6312 		return ("check kept");
6313 	case PM_DEP_WK_CPR_SUSPEND:
6314 		return ("suspend");
6315 	case PM_DEP_WK_CPR_RESUME:
6316 		return ("resume");
6317 	default:
6318 		return ("unknown");
6319 	}
6320 
6321 }
6322 
6323 static void
6324 pm_rele_dep(char *keeper)
6325 {
6326 	PMD_FUNC(pmf, "rele_dep")
6327 	pm_pdr_t *dp;
6328 	char *kept_path = NULL;
6329 	dev_info_t *kept = NULL;
6330 	int count = 0;
6331 
6332 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6333 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6334 			continue;
6335 		for (count = 0; count < dp->pdr_kept_count; count++) {
6336 			kept_path = dp->pdr_kept_paths[count];
6337 			if (kept_path == NULL)
6338 				continue;
6339 			kept = pm_name_to_dip(kept_path, 1);
6340 			if (kept) {
6341 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6342 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6343 				    keeper))
6344 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6345 				pm_rele_power(kept);
6346 				ddi_release_devi(kept);
6347 			}
6348 		}
6349 	}
6350 }
6351 
6352 /*
6353  * Called when we are just released from direct PM.  Bring ourself up
6354  * if our keeper is up since dependency is not honored while a kept
6355  * device is under direct PM.
6356  */
6357 static void
6358 pm_bring_self_up(char *keptpath)
6359 {
6360 	PMD_FUNC(pmf, "bring_self_up")
6361 	dev_info_t *kept;
6362 	dev_info_t *keeper;
6363 	pm_pdr_t *dp;
6364 	int i, j;
6365 	int up = 0, circ;
6366 
6367 	kept = pm_name_to_dip(keptpath, 1);
6368 	if (kept == NULL)
6369 		return;
6370 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6371 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6372 		if (dp->pdr_kept_count == 0)
6373 			continue;
6374 		for (i = 0; i < dp->pdr_kept_count; i++) {
6375 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6376 				continue;
6377 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6378 			if (keeper) {
6379 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6380 				    pmf, PM_DEVICE(keeper)))
6381 				PM_LOCK_POWER(keeper, &circ);
6382 				for (j = 0; j < PM_NUMCMPTS(keeper);
6383 				    j++) {
6384 					if (PM_CURPOWER(keeper, j)) {
6385 						PMD(PMD_KEEPS, ("%s: comp="
6386 						    "%d is up\n", pmf, j))
6387 						up++;
6388 					}
6389 				}
6390 				if (up) {
6391 					if (PM_SKBU(kept))
6392 						DEVI(kept)->devi_pm_flags &=
6393 						    ~PMC_SKIP_BRINGUP;
6394 					bring_pmdep_up(kept, 1);
6395 				}
6396 				PM_UNLOCK_POWER(keeper, circ);
6397 				ddi_release_devi(keeper);
6398 			}
6399 		}
6400 	}
6401 	ddi_release_devi(kept);
6402 }
6403 
6404 static void
6405 pm_process_dep_request(pm_dep_wk_t *work)
6406 {
6407 	PMD_FUNC(pmf, "dep_req")
6408 	int ret;
6409 
6410 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6411 	    pdw_type_decode(work->pdw_type)))
6412 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6413 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6414 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6415 
6416 	switch (work->pdw_type) {
6417 	case PM_DEP_WK_POWER_ON:
6418 		/* Bring up the kept devices and put a hold on them */
6419 		bring_wekeeps_up(work->pdw_keeper);
6420 		break;
6421 	case PM_DEP_WK_POWER_OFF:
6422 		/* Release the kept devices */
6423 		pm_rele_dep(work->pdw_keeper);
6424 		break;
6425 	case PM_DEP_WK_DETACH:
6426 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6427 		break;
6428 	case PM_DEP_WK_REMOVE_DEP:
6429 		pm_discard_dependencies();
6430 		break;
6431 	case PM_DEP_WK_BRINGUP_SELF:
6432 		/*
6433 		 * We deferred satisfying our dependency till now, so satisfy
6434 		 * it again and bring ourselves up.
6435 		 */
6436 		pm_bring_self_up(work->pdw_kept);
6437 		break;
6438 	case PM_DEP_WK_RECORD_KEEPER:
6439 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6440 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6441 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6442 		break;
6443 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6444 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6445 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6446 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6447 		break;
6448 	case PM_DEP_WK_KEPT:
6449 		ret = pm_kept(work->pdw_kept);
6450 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6451 		    ret))
6452 		break;
6453 	case PM_DEP_WK_KEEPER:
6454 		ret = pm_keeper(work->pdw_keeper);
6455 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6456 		    pmf, ret))
6457 		break;
6458 	case PM_DEP_WK_ATTACH:
6459 		ret = pm_keeper(work->pdw_keeper);
6460 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6461 		    pmf, ret))
6462 		ret = pm_kept(work->pdw_kept);
6463 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6464 		    pmf, ret))
6465 		break;
6466 	case PM_DEP_WK_CHECK_KEPT:
6467 		ret = pm_is_kept(work->pdw_kept);
6468 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6469 		    pmf, work->pdw_kept, ret))
6470 		break;
6471 	case PM_DEP_WK_CPR_SUSPEND:
6472 		pm_discard_dependencies();
6473 		break;
6474 	case PM_DEP_WK_CPR_RESUME:
6475 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6476 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6477 		break;
6478 	default:
6479 		ASSERT(0);
6480 		break;
6481 	}
6482 	/*
6483 	 * Free the work structure if the requester is not waiting
6484 	 * Otherwise it is the requester's responsiblity to free it.
6485 	 */
6486 	if (!work->pdw_wait) {
6487 		if (work->pdw_keeper)
6488 			kmem_free(work->pdw_keeper,
6489 			    strlen(work->pdw_keeper) + 1);
6490 		if (work->pdw_kept)
6491 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6492 		kmem_free(work, sizeof (pm_dep_wk_t));
6493 	} else {
6494 		/*
6495 		 * Notify requester if it is waiting for it.
6496 		 */
6497 		work->pdw_ret = ret;
6498 		work->pdw_done = 1;
6499 		cv_signal(&work->pdw_cv);
6500 	}
6501 }
6502 
6503 /*
6504  * Process PM dependency requests.
6505  */
6506 static void
6507 pm_dep_thread(void)
6508 {
6509 	pm_dep_wk_t *work;
6510 	callb_cpr_t cprinfo;
6511 
6512 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6513 	    "pm_dep_thread");
6514 	for (;;) {
6515 		mutex_enter(&pm_dep_thread_lock);
6516 		if (pm_dep_thread_workq == NULL) {
6517 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6518 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6519 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6520 		}
6521 		work = pm_dep_thread_workq;
6522 		pm_dep_thread_workq = work->pdw_next;
6523 		if (pm_dep_thread_tail == work)
6524 			pm_dep_thread_tail = work->pdw_next;
6525 		mutex_exit(&pm_dep_thread_lock);
6526 		pm_process_dep_request(work);
6527 
6528 	}
6529 	/*NOTREACHED*/
6530 }
6531 
6532 /*
6533  * Set the power level of the indicated device to unknown (if it is not a
6534  * backwards compatible device), as it has just been resumed, and it won't
6535  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6536  */
6537 void
6538 pm_forget_power_level(dev_info_t *dip)
6539 {
6540 	dev_info_t *pdip = ddi_get_parent(dip);
6541 	int i, count = 0;
6542 
6543 	if (!PM_ISBC(dip)) {
6544 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6545 			count += (PM_CURPOWER(dip, i) == 0);
6546 
6547 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6548 			e_pm_hold_rele_power(pdip, count);
6549 
6550 		/*
6551 		 * Count this as a power cycle if we care
6552 		 */
6553 		if (DEVI(dip)->devi_pm_volpmd &&
6554 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6555 			DEVI(dip)->devi_pm_volpmd = 0;
6556 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6557 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6558 	}
6559 }
6560 
6561 /*
6562  * This function advises the caller whether it should make a power-off
6563  * transition at this time or not.  If the transition is not advised
6564  * at this time, the time that the next power-off transition can
6565  * be made from now is returned through "intervalp" pointer.
6566  * This function returns:
6567  *
6568  *  1  power-off advised
6569  *  0  power-off not advised, intervalp will point to seconds from
6570  *	  now that a power-off is advised.  If it is passed the number
6571  *	  of years that policy specifies the device should last,
6572  *	  a large number is returned as the time interval.
6573  *  -1  error
6574  */
6575 int
6576 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6577 {
6578 	PMD_FUNC(pmf, "pm_trans_check")
6579 	char dbuf[DC_SCSI_MFR_LEN];
6580 	struct pm_scsi_cycles *scp;
6581 	int service_years, service_weeks, full_years;
6582 	time_t now, service_seconds, tdiff;
6583 	time_t within_year, when_allowed;
6584 	char *ptr;
6585 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6586 	int cycles_diff, cycles_over;
6587 
6588 	if (datap == NULL) {
6589 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6590 		return (-1);
6591 	}
6592 
6593 	if (datap->format == DC_SCSI_FORMAT) {
6594 		/*
6595 		 * Power cycles of the scsi drives are distributed
6596 		 * over 5 years with the following percentage ratio:
6597 		 *
6598 		 *	30%, 25%, 20%, 15%, and 10%
6599 		 *
6600 		 * The power cycle quota for each year is distributed
6601 		 * linearly through out the year.  The equation for
6602 		 * determining the expected cycles is:
6603 		 *
6604 		 *	e = a * (n / y)
6605 		 *
6606 		 * e = expected cycles
6607 		 * a = allocated cycles for this year
6608 		 * n = number of seconds since beginning of this year
6609 		 * y = number of seconds in a year
6610 		 *
6611 		 * Note that beginning of the year starts the day that
6612 		 * the drive has been put on service.
6613 		 *
6614 		 * If the drive has passed its expected cycles, we
6615 		 * can determine when it can start to power cycle
6616 		 * again to keep it on track to meet the 5-year
6617 		 * life expectancy.  The equation for determining
6618 		 * when to power cycle is:
6619 		 *
6620 		 *	w = y * (c / a)
6621 		 *
6622 		 * w = when it can power cycle again
6623 		 * y = number of seconds in a year
6624 		 * c = current number of cycles
6625 		 * a = allocated cycles for the year
6626 		 *
6627 		 */
6628 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6629 
6630 		scp = &datap->un.scsi_cycles;
6631 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6632 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6633 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6634 		if (scp->ncycles < 0 || scp->flag != 0) {
6635 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6636 			return (-1);
6637 		}
6638 
6639 		if (scp->ncycles > scp->lifemax) {
6640 			*intervalp = (LONG_MAX / hz);
6641 			return (0);
6642 		}
6643 
6644 		/*
6645 		 * convert service date to time_t
6646 		 */
6647 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6648 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6649 		ptr = dbuf;
6650 		service_years = stoi(&ptr) - EPOCH_YEAR;
6651 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6652 		    DC_SCSI_WEEK_LEN);
6653 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6654 
6655 		/*
6656 		 * scsi standard does not specify WW data,
6657 		 * could be (00-51) or (01-52)
6658 		 */
6659 		ptr = dbuf;
6660 		service_weeks = stoi(&ptr);
6661 		if (service_years < 0 ||
6662 		    service_weeks < 0 || service_weeks > 52) {
6663 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6664 			    pmf, service_years, service_weeks))
6665 			return (-1);
6666 		}
6667 
6668 		/*
6669 		 * calculate service date in seconds-since-epoch,
6670 		 * adding one day for each leap-year.
6671 		 *
6672 		 * (years-since-epoch + 2) fixes integer truncation,
6673 		 * example: (8) leap-years during [1972, 2000]
6674 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6675 		 */
6676 		service_seconds = (service_years * DC_SPY) +
6677 		    (service_weeks * DC_SPW) +
6678 		    (((service_years + 2) / 4) * DC_SPD);
6679 
6680 		now = gethrestime_sec();
6681 		/*
6682 		 * since the granularity of 'svc_date' is day not second,
6683 		 * 'now' should be rounded up to full day.
6684 		 */
6685 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6686 		if (service_seconds > now) {
6687 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6688 			    "than now (%ld)!\n", pmf, service_seconds, now))
6689 			return (-1);
6690 		}
6691 
6692 		tdiff = now - service_seconds;
6693 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6694 
6695 		/*
6696 		 * NOTE - Leap years are not considered in the calculations
6697 		 * below.
6698 		 */
6699 		full_years = (tdiff / DC_SPY);
6700 		if ((full_years >= DC_SCSI_NPY) &&
6701 		    (scp->ncycles <= scp->lifemax))
6702 			return (1);
6703 
6704 		/*
6705 		 * Determine what is the normal cycle usage for the
6706 		 * device at the beginning and the end of this year.
6707 		 */
6708 		lower_bound_cycles = (!full_years) ? 0 :
6709 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6710 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6711 
6712 		if (scp->ncycles <= lower_bound_cycles)
6713 			return (1);
6714 
6715 		/*
6716 		 * The linear slope that determines how many cycles
6717 		 * are allowed this year is number of seconds
6718 		 * passed this year over total number of seconds in a year.
6719 		 */
6720 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6721 		within_year = (tdiff % DC_SPY);
6722 		cycles_allowed = lower_bound_cycles +
6723 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6724 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6725 		    full_years, within_year))
6726 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6727 		    cycles_allowed))
6728 
6729 		if (scp->ncycles <= cycles_allowed)
6730 			return (1);
6731 
6732 		/*
6733 		 * The transition is not advised now but we can
6734 		 * determine when the next transition can be made.
6735 		 *
6736 		 * Depending on how many cycles the device has been
6737 		 * over-used, we may need to skip years with
6738 		 * different percentage quota in order to determine
6739 		 * when the next transition can be made.
6740 		 */
6741 		cycles_over = (scp->ncycles - lower_bound_cycles);
6742 		while (cycles_over > cycles_diff) {
6743 			full_years++;
6744 			if (full_years >= DC_SCSI_NPY) {
6745 				*intervalp = (LONG_MAX / hz);
6746 				return (0);
6747 			}
6748 			cycles_over -= cycles_diff;
6749 			lower_bound_cycles = upper_bound_cycles;
6750 			upper_bound_cycles =
6751 			    (scp->lifemax * pcnt[full_years]) / 100;
6752 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6753 		}
6754 
6755 		/*
6756 		 * The linear slope that determines when the next transition
6757 		 * can be made is the relative position of used cycles within a
6758 		 * year over total number of cycles within that year.
6759 		 */
6760 		when_allowed = service_seconds + (full_years * DC_SPY) +
6761 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6762 		*intervalp = (when_allowed - now);
6763 		if (*intervalp > (LONG_MAX / hz))
6764 			*intervalp = (LONG_MAX / hz);
6765 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6766 		    *intervalp))
6767 		return (0);
6768 	}
6769 
6770 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6771 	return (-1);
6772 }
6773 
6774 /*
6775  * Nexus drivers call into pm framework to indicate which child driver is about
6776  * to be installed.  In some platforms, ppm may need to configure the hardware
6777  * for successful installation of a driver.
6778  */
6779 int
6780 pm_init_child(dev_info_t *dip)
6781 {
6782 	power_req_t power_req;
6783 
6784 	ASSERT(ddi_binding_name(dip));
6785 	ASSERT(ddi_get_name_addr(dip));
6786 	pm_ppm_claim(dip);
6787 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6788 		power_req.request_type = PMR_PPM_INIT_CHILD;
6789 		power_req.req.ppm_config_req.who = dip;
6790 		ASSERT(PPM(dip) != NULL);
6791 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6792 		    NULL));
6793 	} else {
6794 #ifdef DEBUG
6795 		/* pass it to the default handler so we can debug things */
6796 		power_req.request_type = PMR_PPM_INIT_CHILD;
6797 		power_req.req.ppm_config_req.who = dip;
6798 		(void) pm_ctlops(NULL, dip,
6799 		    DDI_CTLOPS_POWER, &power_req, NULL);
6800 #endif
6801 	}
6802 	return (DDI_SUCCESS);
6803 }
6804 
6805 /*
6806  * Bring parent of a node that is about to be probed up to full power, and
6807  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6808  * it is time to let it go down again
6809  */
6810 void
6811 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6812 {
6813 	int result;
6814 	power_req_t power_req;
6815 
6816 	bzero(cp, sizeof (*cp));
6817 	cp->ppc_dip = dip;
6818 
6819 	pm_ppm_claim(dip);
6820 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6821 		power_req.request_type = PMR_PPM_PRE_PROBE;
6822 		power_req.req.ppm_config_req.who = dip;
6823 		ASSERT(PPM(dip) != NULL);
6824 		(void) pm_ctlops(PPM(dip), dip,
6825 		    DDI_CTLOPS_POWER, &power_req, &result);
6826 		cp->ppc_ppm = PPM(dip);
6827 	} else {
6828 #ifdef DEBUG
6829 		/* pass it to the default handler so we can debug things */
6830 		power_req.request_type = PMR_PPM_PRE_PROBE;
6831 		power_req.req.ppm_config_req.who = dip;
6832 		(void) pm_ctlops(NULL, dip,
6833 		    DDI_CTLOPS_POWER, &power_req, &result);
6834 #endif
6835 		cp->ppc_ppm = NULL;
6836 	}
6837 }
6838 
6839 int
6840 pm_pre_config(dev_info_t *dip, char *devnm)
6841 {
6842 	PMD_FUNC(pmf, "pre_config")
6843 	int ret;
6844 
6845 	if (MDI_VHCI(dip)) {
6846 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6847 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6848 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6849 	} else if (!PM_GET_PM_INFO(dip))
6850 		return (DDI_SUCCESS);
6851 
6852 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6853 	pm_hold_power(dip);
6854 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6855 	if (ret != DDI_SUCCESS)
6856 		pm_rele_power(dip);
6857 	return (ret);
6858 }
6859 
6860 /*
6861  * This routine is called by devfs during its walk to unconfigue a node.
6862  * If the call is due to auto mod_unloads and the dip is not at its
6863  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6864  * return DDI_SUCCESS.
6865  */
6866 int
6867 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6868 {
6869 	PMD_FUNC(pmf, "pre_unconfig")
6870 	int ret;
6871 
6872 	if (MDI_VHCI(dip)) {
6873 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6874 		    PM_DEVICE(dip), flags))
6875 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6876 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6877 	} else if (!PM_GET_PM_INFO(dip))
6878 		return (DDI_SUCCESS);
6879 
6880 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6881 	    flags))
6882 	*held = 0;
6883 
6884 	/*
6885 	 * If the dip is a leaf node, don't power it up.
6886 	 */
6887 	if (!ddi_get_child(dip))
6888 		return (DDI_SUCCESS);
6889 
6890 	/*
6891 	 * Do not power up the node if it is called due to auto-modunload.
6892 	 */
6893 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6894 		return (DDI_FAILURE);
6895 
6896 	pm_hold_power(dip);
6897 	*held = 1;
6898 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6899 	if (ret != DDI_SUCCESS) {
6900 		pm_rele_power(dip);
6901 		*held = 0;
6902 	}
6903 	return (ret);
6904 }
6905 
6906 /*
6907  * Notify ppm of attach action.  Parent is already held at full power by
6908  * probe action.
6909  */
6910 void
6911 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6912 {
6913 	static char *me = "pm_pre_attach";
6914 	power_req_t power_req;
6915 	int result;
6916 
6917 	/*
6918 	 * Initialize and fill in the PPM cookie
6919 	 */
6920 	bzero(cp, sizeof (*cp));
6921 	cp->ppc_cmd = (int)cmd;
6922 	cp->ppc_ppm = PPM(dip);
6923 	cp->ppc_dip = dip;
6924 
6925 	/*
6926 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6927 	 * Power Management stuff. DDI_RESUME also has to purge it's
6928 	 * powerlevel information.
6929 	 */
6930 	switch (cmd) {
6931 	case DDI_ATTACH:
6932 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6933 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6934 			power_req.req.ppm_config_req.who = dip;
6935 			ASSERT(PPM(dip));
6936 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6937 			    &power_req, &result);
6938 		}
6939 #ifdef DEBUG
6940 		else {
6941 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6942 			power_req.req.ppm_config_req.who = dip;
6943 			(void) pm_ctlops(NULL, dip,
6944 			    DDI_CTLOPS_POWER, &power_req, &result);
6945 		}
6946 #endif
6947 		break;
6948 	case DDI_RESUME:
6949 		pm_forget_power_level(dip);
6950 
6951 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6952 			power_req.request_type = PMR_PPM_PRE_RESUME;
6953 			power_req.req.resume_req.who = cp->ppc_dip;
6954 			power_req.req.resume_req.cmd =
6955 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6956 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6957 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6958 			    DDI_CTLOPS_POWER, &power_req, &result);
6959 		}
6960 #ifdef DEBUG
6961 		else {
6962 			power_req.request_type = PMR_PPM_PRE_RESUME;
6963 			power_req.req.resume_req.who = cp->ppc_dip;
6964 			power_req.req.resume_req.cmd =
6965 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6966 			(void) pm_ctlops(NULL, cp->ppc_dip,
6967 			    DDI_CTLOPS_POWER, &power_req, &result);
6968 		}
6969 #endif
6970 		break;
6971 
6972 	case DDI_PM_RESUME:
6973 		break;
6974 
6975 	default:
6976 		panic(me);
6977 	}
6978 }
6979 
6980 /*
6981  * Nexus drivers call into pm framework to indicate which child driver is
6982  * being uninstalled.  In some platforms, ppm may need to reconfigure the
6983  * hardware since the device driver is no longer installed.
6984  */
6985 int
6986 pm_uninit_child(dev_info_t *dip)
6987 {
6988 	power_req_t power_req;
6989 
6990 	ASSERT(ddi_binding_name(dip));
6991 	ASSERT(ddi_get_name_addr(dip));
6992 	pm_ppm_claim(dip);
6993 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6994 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
6995 		power_req.req.ppm_config_req.who = dip;
6996 		ASSERT(PPM(dip));
6997 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6998 		    NULL));
6999 	} else {
7000 #ifdef DEBUG
7001 		/* pass it to the default handler so we can debug things */
7002 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
7003 		power_req.req.ppm_config_req.who = dip;
7004 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
7005 #endif
7006 	}
7007 	return (DDI_SUCCESS);
7008 }
7009 /*
7010  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
7011  * Also notify ppm of result of probe if there is a ppm that cares
7012  */
7013 void
7014 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
7015 {
7016 	_NOTE(ARGUNUSED(probe_failed))
7017 	int result;
7018 	power_req_t power_req;
7019 
7020 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7021 		power_req.request_type = PMR_PPM_POST_PROBE;
7022 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7023 		power_req.req.ppm_config_req.result = ret;
7024 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7025 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
7026 		    &power_req, &result);
7027 	}
7028 #ifdef DEBUG
7029 	else {
7030 		power_req.request_type = PMR_PPM_POST_PROBE;
7031 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7032 		power_req.req.ppm_config_req.result = ret;
7033 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
7034 		    &power_req, &result);
7035 	}
7036 #endif
7037 }
7038 
7039 void
7040 pm_post_config(dev_info_t *dip, char *devnm)
7041 {
7042 	PMD_FUNC(pmf, "post_config")
7043 
7044 	if (MDI_VHCI(dip)) {
7045 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7046 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
7047 		return;
7048 	} else if (!PM_GET_PM_INFO(dip))
7049 		return;
7050 
7051 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7052 	pm_rele_power(dip);
7053 }
7054 
7055 void
7056 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
7057 {
7058 	PMD_FUNC(pmf, "post_unconfig")
7059 
7060 	if (MDI_VHCI(dip)) {
7061 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
7062 		    PM_DEVICE(dip), held))
7063 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
7064 		return;
7065 	} else if (!PM_GET_PM_INFO(dip))
7066 		return;
7067 
7068 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
7069 	    held))
7070 	if (!held)
7071 		return;
7072 	/*
7073 	 * We have held power in pre_unconfig, release it here.
7074 	 */
7075 	pm_rele_power(dip);
7076 }
7077 
7078 /*
7079  * Notify ppm of result of attach if there is a ppm that cares
7080  */
7081 void
7082 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7083 {
7084 	int result;
7085 	power_req_t power_req;
7086 	dev_info_t	*dip;
7087 
7088 	if (cp->ppc_cmd != DDI_ATTACH)
7089 		return;
7090 
7091 	dip = cp->ppc_dip;
7092 
7093 	if (ret == DDI_SUCCESS) {
7094 		/*
7095 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7096 		 */
7097 		if (PM_GET_PM_INFO(dip) == NULL)
7098 			(void) pm_start(dip);
7099 	} else {
7100 		/*
7101 		 * Attach may have got pm started before failing
7102 		 */
7103 		pm_stop(dip);
7104 	}
7105 
7106 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7107 		power_req.request_type = PMR_PPM_POST_ATTACH;
7108 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7109 		power_req.req.ppm_config_req.result = ret;
7110 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7111 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7112 		    DDI_CTLOPS_POWER, &power_req, &result);
7113 	}
7114 #ifdef DEBUG
7115 	else {
7116 		power_req.request_type = PMR_PPM_POST_ATTACH;
7117 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7118 		power_req.req.ppm_config_req.result = ret;
7119 		(void) pm_ctlops(NULL, cp->ppc_dip,
7120 		    DDI_CTLOPS_POWER, &power_req, &result);
7121 	}
7122 #endif
7123 }
7124 
7125 /*
7126  * Notify ppm of attach action.  Parent is already held at full power by
7127  * probe action.
7128  */
7129 void
7130 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7131 {
7132 	int result;
7133 	power_req_t power_req;
7134 
7135 	bzero(cp, sizeof (*cp));
7136 	cp->ppc_dip = dip;
7137 	cp->ppc_cmd = (int)cmd;
7138 
7139 	switch (cmd) {
7140 	case DDI_DETACH:
7141 		pm_detaching(dip);		/* suspend pm while detaching */
7142 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7143 			power_req.request_type = PMR_PPM_PRE_DETACH;
7144 			power_req.req.ppm_config_req.who = dip;
7145 			ASSERT(PPM(dip));
7146 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7147 			    &power_req, &result);
7148 			cp->ppc_ppm = PPM(dip);
7149 		} else {
7150 #ifdef DEBUG
7151 			/* pass to the default handler so we can debug things */
7152 			power_req.request_type = PMR_PPM_PRE_DETACH;
7153 			power_req.req.ppm_config_req.who = dip;
7154 			(void) pm_ctlops(NULL, dip,
7155 			    DDI_CTLOPS_POWER, &power_req, &result);
7156 #endif
7157 			cp->ppc_ppm = NULL;
7158 		}
7159 		break;
7160 
7161 	default:
7162 		break;
7163 	}
7164 }
7165 
7166 /*
7167  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7168  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7169  * make an entry to record the details, which includes certain flag settings.
7170  */
7171 static void
7172 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7173     int wasvolpmd, major_t major)
7174 {
7175 	PMD_FUNC(pmf, "record_invol_path")
7176 	major_t pm_path_to_major(char *);
7177 	size_t plen;
7178 	pm_noinvol_t *ip, *np, *pp;
7179 	pp = NULL;
7180 
7181 	plen = strlen(path) + 1;
7182 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7183 	np->ni_size = plen;
7184 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7185 	np->ni_noinvolpm = noinvolpm;
7186 	np->ni_volpmd = volpmd;
7187 	np->ni_wasvolpmd = wasvolpmd;
7188 	np->ni_flags = flags;
7189 	(void) strcpy(np->ni_path, path);
7190 	/*
7191 	 * If we haven't actually seen the node attached, it is hard to figure
7192 	 * out its major.  If we could hold the node by path, we would be much
7193 	 * happier here.
7194 	 */
7195 	if (major == DDI_MAJOR_T_NONE) {
7196 		np->ni_major = pm_path_to_major(path);
7197 	} else {
7198 		np->ni_major = major;
7199 	}
7200 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7201 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7202 		int comp = strcmp(path, ip->ni_path);
7203 		if (comp < 0) {
7204 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7205 			    pmf, path, ip->ni_path))
7206 			/* insert before current entry */
7207 			np->ni_next = ip;
7208 			if (pp) {
7209 				pp->ni_next = np;
7210 			} else {
7211 				pm_noinvol_head = np;
7212 			}
7213 			rw_exit(&pm_noinvol_rwlock);
7214 #ifdef DEBUG
7215 			if (pm_debug & PMD_NOINVOL)
7216 				pr_noinvol("record_invol_path exit0");
7217 #endif
7218 			return;
7219 		} else if (comp == 0) {
7220 			panic("%s already in pm_noinvol list", path);
7221 		}
7222 	}
7223 	/*
7224 	 * If we did not find an entry in the list that this should go before,
7225 	 * then it must go at the end
7226 	 */
7227 	if (pp) {
7228 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7229 		    pp->ni_path))
7230 		ASSERT(pp->ni_next == 0);
7231 		pp->ni_next = np;
7232 	} else {
7233 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7234 		ASSERT(!pm_noinvol_head);
7235 		pm_noinvol_head = np;
7236 	}
7237 	rw_exit(&pm_noinvol_rwlock);
7238 #ifdef DEBUG
7239 	if (pm_debug & PMD_NOINVOL)
7240 		pr_noinvol("record_invol_path exit");
7241 #endif
7242 }
7243 
7244 void
7245 pm_record_invol(dev_info_t *dip)
7246 {
7247 	char *pathbuf;
7248 	int pm_all_components_off(dev_info_t *);
7249 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7250 
7251 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7252 	(void) ddi_pathname(dip, pathbuf);
7253 
7254 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7255 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7256 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7257 
7258 	/*
7259 	 * If this child's detach will be holding up its ancestors, then we
7260 	 * allow for an exception to that if all children of this type have
7261 	 * gone down voluntarily.
7262 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7263 	 */
7264 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7265 	    dip);
7266 	kmem_free(pathbuf, MAXPATHLEN);
7267 }
7268 
7269 void
7270 pm_post_detach(pm_ppm_cookie_t *cp, int ret)
7271 {
7272 	dev_info_t *dip = cp->ppc_dip;
7273 	int result;
7274 	power_req_t power_req;
7275 
7276 	switch (cp->ppc_cmd) {
7277 	case DDI_DETACH:
7278 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7279 			power_req.request_type = PMR_PPM_POST_DETACH;
7280 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7281 			power_req.req.ppm_config_req.result = ret;
7282 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7283 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7284 			    DDI_CTLOPS_POWER, &power_req, &result);
7285 		}
7286 #ifdef DEBUG
7287 		else {
7288 			power_req.request_type = PMR_PPM_POST_DETACH;
7289 			power_req.req.ppm_config_req.who = cp->ppc_dip;
7290 			power_req.req.ppm_config_req.result = ret;
7291 			(void) pm_ctlops(NULL, cp->ppc_dip,
7292 			    DDI_CTLOPS_POWER, &power_req, &result);
7293 		}
7294 #endif
7295 		if (ret == DDI_SUCCESS) {
7296 			/*
7297 			 * For hotplug detach we assume it is *really* gone
7298 			 */
7299 			if (cp->ppc_cmd == DDI_DETACH &&
7300 			    ((DEVI(dip)->devi_pm_flags &
7301 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7302 			    DEVI(dip)->devi_pm_noinvolpm))
7303 				pm_record_invol(dip);
7304 			DEVI(dip)->devi_pm_flags &=
7305 			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);
7306 
7307 			/*
7308 			 * If console fb is detaching, then we don't need to
7309 			 * worry any more about it going off (pm_detaching has
7310 			 * brought up all components)
7311 			 */
7312 			if (PM_IS_CFB(dip)) {
7313 				mutex_enter(&pm_cfb_lock);
7314 				ASSERT(cfb_dip_detaching);
7315 				ASSERT(cfb_dip == NULL);
7316 				ASSERT(pm_cfb_comps_off == 0);
7317 				cfb_dip_detaching = NULL;
7318 				mutex_exit(&pm_cfb_lock);
7319 			}
7320 			pm_stop(dip);	/* make it permanent */
7321 		} else {
7322 			if (PM_IS_CFB(dip)) {
7323 				mutex_enter(&pm_cfb_lock);
7324 				ASSERT(cfb_dip_detaching);
7325 				ASSERT(cfb_dip == NULL);
7326 				ASSERT(pm_cfb_comps_off == 0);
7327 				cfb_dip = cfb_dip_detaching;
7328 				cfb_dip_detaching = NULL;
7329 				mutex_exit(&pm_cfb_lock);
7330 			}
7331 			pm_detach_failed(dip);	/* resume power management */
7332 		}
7333 		break;
7334 	case DDI_PM_SUSPEND:
7335 		break;
7336 	case DDI_SUSPEND:
7337 		break;				/* legal, but nothing to do */
7338 	default:
7339 #ifdef DEBUG
7340 		panic("pm_post_detach: unrecognized cmd %d for detach",
7341 		    cp->ppc_cmd);
7342 		/*NOTREACHED*/
7343 #else
7344 		break;
7345 #endif
7346 	}
7347 }
7348 
7349 /*
7350  * Called after vfs_mountroot has got the clock started to fix up timestamps
7351  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7352  * devices look busy but have a 0 busycnt
7353  */
7354 int
7355 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7356 {
7357 	_NOTE(ARGUNUSED(arg))
7358 
7359 	pm_info_t *info = PM_GET_PM_INFO(dip);
7360 	struct pm_component *cp;
7361 	int i;
7362 
7363 	if (!info)
7364 		return (DDI_WALK_CONTINUE);
7365 	PM_LOCK_BUSY(dip);
7366 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7367 		cp = PM_CP(dip, i);
7368 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7369 			cp->pmc_timestamp = gethrestime_sec();
7370 	}
7371 	PM_UNLOCK_BUSY(dip);
7372 	return (DDI_WALK_CONTINUE);
7373 }
7374 
7375 /*
7376  * Called at attach time to see if the device being attached has a record in
7377  * the no involuntary power cycles list.  If so, we do some bookkeeping on the
7378  * parents and set a flag in the dip
7379  */
7380 void
7381 pm_noinvol_specd(dev_info_t *dip)
7382 {
7383 	PMD_FUNC(pmf, "noinvol_specd")
7384 	char *pathbuf;
7385 	pm_noinvol_t *ip, *pp = NULL;
7386 	int wasvolpmd;
7387 	int found = 0;
7388 
7389 	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
7390 		return;
7391 	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
7392 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7393 	(void) ddi_pathname(dip, pathbuf);
7394 
7395 	PM_LOCK_DIP(dip);
7396 	DEVI(dip)->devi_pm_volpmd = 0;
7397 	DEVI(dip)->devi_pm_noinvolpm = 0;
7398 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7399 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7400 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7401 		    pmf, pathbuf, ip->ni_path))
7402 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7403 			found++;
7404 			break;
7405 		}
7406 	}
7407 	rw_exit(&pm_noinvol_rwlock);
7408 	if (!found) {
7409 		PM_UNLOCK_DIP(dip);
7410 		kmem_free(pathbuf, MAXPATHLEN);
7411 		return;
7412 	}
7413 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7414 	pp = NULL;
7415 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7416 		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
7417 		    pmf, pathbuf, ip->ni_path))
7418 		if (strcmp(pathbuf, ip->ni_path) == 0) {
7419 			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
7420 			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
7421 			/*
7422 			 * Handle special case of console fb
7423 			 */
7424 			if (PM_IS_CFB(dip)) {
7425 				mutex_enter(&pm_cfb_lock);
7426 				cfb_dip = dip;
7427 				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
7428 				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
7429 				mutex_exit(&pm_cfb_lock);
7430 			}
7431 			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
7432 			ASSERT((DEVI(dip)->devi_pm_flags &
7433 			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
7434 			    DEVI(dip)->devi_pm_noinvolpm);
7435 			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
7436 			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
7437 			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
7438 			    ip->ni_noinvolpm, ip->ni_volpmd,
7439 			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
7440 			/*
7441 			 * free the entry in hopes the list will now be empty
7442 			 * and we won't have to search it any more until the
7443 			 * device detaches
7444 			 */
7445 			if (pp) {
7446 				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
7447 				    pmf, ip->ni_path, pp->ni_path))
7448 				pp->ni_next = ip->ni_next;
7449 			} else {
7450 				PMD(PMD_NOINVOL, ("%s: free %s head\n",
7451 				    pmf, ip->ni_path))
7452 				ASSERT(pm_noinvol_head == ip);
7453 				pm_noinvol_head = ip->ni_next;
7454 			}
7455 			PM_UNLOCK_DIP(dip);
7456 			wasvolpmd = ip->ni_wasvolpmd;
7457 			rw_exit(&pm_noinvol_rwlock);
7458 			kmem_free(ip->ni_path, ip->ni_size);
7459 			kmem_free(ip, sizeof (*ip));
7460 			/*
7461 			 * Now walk up the tree decrementing devi_pm_noinvolpm
7462 			 * (and volpmd if appropriate)
7463 			 */
7464 			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
7465 			    wasvolpmd, pathbuf, dip);
7466 #ifdef DEBUG
7467 			if (pm_debug & PMD_NOINVOL)
7468 				pr_noinvol("noinvol_specd exit");
7469 #endif
7470 			kmem_free(pathbuf, MAXPATHLEN);
7471 			return;
7472 		}
7473 	}
7474 	kmem_free(pathbuf, MAXPATHLEN);
7475 	rw_exit(&pm_noinvol_rwlock);
7476 	PM_UNLOCK_DIP(dip);
7477 }
7478 
7479 int
7480 pm_all_components_off(dev_info_t *dip)
7481 {
7482 	int i;
7483 	pm_component_t *cp;
7484 
7485 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7486 		cp = PM_CP(dip, i);
7487 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7488 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7489 			return (0);
7490 	}
7491 	return (1);	/* all off */
7492 }
7493 
7494 /*
7495  * Make sure that all "no involuntary power cycles" devices are attached.
7496  * Called before doing a cpr suspend to make sure the driver has a say about
7497  * the power cycle
7498  */
7499 int
7500 pm_reattach_noinvol(void)
7501 {
7502 	PMD_FUNC(pmf, "reattach_noinvol")
7503 	pm_noinvol_t *ip;
7504 	char *path;
7505 	dev_info_t *dip;
7506 
7507 	/*
7508 	 * Prevent the modunload thread from unloading any modules until we
7509 	 * have completely stopped all kernel threads.
7510 	 */
7511 	modunload_disable();
7512 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7513 		/*
7514 		 * Forget we'v ever seen any entry
7515 		 */
7516 		ip->ni_persistent = 0;
7517 	}
7518 restart:
7519 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7520 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7521 #ifdef PMDDEBUG
7522 		major_t maj;
7523 		maj = ip->ni_major;
7524 #endif
7525 		path = ip->ni_path;
7526 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7527 			if (ip->ni_persistent) {
7528 				/*
7529 				 * If we weren't able to make this entry
7530 				 * go away, then we give up, as
7531 				 * holding/attaching the driver ought to have
7532 				 * resulted in this entry being deleted
7533 				 */
7534 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7535 				    "(%s|%d)\n", pmf, ip->ni_path,
7536 				    ddi_major_to_name(maj), (int)maj))
7537 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7538 				    ip->ni_path);
7539 				modunload_enable();
7540 				rw_exit(&pm_noinvol_rwlock);
7541 				return (0);
7542 			}
7543 			ip->ni_persistent++;
7544 			rw_exit(&pm_noinvol_rwlock);
7545 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7546 			dip = e_ddi_hold_devi_by_path(path, 0);
7547 			if (dip == NULL) {
7548 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7549 				    pmf, path, (int)maj))
7550 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7551 				    "driver", path);
7552 				modunload_enable();
7553 				return (0);
7554 			} else {
7555 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7556 				/*
7557 				 * Since the modunload thread is stopped, we
7558 				 * don't have to keep the driver held, which
7559 				 * saves a ton of bookkeeping
7560 				 */
7561 				ddi_release_devi(dip);
7562 				goto restart;
7563 			}
7564 		} else {
7565 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7566 			    pmf, ip->ni_path))
7567 			continue;
7568 		}
7569 	}
7570 	rw_exit(&pm_noinvol_rwlock);
7571 	return (1);
7572 }
7573 
/*
 * Undo the modunload_disable() that a successful pm_reattach_noinvol()
 * leaves in effect.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7579 
7580 /*
7581  * Display pm support code
7582  */
7583 
7584 
7585 /*
7586  * console frame-buffer power-mgmt gets enabled when debugging
7587  * services are not present or console fbpm override is set
7588  */
7589 void
7590 pm_cfb_setup(const char *stdout_path)
7591 {
7592 	PMD_FUNC(pmf, "cfb_setup")
7593 	extern int obpdebug;
7594 	char *devname;
7595 	dev_info_t *dip;
7596 	int devname_len;
7597 	extern dev_info_t *fbdip;
7598 
7599 	/*
7600 	 * By virtue of this function being called (from consconfig),
7601 	 * we know stdout is a framebuffer.
7602 	 */
7603 	stdout_is_framebuffer = 1;
7604 
7605 	if (obpdebug || (boothowto & RB_DEBUG)) {
7606 		if (pm_cfb_override == 0) {
7607 			/*
7608 			 * Console is frame buffer, but we want to suppress
7609 			 * pm on it because of debugging setup
7610 			 */
7611 			pm_cfb_enabled = 0;
7612 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7613 			    "console power management.");
7614 			/*
7615 			 * however, we still need to know which is the console
7616 			 * fb in order to suppress pm on it
7617 			 */
7618 		} else {
7619 			cmn_err(CE_WARN, "Kernel debugger present: see "
7620 			    "kmdb(1M) for interaction with power management.");
7621 		}
7622 	}
7623 #ifdef DEBUG
7624 	/*
7625 	 * IF console is fb and is power managed, don't do prom_printfs from
7626 	 * pm debug macro
7627 	 */
7628 	if (pm_cfb_enabled && !pm_debug_to_console) {
7629 		if (pm_debug)
7630 			prom_printf("pm debug output will be to log only\n");
7631 		pm_divertdebug++;
7632 	}
7633 #endif
7634 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7635 	devname_len = strlen(devname) + 1;
7636 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7637 	/* if the driver is attached */
7638 	if ((dip = fbdip) != NULL) {
7639 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7640 		    PM_DEVICE(dip)))
7641 		/*
7642 		 * We set up here as if the driver were power manageable in case
7643 		 * we get a later attach of a pm'able driver (which would result
7644 		 * in a panic later)
7645 		 */
7646 		cfb_dip = dip;
7647 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7648 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7649 		    PM_DEVICE(dip)))
7650 #ifdef DEBUG
7651 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7652 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7653 			    pmf, PM_DEVICE(dip)))
7654 		}
7655 #endif
7656 	} else {
7657 		char *ep;
7658 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7659 		pm_record_invol_path(devname,
7660 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7661 		    DDI_MAJOR_T_NONE);
7662 		for (ep = strrchr(devname, '/'); ep != devname;
7663 		    ep = strrchr(devname, '/')) {
7664 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7665 			*ep = '\0';
7666 			dip = pm_name_to_dip(devname, 0);
7667 			if (dip != NULL) {
7668 				/*
7669 				 * Walk up the tree incrementing
7670 				 * devi_pm_noinvolpm
7671 				 */
7672 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7673 				    0, 0, devname, dip);
7674 				break;
7675 			} else {
7676 				pm_record_invol_path(devname,
7677 				    PMC_NO_INVOL, 1, 0, 0, DDI_MAJOR_T_NONE);
7678 			}
7679 		}
7680 	}
7681 	kmem_free(devname, devname_len);
7682 }
7683 
7684 void
7685 pm_cfb_rele(void)
7686 {
7687 	mutex_enter(&pm_cfb_lock);
7688 	/*
7689 	 * this call isn't using the console any  more, it is ok to take it
7690 	 * down if the count goes to 0
7691 	 */
7692 	cfb_inuse--;
7693 	mutex_exit(&pm_cfb_lock);
7694 }
7695 
7696 /*
7697  * software interrupt handler for fbpm; this function exists because we can't
7698  * bring up the frame buffer power from above lock level.  So if we need to,
7699  * we instead schedule a softint that runs this routine and takes us into
7700  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7701  */
7702 static uint_t
7703 pm_cfb_softint(caddr_t int_handler_arg)
7704 {
7705 	_NOTE(ARGUNUSED(int_handler_arg))
7706 	int rval = DDI_INTR_UNCLAIMED;
7707 
7708 	mutex_enter(&pm_cfb_lock);
7709 	if (pm_soft_pending) {
7710 		mutex_exit(&pm_cfb_lock);
7711 		debug_enter((char *)NULL);
7712 		/* acquired in debug_enter before calling pm_cfb_trigger */
7713 		pm_cfb_rele();
7714 		mutex_enter(&pm_cfb_lock);
7715 		pm_soft_pending = 0;
7716 		mutex_exit(&pm_cfb_lock);
7717 		rval = DDI_INTR_CLAIMED;
7718 	} else
7719 		mutex_exit(&pm_cfb_lock);
7720 
7721 	return (rval);
7722 }
7723 
7724 void
7725 pm_cfb_setup_intr(void)
7726 {
7727 	PMD_FUNC(pmf, "cfb_setup_intr")
7728 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7729 	void pm_cfb_check_and_powerup(void);
7730 
7731 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7732 #ifdef PMDDEBUG
7733 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7734 #endif
7735 
7736 	if (!stdout_is_framebuffer) {
7737 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7738 		return;
7739 	}
7740 
7741 	/*
7742 	 * setup software interrupt handler
7743 	 */
7744 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7745 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7746 		panic("pm: unable to register soft intr.");
7747 
7748 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7749 }
7750 
7751 /*
7752  * Checks to see if it is safe to write to the console wrt power management
7753  * (i.e. if the console is a framebuffer, then it must be at full power)
7754  * returns 1 when power is off (power-up is needed)
7755  * returns 0 when power is on (power-up not needed)
7756  */
7757 int
7758 pm_cfb_check_and_hold(void)
7759 {
7760 	/*
7761 	 * cfb_dip is set iff console is a power manageable frame buffer
7762 	 * device
7763 	 */
7764 	extern int modrootloaded;
7765 
7766 	mutex_enter(&pm_cfb_lock);
7767 	cfb_inuse++;
7768 	ASSERT(cfb_inuse);	/* wrap? */
7769 	if (modrootloaded && cfb_dip) {
7770 		/*
7771 		 * don't power down the frame buffer, the prom is using it
7772 		 */
7773 		if (pm_cfb_comps_off) {
7774 			mutex_exit(&pm_cfb_lock);
7775 			return (1);
7776 		}
7777 	}
7778 	mutex_exit(&pm_cfb_lock);
7779 	return (0);
7780 }
7781 
7782 /*
7783  * turn on cfb power (which is known to be off).
7784  * Must be called below lock level!
7785  */
7786 void
7787 pm_cfb_powerup(void)
7788 {
7789 	pm_info_t *info;
7790 	int norm;
7791 	int ccount, ci;
7792 	int unused;
7793 #ifdef DEBUG
7794 	/*
7795 	 * Can't reenter prom_prekern, so suppress pm debug messages
7796 	 * (still go to circular buffer).
7797 	 */
7798 	mutex_enter(&pm_debug_lock);
7799 	pm_divertdebug++;
7800 	mutex_exit(&pm_debug_lock);
7801 #endif
7802 	info = PM_GET_PM_INFO(cfb_dip);
7803 	ASSERT(info);
7804 
7805 	ccount = PM_NUMCMPTS(cfb_dip);
7806 	for (ci = 0; ci < ccount; ci++) {
7807 		norm = pm_get_normal_power(cfb_dip, ci);
7808 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7809 		    PM_CANBLOCK_BYPASS, 0, &unused);
7810 	}
7811 #ifdef DEBUG
7812 	mutex_enter(&pm_debug_lock);
7813 	pm_divertdebug--;
7814 	mutex_exit(&pm_debug_lock);
7815 #endif
7816 }
7817 
/*
 * Check if the console framebuffer is powered up.  If not power it up.
 * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
 * must be released by calling pm_cfb_rele when the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	/* the hold is taken whether or not a power-up turns out to be needed */
	if (pm_cfb_check_and_hold() == 0)
		return;

	pm_cfb_powerup();
}
7830 
7831 /*
7832  * Trigger a low level interrupt to power up console frame buffer.
7833  */
7834 void
7835 pm_cfb_trigger(void)
7836 {
7837 	if (cfb_dip == NULL)
7838 		return;
7839 
7840 	mutex_enter(&pm_cfb_lock);
7841 	/*
7842 	 * If machine appears to be hung, pulling the keyboard connector of
7843 	 * the console will cause a high level interrupt and go to debug_enter.
7844 	 * But, if the fb is powered down, this routine will be called to bring
7845 	 * it up (by generating a softint to do the work).  If soft interrupts
7846 	 * are not running, and the keyboard connector is pulled again, the
7847 	 * following code detects this condition and calls panic which allows
7848 	 * the fb to be brought up from high level.
7849 	 *
7850 	 * If two nearly simultaneous calls to debug_enter occur (both from
7851 	 * high level) the code described above will cause a panic.
7852 	 */
7853 	if (lbolt <= pm_soft_pending) {
7854 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7855 		panic(panicstr);	/* does a power up at any intr level */
7856 		/* NOTREACHED */
7857 	}
7858 	pm_soft_pending = lbolt;
7859 	mutex_exit(&pm_cfb_lock);
7860 	ddi_trigger_softintr(pm_soft_id);
7861 }
7862 
7863 major_t
7864 pm_path_to_major(char *path)
7865 {
7866 	PMD_FUNC(pmf, "path_to_major")
7867 	char *np, *ap, *bp;
7868 	major_t ret;
7869 	size_t len;
7870 	static major_t i_path_to_major(char *, char *);
7871 
7872 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7873 
7874 	np = strrchr(path, '/');
7875 	if (np != NULL)
7876 		np++;
7877 	else
7878 		np = path;
7879 	len = strlen(np) + 1;
7880 	bp = kmem_alloc(len, KM_SLEEP);
7881 	(void) strcpy(bp, np);
7882 	if ((ap = strchr(bp, '@')) != NULL) {
7883 		*ap = '\0';
7884 	}
7885 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7886 	ret = i_path_to_major(path, np);
7887 	kmem_free(bp, len);
7888 	return (ret);
7889 }
7890 
7891 #ifdef DEBUG
7892 #ifndef sparc
7893 clock_t pt_sleep = 1;
7894 #endif
7895 
7896 char	*pm_msgp;
7897 char	*pm_bufend;
7898 char	*pm_msgbuf = NULL;
7899 int	pm_logpages = 0x100;
7900 #include <sys/sunldi.h>
7901 #include <sys/uio.h>
7902 clock_t	pm_log_sleep = 1000;
7903 int	pm_extra_cr = 1;
7904 volatile int pm_tty = 1;
7905 
7906 #define	PMLOGPGS	pm_logpages
7907 
7908 #if defined(__x86)
7909 void pm_printf(char *s);
7910 #endif
7911 
7912 /*PRINTFLIKE1*/
7913 void
7914 pm_log(const char *fmt, ...)
7915 {
7916 	va_list adx;
7917 	size_t size;
7918 
7919 	mutex_enter(&pm_debug_lock);
7920 	if (pm_msgbuf == NULL) {
7921 		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
7922 		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
7923 		pm_msgp = pm_msgbuf;
7924 	}
7925 	va_start(adx, fmt);
7926 	size = vsnprintf(NULL, 0, fmt, adx) + 1;
7927 	va_end(adx);
7928 	va_start(adx, fmt);
7929 	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
7930 		bzero(pm_msgp, pm_bufend - pm_msgp);
7931 		(void) vsnprintf(pm_msgbuf, size, fmt, adx);
7932 		if (!pm_divertdebug)
7933 			prom_printf("%s", pm_msgp);
7934 #if defined(__x86)
7935 		if (pm_tty) {
7936 			pm_printf(pm_msgp);
7937 			if (pm_extra_cr)
7938 				pm_printf("\r");
7939 		}
7940 #endif
7941 		pm_msgp = pm_msgbuf + size;
7942 	} else {
7943 		(void) vsnprintf(pm_msgp, size, fmt, adx);
7944 #if defined(__x86)
7945 		if (pm_tty) {
7946 			pm_printf(pm_msgp);
7947 			if (pm_extra_cr)
7948 				pm_printf("\r");
7949 		}
7950 #endif
7951 		if (!pm_divertdebug)
7952 			prom_printf("%s", pm_msgp);
7953 		pm_msgp += size;
7954 	}
7955 	va_end(adx);
7956 	mutex_exit(&pm_debug_lock);
7957 	drv_usecwait((clock_t)pm_log_sleep);
7958 }
7959 #endif	/* DEBUG */
7960 
7961 /*
7962  * We want to save the state of any directly pm'd devices over the suspend/
7963  * resume process so that we can put them back the way the controlling
7964  * process left them.
7965  */
7966 void
7967 pm_save_direct_levels(void)
7968 {
7969 	pm_processes_stopped = 1;
7970 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7971 }
7972 
7973 static int
7974 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7975 {
7976 	_NOTE(ARGUNUSED(arg))
7977 	int i;
7978 	int *ip;
7979 	pm_info_t *info = PM_GET_PM_INFO(dip);
7980 
7981 	if (!info)
7982 		return (DDI_WALK_CONTINUE);
7983 
7984 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
7985 		if (PM_NUMCMPTS(dip) > 2) {
7986 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
7987 			    sizeof (int), KM_SLEEP);
7988 			ip = info->pmi_lp;
7989 		} else {
7990 			ip = info->pmi_levels;
7991 		}
7992 		/* autopm and processes are stopped, ok not to lock power */
7993 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
7994 			*ip++ = PM_CURPOWER(dip, i);
7995 		/*
7996 		 * There is a small window between stopping the
7997 		 * processes and setting pm_processes_stopped where
7998 		 * a driver could get hung up in a pm_raise_power()
7999 		 * call.  Free any such driver now.
8000 		 */
8001 		pm_proceed(dip, PMP_RELEASE, -1, -1);
8002 	}
8003 
8004 	return (DDI_WALK_CONTINUE);
8005 }
8006 
8007 void
8008 pm_restore_direct_levels(void)
8009 {
8010 	/*
8011 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
8012 	 * threads failed) then we don't want to try to restore them
8013 	 */
8014 	if (!pm_processes_stopped)
8015 		return;
8016 
8017 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
8018 	pm_processes_stopped = 0;
8019 }
8020 
8021 static int
8022 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
8023 {
8024 	_NOTE(ARGUNUSED(arg))
8025 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
8026 	int i, nc, result;
8027 	int *ip;
8028 
8029 	pm_info_t *info = PM_GET_PM_INFO(dip);
8030 	if (!info)
8031 		return (DDI_WALK_CONTINUE);
8032 
8033 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8034 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
8035 			ip = &info->pmi_lp[nc - 1];
8036 		} else {
8037 			ip = &info->pmi_levels[nc - 1];
8038 		}
8039 		/*
8040 		 * Because fb drivers fail attempts to turn off the
8041 		 * fb when the monitor is on, but treat a request to
8042 		 * turn on the monitor as a request to turn on the
8043 		 * fb too, we process components in descending order
8044 		 * Because autopm is disabled and processes aren't
8045 		 * running, it is ok to examine current power outside
8046 		 * of the power lock
8047 		 */
8048 		for (i = nc - 1; i >= 0; i--, ip--) {
8049 			if (PM_CURPOWER(dip, i) == *ip)
8050 				continue;
8051 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
8052 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
8053 				cmn_err(CE_WARN, "cpr: unable "
8054 				    "to restore power level of "
8055 				    "component %d of directly "
8056 				    "power manged device %s@%s"
8057 				    " to %d",
8058 				    i, PM_NAME(dip),
8059 				    PM_ADDR(dip), *ip);
8060 				PMD(PMD_FAIL, ("%s: failed to restore "
8061 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
8062 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
8063 				    PM_CURPOWER(dip, i), *ip, result))
8064 			}
8065 		}
8066 		if (nc > 2) {
8067 			kmem_free(info->pmi_lp, nc * sizeof (int));
8068 			info->pmi_lp = NULL;
8069 		}
8070 	}
8071 	return (DDI_WALK_CONTINUE);
8072 }
8073 
8074 /*
8075  * Stolen from the bootdev module
8076  * attempt to convert a path to a major number
8077  */
8078 static major_t
8079 i_path_to_major(char *path, char *leaf_name)
8080 {
8081 	extern major_t path_to_major(char *pathname);
8082 	major_t maj;
8083 
8084 	if ((maj = path_to_major(path)) == DDI_MAJOR_T_NONE) {
8085 		maj = ddi_name_to_major(leaf_name);
8086 	}
8087 
8088 	return (maj);
8089 }
8090 
/*
 * When the user calls rem_drv, we need to forget the
 * no-involuntary-power-cycles state kept for that driver.
 * An entry in the list means that the device is detached, so we need to
 * adjust its ancestors as if they had just seen this attach, and any detached
 * ancestors need to have their list entries adjusted.
 *
 * This is the locked entry point: it holds pm_remdrv_lock around the call
 * to i_pm_driver_removed(), which does the actual work for `major'.
 */
void
pm_driver_removed(major_t major)
{
	static void i_pm_driver_removed(major_t major);

	/*
	 * Serialize removal of drivers. This is to keep ancestors of
	 * a node that is being deleted from getting deleted and added back
	 * with different counters.
	 */
	mutex_enter(&pm_remdrv_lock);
	i_pm_driver_removed(major);
	mutex_exit(&pm_remdrv_lock);
}
8111 
8112 /*
8113  * This routine is called recursively by pm_noinvol_process_ancestors()
8114  */
8115 static void
8116 i_pm_driver_removed(major_t major)
8117 {
8118 	PMD_FUNC(pmf, "driver_removed")
8119 	static void adjust_ancestors(char *, int);
8120 	static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8121 	static void pm_noinvol_process_ancestors(char *);
8122 	pm_noinvol_t *ip, *pp = NULL;
8123 	int wasvolpmd;
8124 	ASSERT(major != DDI_MAJOR_T_NONE);
8125 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8126 again:
8127 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8128 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8129 		if (major != ip->ni_major)
8130 			continue;
8131 		/*
8132 		 * If it is an ancestor of no-invol node, which is
8133 		 * not removed, skip it. This is to cover the case of
8134 		 * ancestor removed without removing its descendants.
8135 		 */
8136 		if (pm_is_noinvol_ancestor(ip)) {
8137 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8138 			continue;
8139 		}
8140 		wasvolpmd = ip->ni_wasvolpmd;
8141 		/*
8142 		 * remove the entry from the list
8143 		 */
8144 		if (pp) {
8145 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8146 			    pmf, ip->ni_path, pp->ni_path))
8147 			pp->ni_next = ip->ni_next;
8148 		} else {
8149 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8150 			    ip->ni_path))
8151 			ASSERT(pm_noinvol_head == ip);
8152 			pm_noinvol_head = ip->ni_next;
8153 		}
8154 		rw_exit(&pm_noinvol_rwlock);
8155 		adjust_ancestors(ip->ni_path, wasvolpmd);
8156 		/*
8157 		 * Had an ancestor been removed before this node, it would have
8158 		 * been skipped. Adjust the no-invol counters for such skipped
8159 		 * ancestors.
8160 		 */
8161 		pm_noinvol_process_ancestors(ip->ni_path);
8162 		kmem_free(ip->ni_path, ip->ni_size);
8163 		kmem_free(ip, sizeof (*ip));
8164 		goto again;
8165 	}
8166 	rw_exit(&pm_noinvol_rwlock);
8167 }
8168 
8169 /*
8170  * returns 1, if *aip is a ancestor of a no-invol node
8171  *	   0, otherwise
8172  */
8173 static int
8174 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8175 {
8176 	pm_noinvol_t *ip;
8177 
8178 	ASSERT(strlen(aip->ni_path) != 0);
8179 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8180 		if (ip == aip)
8181 			continue;
8182 		/*
8183 		 * To be an ancestor, the path must be an initial substring of
8184 		 * the descendent, and end just before a '/' in the
8185 		 * descendent's path.
8186 		 */
8187 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8188 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8189 			return (1);
8190 	}
8191 	return (0);
8192 }
8193 
/*
 * Adjust the no-invol accounting of the ancestors of the node named by
 * `path' after that node's entry has been removed.
 *
 * The parent's path is derived by truncating a private copy of `path' at
 * its last '/'; this function itself only reads `path'.  If the path has
 * no '/', there are no ancestors and nothing is done.
 *
 * If the parent has a dev_info node, pm_noinvol_update() is called with
 * PM_BP_NOINVOL_REMDRV to adjust counts up the tree.  Otherwise the parent
 * may exist only as an entry on the pm_noinvol list: that entry's counters
 * are decremented, the entry is removed if it is no longer needed, and the
 * function recurses on the parent's path to continue up the tree.
 *
 * wasvolpmd is passed through to pm_noinvol_update() and, for list-only
 * ancestors, selects whether ni_volpmd is decremented as well.
 */
static void
adjust_ancestors(char *path, int wasvolpmd)
{
	PMD_FUNC(pmf, "adjust_ancestors")
	char *cp;
	pm_noinvol_t *lp;
	pm_noinvol_t *pp = NULL;	/* predecessor of lp during the scan */
	major_t locked = DDI_MAJOR_T_NONE;
	dev_info_t *dip;
	char	*pathbuf;		/* private copy, cut down to parent */
	size_t pathbuflen = strlen(path) + 1;

	/*
	 * First we look up the ancestor's dip.  If we find it, then we
	 * adjust counts up the tree
	 */
	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
	(void) strcpy(pathbuf, path);
	cp = strrchr(pathbuf, '/');
	if (cp == NULL)	{
		/* if no ancestors, then nothing to do */
		kmem_free(pathbuf, pathbuflen);
		return;
	}
	/* chop off the leaf: pathbuf now names the immediate parent */
	*cp = '\0';
	/*
	 * NOTE(review): the second argument presumably asks for a hold on
	 * the returned dip, matching the ddi_release_devi() below -- confirm
	 * against pm_name_to_dip().
	 */
	dip = pm_name_to_dip(pathbuf, 1);
	if (dip != NULL) {
		locked = PM_MAJOR(dip);

		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
		    path, dip);

		if (locked != DDI_MAJOR_T_NONE)
			ddi_release_devi(dip);
	} else {
		char *apath;		/* parent path handed to recursion */
		size_t len = strlen(pathbuf) + 1;
		int  lock_held = 1;	/* cleared once the rwlock is dropped */

		/*
		 * Now check for ancestors that exist only in the list
		 */
		apath = kmem_alloc(len, KM_SLEEP);
		(void) strcpy(apath, pathbuf);
		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
			/*
			 * This can only happen once.  Since we have to drop
			 * the lock, we need to extract the relevant info.
			 */
			if (strcmp(pathbuf, lp->ni_path) == 0) {
				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
				    lp->ni_path, lp->ni_noinvolpm,
				    lp->ni_noinvolpm - 1))
				lp->ni_noinvolpm--;
				if (wasvolpmd && lp->ni_volpmd) {
					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
					    "%d\n", pmf, lp->ni_path,
					    lp->ni_volpmd, lp->ni_volpmd - 1))
					lp->ni_volpmd--;
				}
				/*
				 * remove the entry from the list, if there
				 * are no more no-invol descendants and node
				 * itself is not a no-invol node.
				 */
				if (!(lp->ni_noinvolpm ||
				    (lp->ni_flags & PMC_NO_INVOL))) {
					ASSERT(lp->ni_volpmd == 0);
					if (pp) {
						PMD(PMD_NOINVOL, ("%s: freeing "
						    "%s, prev is %s\n", pmf,
						    lp->ni_path, pp->ni_path))
						pp->ni_next = lp->ni_next;
					} else {
						PMD(PMD_NOINVOL, ("%s: free %s "
						    "head\n", pmf, lp->ni_path))
						ASSERT(pm_noinvol_head == lp);
						pm_noinvol_head = lp->ni_next;
					}
					/*
					 * lp is unlinked; drop the lock
					 * before recursing, since the
					 * recursion takes it again.
					 */
					lock_held = 0;
					rw_exit(&pm_noinvol_rwlock);
					adjust_ancestors(apath, wasvolpmd);
					/* restore apath */
					(void) strcpy(apath, pathbuf);
					kmem_free(lp->ni_path, lp->ni_size);
					kmem_free(lp, sizeof (*lp));
				}
				break;
			}
		}
		if (lock_held)
			rw_exit(&pm_noinvol_rwlock);
		/*
		 * Continue up the tree from the parent.  Note that when the
		 * parent's entry was removed above, this is the second
		 * recursion on the same path.
		 */
		adjust_ancestors(apath, wasvolpmd);
		kmem_free(apath, len);
	}
	kmem_free(pathbuf, pathbuflen);
}
8297 
8298 /*
8299  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8300  * which were skipped even though their drivers were removed.
8301  */
8302 static void
8303 pm_noinvol_process_ancestors(char *path)
8304 {
8305 	pm_noinvol_t *lp;
8306 
8307 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8308 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8309 		if (strstr(path, lp->ni_path) &&
8310 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8311 			rw_exit(&pm_noinvol_rwlock);
8312 			i_pm_driver_removed(lp->ni_major);
8313 			return;
8314 		}
8315 	}
8316 	rw_exit(&pm_noinvol_rwlock);
8317 }
8318 
8319 /*
8320  * Returns true if (detached) device needs to be kept up because it exported the
8321  * "no-involuntary-power-cycles" property or we're pretending it did (console
8322  * fb case) or it is an ancestor of such a device and has used up the "one
8323  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8324  * upon detach.  In any event, we need an exact hit on the path or we return
8325  * false.
8326  */
8327 int
8328 pm_noinvol_detached(char *path)
8329 {
8330 	PMD_FUNC(pmf, "noinvol_detached")
8331 	pm_noinvol_t *ip;
8332 	int ret = 0;
8333 
8334 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8335 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8336 		if (strcmp(path, ip->ni_path) == 0) {
8337 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8338 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8339 				    "%s\n", pmf, path))
8340 				ret = 1;
8341 				break;
8342 			}
8343 #ifdef	DEBUG
8344 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8345 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8346 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8347 				    path))
8348 #endif
8349 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8350 			break;
8351 		}
8352 	}
8353 	rw_exit(&pm_noinvol_rwlock);
8354 	return (ret);
8355 }
8356 
/*
 * Return nonzero if dip is the node recorded in cfb_dip, zero otherwise.
 */
int
pm_is_cfb(dev_info_t *dip)
{
	return (dip == cfb_dip);
}
8362 
#ifdef	DEBUG
/*
 * Return true if all components of the console frame buffer are at
 * "normal" power, i.e., fully on.  For the case where the console is not
 * a framebuffer, we also return true.
 *
 * Implemented as a test that pm_cfb_comps_off is zero.
 */
int
pm_cfb_is_up(void)
{
	return (pm_cfb_comps_off == 0);
}
#endif
8375 
/*
 * Prevent scan from powering down the node by incrementing its
 * kidsupcnt (calls e_pm_hold_rele_power() with a count of 1).
 * Undone by pm_rele_power().
 */
void
pm_hold_power(dev_info_t *dip)
{
	e_pm_hold_rele_power(dip, 1);
}
8385 
/*
 * Release a hold taken by pm_hold_power() by decrementing the kidsupcnt
 * (calls e_pm_hold_rele_power() with a count of -1), allowing scan
 * to power down the node if all conditions are met.
 */
void
pm_rele_power(dev_info_t *dip)
{
	e_pm_hold_rele_power(dip, -1);
}
8395 
8396 /*
8397  * A wrapper of pm_all_to_normal() to power up a dip
8398  * to its normal level
8399  */
8400 int
8401 pm_powerup(dev_info_t *dip)
8402 {
8403 	PMD_FUNC(pmf, "pm_powerup")
8404 
8405 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8406 	ASSERT(!(servicing_interrupt()));
8407 
8408 	/*
8409 	 * in case this node is not already participating pm
8410 	 */
8411 	if (!PM_GET_PM_INFO(dip)) {
8412 		if (!DEVI_IS_ATTACHING(dip))
8413 			return (DDI_SUCCESS);
8414 		if (pm_start(dip) != DDI_SUCCESS)
8415 			return (DDI_FAILURE);
8416 		if (!PM_GET_PM_INFO(dip))
8417 			return (DDI_SUCCESS);
8418 	}
8419 
8420 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8421 }
8422 
8423 int
8424 pm_rescan_walk(dev_info_t *dip, void *arg)
8425 {
8426 	_NOTE(ARGUNUSED(arg))
8427 
8428 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8429 		return (DDI_WALK_CONTINUE);
8430 
8431 	/*
8432 	 * Currently pm_cpr_callb/resume code is the only caller
8433 	 * and it needs to make sure that stopped scan get
8434 	 * reactivated. Otherwise, rescan walk needn't reactive
8435 	 * stopped scan.
8436 	 */
8437 	pm_scan_init(dip);
8438 
8439 	(void) pm_rescan(dip);
8440 	return (DDI_WALK_CONTINUE);
8441 }
8442 
8443 static dev_info_t *
8444 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8445 {
8446 	dev_info_t *wdip, *pdip;
8447 
8448 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8449 		pdip = ddi_get_parent(wdip);
8450 		if (pdip == dip)
8451 			return (wdip);
8452 	}
8453 	return (NULL);
8454 }
8455 
/*
 * Bus power operation handler.  Dispatches on op:
 *
 * BUS_POWER_CHILD_PWRCHG
 *	arg is a pm_bp_child_pwrchg_t.  If the target (bpc_dip) is not the
 *	child of dip on the path to it, that intermediate child is either
 *	handed the request through its own bus_power function (when it wants
 *	notification) or is held, powered up, and recursed into.  When the
 *	target is reached, pm_busop_set_power() performs the change.
 *
 * BUS_POWER_NEXUS_PWRUP
 *	arg is a pm_bp_nexus_pwrup_t.  After validating dip's pm info,
 *	component and level, builds a child power change request for dip
 *	itself (direction PM_LEVEL_UPONLY) and passes it to
 *	pm_busop_set_power().
 *
 * BUS_POWER_NOINVOL
 *	arg is a pm_bp_noinvol_t.  Walks down toward bpni_dip in the same
 *	manner, calling pm_noinvol_update_node() on nodes with pm info as
 *	the calls return successfully.
 *
 * Any other op returns DDI_FAILURE.  Otherwise the return value is that of
 * the operation performed (DDI_SUCCESS or a failure code).
 */
int
pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	PMD_FUNC(pmf, "bp_bus_power")
	dev_info_t	*cdip;		/* child of dip on the way to tdip */
	pm_info_t	*cinfo;		/* pm info of cdip, may be NULL */
	pm_bp_child_pwrchg_t	*bpc;
	pm_sp_misc_t		*pspm;
	pm_bp_nexus_pwrup_t *bpn;
	pm_bp_child_pwrchg_t new_bpc;
	pm_bp_noinvol_t *bpi;
	dev_info_t *tdip;		/* target of the operation */
	char *pathbuf;
	int		ret = DDI_SUCCESS;
	int		errno = 0;
	pm_component_t *cp;

	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
	    pm_decode_op(op)))
	switch (op) {
	case BUS_POWER_CHILD_PWRCHG:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		pspm = (pm_sp_misc_t *)bpc->bpc_private;
		tdip = bpc->bpc_dip;
		cdip = pm_get_next_descendent(dip, tdip);
		/*
		 * NOTE(review): unlike the BUS_POWER_NOINVOL case below,
		 * cdip is used here without a NULL check -- confirm callers
		 * never pass a target that is dip itself.
		 */
		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			/*
			 * If the node is an involved parent, it needs to
			 * power up the node as it is needed.  There is nothing
			 * else the framework can do here.
			 */
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_SET, ("%s: call bus_power for "
				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
				    impl_arg, op, arg, result));
			}
			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
			    pspm->pspm_direction == PM_LEVEL_EXACT);
			/*
			 * we presume that the parent needs to be up in
			 * order for the child to change state (either
			 * because it must already be on if the child is on
			 * (and the pm_all_to_normal_nexus() will be a nop)
			 * or because it will need to be on for the child
			 * to come on; so we make the call regardless
			 */
			pm_hold_power(cdip);
			if (cinfo) {
				pm_canblock_t canblock = pspm->pspm_canblock;
				ret = pm_all_to_normal_nexus(cdip, canblock);
				if (ret != DDI_SUCCESS) {
					/* undo the hold taken above */
					pm_rele_power(cdip);
					return (ret);
				}
			}
			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
			    PM_DEVICE(cdip)))
			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
			    result);
			pm_rele_power(cdip);
		} else {
			/* cdip is the target itself: do the power change */
			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
			    result);
		}
		return (ret);

	case BUS_POWER_NEXUS_PWRUP:
		bpn = (pm_bp_nexus_pwrup_t *)arg;
		pspm = (pm_sp_misc_t *)bpn->bpn_private;

		if (!e_pm_valid_info(dip, NULL) ||
		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
			    pmf, PM_DEVICE(dip)))
			*pspm->pspm_errnop = EIO;
			*(int *)result = DDI_FAILURE;
			return (DDI_FAILURE);
		}

		ASSERT(bpn->bpn_dip == dip);
		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
		    PM_DEVICE(dip)))
		/* recast the nexus request as a power change on dip itself */
		new_bpc.bpc_dip = dip;
		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
		new_bpc.bpc_comp = bpn->bpn_comp;
		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
		new_bpc.bpc_nlevel = bpn->bpn_level;
		new_bpc.bpc_private = bpn->bpn_private;
		/*
		 * NOTE(review): bpc_private aliases bpn->bpn_private, so the
		 * two stores below modify the caller's pm_sp_misc_t.  After
		 * this function returns, its pspm_errnop still points at the
		 * local `errno' above, and the value stored there is not
		 * copied back to the caller -- confirm that no caller
		 * dereferences pspm_errnop afterwards.
		 */
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
		    PM_LEVEL_UPONLY;
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
		    &errno;
		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
		    (void *)&new_bpc, result);
		kmem_free(pathbuf, MAXPATHLEN);
		return (ret);

	case BUS_POWER_NOINVOL:
		bpi = (pm_bp_noinvol_t *)arg;
		tdip = bpi->bpni_dip;
		cdip = pm_get_next_descendent(dip, tdip);

		/* In case of rem_drv, the leaf node has been removed */
		if (cdip == NULL)
			return (DDI_SUCCESS);

		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_NOINVOL,
				    ("%s: call bus_power for %s@%s(%s#%d)\n",
				    pmf, PM_DEVICE(cdip)))
				ret = (*PM_BUS_POWER_FUNC(cdip))
				    (cdip, NULL, op, arg, result);
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			} else {
				PMD(PMD_NOINVOL,
				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
				    PM_DEVICE(cdip)))
				ret = pm_busop_bus_power(cdip, NULL, op,
				    arg, result);
				/*
				 * Update the current node.
				 */
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			}
		} else {
			/*
			 * For attach, detach, power up:
			 * Do nothing for leaf node since its
			 * counts are already updated.
			 * For CFB and driver removal, since the
			 * path and the target dip passed in is up to and incl.
			 * the immediate ancestor, need to do the update.
			 */
			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
			    "reached\n", pmf, PM_DEVICE(cdip)))
			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
				(void) pm_noinvol_update_node(cdip, bpi);
			return (DDI_SUCCESS);
		}

	default:
		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
		return (DDI_FAILURE);
	}
}
8616 
8617 static int
8618 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8619     void *arg, void *resultp)
8620 {
8621 	_NOTE(ARGUNUSED(impl_arg))
8622 	PMD_FUNC(pmf, "bp_set_power")
8623 	pm_ppm_devlist_t *devl = NULL;
8624 	int clevel, circ;
8625 #ifdef	DEBUG
8626 	int circ_db, ccirc_db;
8627 #endif
8628 	int ret = DDI_SUCCESS;
8629 	dev_info_t *cdip;
8630 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8631 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8632 	pm_canblock_t canblock = pspm->pspm_canblock;
8633 	int scan = pspm->pspm_scan;
8634 	int comp = bpc->bpc_comp;
8635 	int olevel = bpc->bpc_olevel;
8636 	int nlevel = bpc->bpc_nlevel;
8637 	int comps_off_incr = 0;
8638 	dev_info_t *pdip = ddi_get_parent(dip);
8639 	int dodeps;
8640 	int direction = pspm->pspm_direction;
8641 	int *errnop = pspm->pspm_errnop;
8642 #ifdef PMDDEBUG
8643 	char *dir = pm_decode_direction(direction);
8644 #endif
8645 	int *iresp = (int *)resultp;
8646 	time_t	idletime, thresh;
8647 	pm_component_t *cp = PM_CP(dip, comp);
8648 	int work_type;
8649 
8650 	*iresp = DDI_SUCCESS;
8651 	*errnop = 0;
8652 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8653 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8654 	    pm_decode_op(op)))
8655 
8656 	/*
8657 	 * The following set of conditions indicate we are here to handle a
8658 	 * driver's pm_[raise|lower]_power request, but the device is being
8659 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8660 	 * we want to pm_block and pass a status back to the caller based
8661 	 * on whether the controlling process's next activity on the device
8662 	 * matches the current request or not.  This distinction tells
8663 	 * downstream functions to avoid calling into a driver or changing
8664 	 * the framework's power state.  To actually block, we need:
8665 	 *
8666 	 * PM_ISDIRECT(dip)
8667 	 *	no reason to block unless a process is directly controlling dev
8668 	 * direction != PM_LEVEL_EXACT
8669 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8670 	 * !pm_processes_stopped
8671 	 *	don't block if controlling proc already be stopped for cpr
8672 	 * canblock != PM_CANBLOCK_BYPASS
8673 	 *	our caller must not have explicitly prevented blocking
8674 	 */
8675 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8676 		PM_LOCK_DIP(dip);
8677 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8678 			/* releases dip lock */
8679 			ret = pm_busop_match_request(dip, bpc);
8680 			if (ret == EAGAIN) {
8681 				PM_LOCK_DIP(dip);
8682 				continue;
8683 			}
8684 			return (*iresp = ret);
8685 		}
8686 		PM_UNLOCK_DIP(dip);
8687 	}
8688 	/* BC device is never scanned, so power will stick until we are done */
8689 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8690 	    direction != PM_LEVEL_DOWNONLY) {
8691 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8692 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8693 		    canblock, 0, resultp) != DDI_SUCCESS) {
8694 			/* *resultp set by pm_set_power */
8695 			return (DDI_FAILURE);
8696 		}
8697 	}
8698 	if (PM_WANTS_NOTIFICATION(pdip)) {
8699 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8700 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8701 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8702 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8703 		if (ret != DDI_SUCCESS) {
8704 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8705 			    pmf, PM_DEVICE(pdip)))
8706 			return (DDI_FAILURE);
8707 		}
8708 	} else {
8709 		/*
8710 		 * Since we don't know what the actual power level is,
8711 		 * we place a power hold on the parent no matter what
8712 		 * component and level is changing.
8713 		 */
8714 		pm_hold_power(pdip);
8715 	}
8716 	PM_LOCK_POWER(dip, &circ);
8717 	clevel = PM_CURPOWER(dip, comp);
8718 	/*
8719 	 * It's possible that a call was made to pm_update_maxpower()
8720 	 * on another thread before we took the lock above. So, we need to
8721 	 * make sure that this request isn't processed after the
8722 	 * change of power executed on behalf of pm_update_maxpower().
8723 	 */
8724 	if (nlevel > pm_get_normal_power(dip, comp)) {
8725 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8726 		    pmf))
8727 		ret = DDI_FAILURE;
8728 		*iresp = DDI_FAILURE;
8729 		goto post_notify;
8730 	}
8731 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8732 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8733 	    clevel, dir))
8734 	switch (direction) {
8735 	case PM_LEVEL_UPONLY:
8736 		/* Powering up */
8737 		if (clevel >= nlevel) {
8738 			PMD(PMD_SET, ("%s: current level is already "
8739 			    "at or above the requested level.\n", pmf))
8740 			*iresp = DDI_SUCCESS;
8741 			ret = DDI_SUCCESS;
8742 			goto post_notify;
8743 		}
8744 		break;
8745 	case PM_LEVEL_EXACT:
8746 		/* specific level request */
8747 		if (clevel == nlevel && !PM_ISBC(dip)) {
8748 			PMD(PMD_SET, ("%s: current level is already "
8749 			    "at the requested level.\n", pmf))
8750 			*iresp = DDI_SUCCESS;
8751 			ret = DDI_SUCCESS;
8752 			goto post_notify;
8753 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8754 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8755 			if (!pm_cfb_enabled) {
8756 				PMD(PMD_ERROR | PMD_CFB,
8757 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8758 				*errnop = EINVAL;
8759 				*iresp = DDI_FAILURE;
8760 				ret = DDI_FAILURE;
8761 				goto post_notify;
8762 			}
8763 			mutex_enter(&pm_cfb_lock);
8764 			while (cfb_inuse) {
8765 				mutex_exit(&pm_cfb_lock);
8766 				if (delay_sig(1) == EINTR) {
8767 					ret = DDI_FAILURE;
8768 					*iresp = DDI_FAILURE;
8769 					*errnop = EINTR;
8770 					goto post_notify;
8771 				}
8772 				mutex_enter(&pm_cfb_lock);
8773 			}
8774 			mutex_exit(&pm_cfb_lock);
8775 		}
8776 		break;
8777 	case PM_LEVEL_DOWNONLY:
8778 		/* Powering down */
8779 		thresh = cur_threshold(dip, comp);
8780 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8781 		if (scan && ((PM_KUC(dip) != 0) ||
8782 		    (cp->pmc_busycount > 0) ||
8783 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8784 #ifdef	DEBUG
8785 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8786 				PMD(PMD_SET, ("%s: scan failed: "
8787 				    "kidsupcnt != 0\n", pmf))
8788 			if (cp->pmc_busycount > 0)
8789 				PMD(PMD_SET, ("%s: scan failed: "
8790 				    "device become busy\n", pmf))
8791 			if (idletime < thresh)
8792 				PMD(PMD_SET, ("%s: scan failed: device "
8793 				    "hasn't been idle long enough\n", pmf))
8794 #endif
8795 			*iresp = DDI_FAILURE;
8796 			*errnop = EBUSY;
8797 			ret = DDI_FAILURE;
8798 			goto post_notify;
8799 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8800 			PMD(PMD_SET, ("%s: current level is already at "
8801 			    "or below the requested level.\n", pmf))
8802 			*iresp = DDI_SUCCESS;
8803 			ret = DDI_SUCCESS;
8804 			goto post_notify;
8805 		}
8806 		break;
8807 	}
8808 
8809 	if (PM_IS_CFB(dip) && (comps_off_incr =
8810 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8811 		/*
8812 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8813 		 * component from full power.  Remember that we tried to
8814 		 * lower power in case it fails and we need to back out
8815 		 * the adjustment.
8816 		 */
8817 		update_comps_off(comps_off_incr, dip);
8818 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8819 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8820 		    pm_cfb_comps_off))
8821 	}
8822 
8823 	if ((*iresp = power_dev(dip,
8824 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8825 #ifdef DEBUG
8826 		/*
8827 		 * All descendents of this node should already be powered off.
8828 		 */
8829 		if (PM_CURPOWER(dip, comp) == 0) {
8830 			pm_desc_pwrchk_t pdpchk;
8831 			pdpchk.pdpc_dip = dip;
8832 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8833 			ndi_devi_enter(dip, &circ_db);
8834 			for (cdip = ddi_get_child(dip); cdip != NULL;
8835 			    cdip = ddi_get_next_sibling(cdip)) {
8836 				ndi_devi_enter(cdip, &ccirc_db);
8837 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8838 				    (void *)&pdpchk);
8839 				ndi_devi_exit(cdip, ccirc_db);
8840 			}
8841 			ndi_devi_exit(dip, circ_db);
8842 		}
8843 #endif
8844 		/*
8845 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8846 		 * back up to full power.
8847 		 */
8848 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8849 			update_comps_off(comps_off_incr, dip);
8850 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8851 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8852 			    comp, clevel, nlevel, pm_cfb_comps_off))
8853 		}
8854 		dodeps = 0;
8855 		if (POWERING_OFF(clevel, nlevel)) {
8856 			if (PM_ISBC(dip)) {
8857 				dodeps = (comp == 0);
8858 			} else {
8859 				int i;
8860 				dodeps = 1;
8861 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8862 					/* if some component still on */
8863 					if (PM_CURPOWER(dip, i)) {
8864 						dodeps = 0;
8865 						break;
8866 					}
8867 				}
8868 			}
8869 			if (dodeps)
8870 				work_type = PM_DEP_WK_POWER_OFF;
8871 		} else if (POWERING_ON(clevel, nlevel)) {
8872 			if (PM_ISBC(dip)) {
8873 				dodeps = (comp == 0);
8874 			} else {
8875 				int i;
8876 				dodeps = 1;
8877 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8878 					if (i == comp)
8879 						continue;
8880 					if (PM_CURPOWER(dip, i) > 0) {
8881 						dodeps = 0;
8882 						break;
8883 					}
8884 				}
8885 			}
8886 			if (dodeps)
8887 				work_type = PM_DEP_WK_POWER_ON;
8888 		}
8889 
8890 		if (dodeps) {
8891 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8892 
8893 			(void) ddi_pathname(dip, pathbuf);
8894 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8895 			    PM_DEP_NOWAIT, NULL, 0);
8896 			kmem_free(pathbuf, MAXPATHLEN);
8897 		}
8898 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8899 			int old;
8900 
8901 			/* If old power cached during deadlock, use it. */
8902 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8903 			    cp->pmc_phc_pwr : olevel);
8904 			mutex_enter(&pm_rsvp_lock);
8905 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8906 			    old, canblock);
8907 			pm_enqueue_notify_others(&devl, canblock);
8908 			mutex_exit(&pm_rsvp_lock);
8909 		} else {
8910 			pm_ppm_devlist_t *p;
8911 			pm_ppm_devlist_t *next;
8912 			for (p = devl; p != NULL; p = next) {
8913 				next = p->ppd_next;
8914 				kmem_free(p, sizeof (pm_ppm_devlist_t));
8915 			}
8916 			devl = NULL;
8917 		}
8918 
8919 		/*
8920 		 * If we are coming from a scan, don't do it again,
8921 		 * else we can have infinite loops.
8922 		 */
8923 		if (!scan)
8924 			pm_rescan(dip);
8925 	} else {
8926 		/* if we incremented pm_comps_off_count, but failed */
8927 		if (comps_off_incr > 0) {
8928 			update_comps_off(-comps_off_incr, dip);
8929 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8930 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8931 			    comp, clevel, nlevel, pm_cfb_comps_off))
8932 		}
8933 		*errnop = EIO;
8934 	}
8935 
8936 post_notify:
8937 	/*
8938 	 * This thread may have been in deadlock with pm_power_has_changed.
8939 	 * Before releasing power lock, clear the flag which marks this
8940 	 * condition.
8941 	 */
8942 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8943 
8944 	/*
8945 	 * Update the old power level in the bus power structure with the
8946 	 * actual power level before the transition was made to the new level.
8947 	 * Some involved parents depend on this information to keep track of
8948 	 * their children's power transition.
8949 	 */
8950 	if (*iresp != DDI_FAILURE)
8951 		bpc->bpc_olevel = clevel;
8952 
8953 	if (PM_WANTS_NOTIFICATION(pdip)) {
8954 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8955 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8956 		PM_UNLOCK_POWER(dip, circ);
8957 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8958 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8959 		    PM_DEVICE(dip), ret))
8960 	} else {
8961 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8962 		PM_UNLOCK_POWER(dip, circ);
8963 		/*
8964 		 * Now that we know what power transition has occurred
8965 		 * (if any), release the power hold.  Leave the hold
8966 		 * in effect in the case of OFF->ON transition.
8967 		 */
8968 		if (!(clevel == 0 && nlevel > 0 &&
8969 		    (!PM_ISBC(dip) || comp == 0)))
8970 			pm_rele_power(pdip);
8971 		/*
8972 		 * If the power transition was an ON->OFF transition,
8973 		 * remove the power hold from the parent.
8974 		 */
8975 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8976 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8977 			pm_rele_power(pdip);
8978 	}
8979 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
8980 		return (DDI_FAILURE);
8981 	else
8982 		return (DDI_SUCCESS);
8983 }
8984 
8985 /*
8986  * If an app (SunVTS or Xsun) has taken control, then block until it
8987  * gives it up or makes the requested power level change, unless
8988  * we have other instructions about blocking.  Returns DDI_SUCCESS,
8989  * DDI_FAILURE or EAGAIN (owner released device from directpm).
8990  */
8991 static int
8992 pm_busop_match_request(dev_info_t *dip, void *arg)
8993 {
8994 	PMD_FUNC(pmf, "bp_match_request")
8995 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8996 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8997 	int comp = bpc->bpc_comp;
8998 	int nlevel = bpc->bpc_nlevel;
8999 	pm_canblock_t canblock = pspm->pspm_canblock;
9000 	int direction = pspm->pspm_direction;
9001 	int clevel, circ;
9002 
9003 	ASSERT(PM_IAM_LOCKING_DIP(dip));
9004 	PM_LOCK_POWER(dip, &circ);
9005 	clevel = PM_CURPOWER(dip, comp);
9006 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
9007 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
9008 	if (direction == PM_LEVEL_UPONLY) {
9009 		if (clevel >= nlevel) {
9010 			PM_UNLOCK_POWER(dip, circ);
9011 			PM_UNLOCK_DIP(dip);
9012 			return (DDI_SUCCESS);
9013 		}
9014 	} else if (clevel == nlevel) {
9015 		PM_UNLOCK_POWER(dip, circ);
9016 		PM_UNLOCK_DIP(dip);
9017 		return (DDI_SUCCESS);
9018 	}
9019 	if (canblock == PM_CANBLOCK_FAIL) {
9020 		PM_UNLOCK_POWER(dip, circ);
9021 		PM_UNLOCK_DIP(dip);
9022 		return (DDI_FAILURE);
9023 	}
9024 	if (canblock == PM_CANBLOCK_BLOCK) {
9025 		/*
9026 		 * To avoid a deadlock, we must not hold the
9027 		 * power lock when we pm_block.
9028 		 */
9029 		PM_UNLOCK_POWER(dip, circ);
9030 		PMD(PMD_SET, ("%s: blocking\n", pmf))
9031 		/* pm_block releases dip lock */
9032 		switch (pm_block(dip, comp, nlevel, clevel)) {
9033 		case PMP_RELEASE:
9034 			return (EAGAIN);
9035 		case PMP_SUCCEED:
9036 			return (DDI_SUCCESS);
9037 		case PMP_FAIL:
9038 			return (DDI_FAILURE);
9039 		}
9040 	} else {
9041 		ASSERT(0);
9042 	}
9043 	_NOTE(NOTREACHED);
9044 	return (DDI_FAILURE);	/* keep gcc happy */
9045 }
9046 
9047 static int
9048 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
9049 {
9050 	PMD_FUNC(pmf, "all_to_normal_nexus")
9051 	int		*normal;
9052 	int		i, ncomps;
9053 	size_t		size;
9054 	int		changefailed = 0;
9055 	int		ret, result = DDI_SUCCESS;
9056 	pm_bp_nexus_pwrup_t	bpn;
9057 	pm_sp_misc_t	pspm;
9058 
9059 	ASSERT(PM_GET_PM_INFO(dip));
9060 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9061 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
9062 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
9063 		return (DDI_FAILURE);
9064 	}
9065 	ncomps = PM_NUMCMPTS(dip);
9066 	for (i = 0; i < ncomps; i++) {
9067 		bpn.bpn_dip = dip;
9068 		bpn.bpn_comp = i;
9069 		bpn.bpn_level = normal[i];
9070 		pspm.pspm_canblock = canblock;
9071 		pspm.pspm_scan = 0;
9072 		bpn.bpn_private = &pspm;
9073 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
9074 		    (void *)&bpn, (void *)&result);
9075 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
9076 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
9077 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
9078 			    i, normal[i], result))
9079 			changefailed++;
9080 		}
9081 	}
9082 	kmem_free(normal, size);
9083 	if (changefailed) {
9084 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
9085 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
9086 		return (DDI_FAILURE);
9087 	}
9088 	return (DDI_SUCCESS);
9089 }
9090 
9091 int
9092 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
9093     dev_info_t *tdip)
9094 {
9095 	PMD_FUNC(pmf, "noinvol_update")
9096 	pm_bp_noinvol_t args;
9097 	int ret;
9098 	int result = DDI_SUCCESS;
9099 
9100 	args.bpni_path = path;
9101 	args.bpni_dip = tdip;
9102 	args.bpni_cmd = subcmd;
9103 	args.bpni_wasvolpmd = wasvolpmd;
9104 	args.bpni_volpmd = volpmd;
9105 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
9106 	    "volpmd %d wasvolpmd %d\n", pmf,
9107 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
9108 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
9109 	    &args, &result);
9110 	return (ret);
9111 }
9112 
9113 void
9114 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9115 {
9116 	PMD_FUNC(pmf, "noinvol_update_node")
9117 
9118 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9119 	switch (req->bpni_cmd) {
9120 	case PM_BP_NOINVOL_ATTACH:
9121 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
9122 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9123 		    DEVI(dip)->devi_pm_noinvolpm,
9124 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9125 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9126 		PM_LOCK_DIP(dip);
9127 		DEVI(dip)->devi_pm_noinvolpm--;
9128 		if (req->bpni_wasvolpmd) {
9129 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9130 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9131 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9132 			    DEVI(dip)->devi_pm_volpmd - 1))
9133 			if (DEVI(dip)->devi_pm_volpmd)
9134 				DEVI(dip)->devi_pm_volpmd--;
9135 		}
9136 		PM_UNLOCK_DIP(dip);
9137 		break;
9138 
9139 	case PM_BP_NOINVOL_DETACH:
9140 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9141 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9142 		    DEVI(dip)->devi_pm_noinvolpm,
9143 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9144 		PM_LOCK_DIP(dip);
9145 		DEVI(dip)->devi_pm_noinvolpm++;
9146 		if (req->bpni_wasvolpmd) {
9147 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9148 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9149 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9150 			    DEVI(dip)->devi_pm_volpmd + 1))
9151 			DEVI(dip)->devi_pm_volpmd++;
9152 		}
9153 		PM_UNLOCK_DIP(dip);
9154 		break;
9155 
9156 	case PM_BP_NOINVOL_REMDRV:
9157 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9158 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9159 		    DEVI(dip)->devi_pm_noinvolpm,
9160 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9161 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9162 		PM_LOCK_DIP(dip);
9163 		DEVI(dip)->devi_pm_noinvolpm--;
9164 		if (req->bpni_wasvolpmd) {
9165 			PMD(PMD_NOINVOL,
9166 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9167 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9168 			    DEVI(dip)->devi_pm_volpmd,
9169 			    DEVI(dip)->devi_pm_volpmd - 1))
9170 			/*
9171 			 * A power up could come in between and
9172 			 * clear the volpmd, if that's the case,
9173 			 * volpmd would be clear.
9174 			 */
9175 			if (DEVI(dip)->devi_pm_volpmd)
9176 				DEVI(dip)->devi_pm_volpmd--;
9177 		}
9178 		PM_UNLOCK_DIP(dip);
9179 		break;
9180 
9181 	case PM_BP_NOINVOL_CFB:
9182 		PMD(PMD_NOINVOL,
9183 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9184 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9185 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9186 		PM_LOCK_DIP(dip);
9187 		DEVI(dip)->devi_pm_noinvolpm++;
9188 		PM_UNLOCK_DIP(dip);
9189 		break;
9190 
9191 	case PM_BP_NOINVOL_POWER:
9192 		PMD(PMD_NOINVOL,
9193 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9194 		    pmf, PM_DEVICE(dip),
9195 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9196 		    req->bpni_volpmd))
9197 		PM_LOCK_DIP(dip);
9198 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9199 		PM_UNLOCK_DIP(dip);
9200 		break;
9201 
9202 	default:
9203 		break;
9204 	}
9205 
9206 }
9207 
#ifdef DEBUG
/*
 * Tree-walk callback (DEBUG kernels only) that complains about any
 * component of dip that still has a non-zero power level while the
 * ancestor recorded in the pm_desc_pwrchk_t argument is powering off.
 * The complaint is raised at CE_PANIC when pdpc_par_involved is zero and
 * at CE_WARN otherwise.  Nodes without PM info are skipped.
 *
 * Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	int comp;
	/* LINTED */
	int curpwr, ce_level;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		curpwr = PM_CURPOWER(dip, comp);
		if (curpwr == 0)
			continue;	/* this component is off: fine */

		if (chk->pdpc_par_involved == 0)
			ce_level = CE_PANIC;
		else
			ce_level = CE_WARN;

		PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while desc "
		    "%s@%s(%s#%d)[%d] is at %d\n", pmf,
		    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), comp, curpwr))
		cmn_err(ce_level, "!device %s@%s(%s#%d) is powered on, "
		    "while its ancestor, %s@%s(%s#%d), is powering off!",
		    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9240 
9241 /*
9242  * Record the fact that one thread is borrowing the lock on a device node.
9243  * Use is restricted to the case where the lending thread will block until
9244  * the borrowing thread (always curthread) completes.
9245  */
9246 void
9247 pm_borrow_lock(kthread_t *lender)
9248 {
9249 	lock_loan_t *prev = &lock_loan_head;
9250 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9251 
9252 	cur->pmlk_borrower = curthread;
9253 	cur->pmlk_lender = lender;
9254 	mutex_enter(&pm_loan_lock);
9255 	cur->pmlk_next = prev->pmlk_next;
9256 	prev->pmlk_next = cur;
9257 	mutex_exit(&pm_loan_lock);
9258 }
9259 
9260 /*
9261  * Return the borrowed lock.  A thread can borrow only one.
9262  */
9263 void
9264 pm_return_lock(void)
9265 {
9266 	lock_loan_t *cur;
9267 	lock_loan_t *prev = &lock_loan_head;
9268 
9269 	mutex_enter(&pm_loan_lock);
9270 	ASSERT(prev->pmlk_next != NULL);
9271 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9272 		if (cur->pmlk_borrower == curthread)
9273 			break;
9274 
9275 	ASSERT(cur != NULL);
9276 	prev->pmlk_next = cur->pmlk_next;
9277 	mutex_exit(&pm_loan_lock);
9278 	kmem_free(cur, sizeof (*cur));
9279 }
9280 
9281 #if defined(__x86)
9282 
/*
 * I/O ports and status bits used by the polled character routines
 * pm_getchar() and pm_putchar().
 * NOTE(review): the port numbers look like the conventional PC COM1
 * UART (16550-style, base 0x3f8) -- confirm against the platform.
 */
/* status bit pm_getchar() waits for in CPR_LSTAT before reading a byte */
#define	CPR_RXR	0x1
/* status bit pm_putchar() waits for in CPR_LSTAT before writing a byte */
#define	CPR_TXR	0x20
/* port that pm_getchar() reads from and pm_putchar() writes to */
#define	CPR_DATAREG	0x3f8
/* port polled for the CPR_RXR and CPR_TXR status bits */
#define	CPR_LSTAT	0x3fd
/*
 * Not used by the routines in this section; presumably the UART
 * interrupt control register -- TODO confirm.
 */
#define	CPR_INTRCTL	0x3f9
9288 
9289 char
9290 pm_getchar(void)
9291 {
9292 	while ((inb(CPR_LSTAT) & CPR_RXR) != CPR_RXR)
9293 		drv_usecwait(10);
9294 
9295 	return (inb(CPR_DATAREG));
9296 
9297 }
9298 
9299 void
9300 pm_putchar(char c)
9301 {
9302 	while ((inb(CPR_LSTAT) & CPR_TXR) == 0)
9303 		drv_usecwait(10);
9304 
9305 	outb(CPR_DATAREG, c);
9306 }
9307 
/*
 * Emit a NUL-terminated string one character at a time through
 * pm_putchar().  No formatting is performed, despite the name.
 */
void
pm_printf(char *s)
{
	char *cp;

	for (cp = s; *cp != '\0'; cp++)
		pm_putchar(*cp);
}
9315 
9316 #endif
9317 
9318 int
9319 pm_ppm_searchlist(pm_searchargs_t *sp)
9320 {
9321 	power_req_t power_req;
9322 	int result = 0;
9323 	/* LINTED */
9324 	int ret;
9325 
9326 	power_req.request_type = PMR_PPM_SEARCH_LIST;
9327 	power_req.req.ppm_search_list_req.searchlist = sp;
9328 	ASSERT(DEVI(ddi_root_node())->devi_pm_ppm);
9329 	ret = pm_ctlops((dev_info_t *)DEVI(ddi_root_node())->devi_pm_ppm,
9330 	    ddi_root_node(), DDI_CTLOPS_POWER, &power_req, &result);
9331 	PMD(PMD_SX, ("pm_ppm_searchlist returns %d, result %d\n",
9332 	    ret, result))
9333 	return (result);
9334 }
9335