xref: /titanic_51/usr/src/uts/common/os/sunpm.c (revision 65488c97aeb108aeffd7b61db3b2b3bcb4fc9d72)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * sunpm.c builds sunpm.o	"power management framework"
28  *	kernel-resident power management code.  Implements power management
29  *	policy
30  *	Assumes: all backwards compat. device components wake up on &
31  *		 the pm_info pointer in dev_info is initially NULL
32  *
33  * PM - (device) Power Management
34  *
35  * Each device may have 0 or more components.  If a device has no components,
36  * then it can't be power managed.  Each component has 2 or more
37  * power states.
38  *
39  * "Backwards Compatible" (bc) devices:
40  * There are two different types of devices from the point of view of this
41  * code.  The original type, left over from the original PM implementation on
42  * the voyager platform are known in this code as "backwards compatible"
43  * devices (PM_ISBC(dip) returns true).
44  * They are recognized by the pm code by the lack of a pm-components property
45  * and a call made by the driver to pm_create_components(9F).
46  * For these devices, component 0 is special, and represents the power state
47  * of the device.  If component 0 is to be set to power level 0 (off), then
48  * the framework must first call into the driver's detach(9E) routine with
49  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
50  * After setting component 0 from 0 to a non-zero power level, a call must be
51  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
52  *
53  * Currently, the only way to get a bc device power managed is via a set of
54  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
55  *
56  * For non-bc devices, the driver describes the components by exporting a
57  * pm-components(9P) property that tells how many components there are,
58  * tells what each component's power state values are, and provides human
59  * readable strings (currently unused) for each component name and power state.
60  * Devices which export pm-components(9P) are automatically power managed
61  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
62  * after parsing power.conf(4)). The exception to this rule is that power
63  * manageable CPU devices may be automatically managed independently of autopm
64  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
65  * ioctls) cpupm. If the CPU devices are not managed independently, then they
66  * are managed by autopm. In either case, for automatically power managed
67  * devices, all components are considered independent of each other, and it is
68  * up to the driver to decide when a transition requires saving or restoring
69  * hardware state.
70  *
71  * Each device component also has a threshold time associated with each power
72  * transition (see power.conf(4)), and a busy/idle state maintained by the
73  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
74  * Components are created idle.
75  *
76  * The PM framework provides several functions:
77  * -implement PM policy as described in power.conf(4)
78  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
79  *  Policies consist of:
80  *    -set threshold values (defaults if none provided by pmconfig)
81  *    -set dependencies among devices
82  *    -enable/disable autopm
83  *    -enable/disable cpupm
84  *    -turn down idle components based on thresholds (if autopm or cpupm is
85  *     enabled) (aka scanning)
86  *    -maintain power states based on dependencies among devices
87  *    -upon request, or when the frame buffer powers off, attempt to turn off
88  *     all components that are idle or become idle over the next (10 sec)
89  *     period in an attempt to get down to an EnergyStar compliant state
90  *    -prevent powering off of a device which exported the
91  *     pm-no-involuntary-power-cycles property without active involvement of
92  *     the device's driver (so no removing power when the device driver is
93  *     not attached)
94  * -provide a mechanism for a device driver to request that a device's component
95  *  be brought back to the power level necessary for the use of the device
96  * -allow a process to directly control the power levels of device components
97  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
98  * -ensure that the console frame buffer is powered up before being referenced
99  *  via prom_printf() or other prom calls that might generate console output
100  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
101  * -provide "backwards compatible" behavior for devices without pm-components
102  *  property
103  *
104  * Scanning:
105  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
106  * component of each managed device to its lowest power based on the threshold
107  * of idleness associated with each transition and the busy/idle state of the
108  * component.
109  *
110  * The actual work of this is done by pm_scan_dev(), which cycles through each
111  * component of a device, checking its idleness against its current threshold,
112  * and calling pm_set_power() as appropriate to change the power level.
113  * This function also indicates when it would next be profitable to scan the
114  * device again, and a new scan is scheduled after that time.
115  *
116  * Dependencies:
117  * It is possible to establish a dependency between the power states of two
118  * otherwise unrelated devices.  This is currently done to ensure that the
119  * cdrom is always up whenever the console framebuffer is up, so that the user
120  * can insert a cdrom and see a popup as a result.
121  *
122  * The dependency terminology used in power.conf(4) is not easy to understand,
123  * so we've adopted a different terminology in the implementation.  We write
124  * of a "keeps up" and a "kept up" device.  A relationship can be established
125  * where one device keeps up another.  That means that if the keepsup device
126  * has any component that is at a non-zero power level, all components of the
127  * "kept up" device must be brought to full power.  This relationship is
128  * asynchronous.  When the keeping device is powered up, a request is queued
129  * to a worker thread to bring up the kept device.  The caller does not wait.
130  * Scan will not turn down a kept up device.
131  *
132  * Direct PM:
133  * A device may be directly power managed by a process.  If a device is
134  * directly pm'd, then it will not be scanned, and dependencies will not be
135  * enforced.  If a directly pm'd device's driver requests a power change (via
136  * pm_raise_power(9F)), then the request is blocked and notification is sent
137  * to the controlling process, which must issue the requested power change for
138  * the driver to proceed.
139  *
140  */
141 
142 #include <sys/types.h>
143 #include <sys/errno.h>
144 #include <sys/callb.h>		/* callback registration during CPR */
145 #include <sys/conf.h>		/* driver flags and functions */
146 #include <sys/open.h>		/* OTYP_CHR definition */
147 #include <sys/stat.h>		/* S_IFCHR definition */
148 #include <sys/pathname.h>	/* name -> dev_info xlation */
149 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
150 #include <sys/kmem.h>		/* memory alloc stuff */
151 #include <sys/debug.h>
152 #include <sys/archsystm.h>
153 #include <sys/pm.h>
154 #include <sys/ddi.h>
155 #include <sys/sunddi.h>
156 #include <sys/sunndi.h>
157 #include <sys/sunpm.h>
158 #include <sys/epm.h>
159 #include <sys/vfs.h>
160 #include <sys/mode.h>
161 #include <sys/mkdev.h>
162 #include <sys/promif.h>
163 #include <sys/consdev.h>
164 #include <sys/esunddi.h>
165 #include <sys/modctl.h>
166 #include <sys/fs/ufs_fs.h>
167 #include <sys/note.h>
168 #include <sys/taskq.h>
169 #include <sys/bootconf.h>
170 #include <sys/reboot.h>
171 #include <sys/spl.h>
172 #include <sys/disp.h>
173 #include <sys/sobject.h>
174 #include <sys/sunmdi.h>
175 #include <sys/systm.h>
176 #include <sys/cpuvar.h>
177 #include <sys/cyclic.h>
178 #include <sys/uadmin.h>
179 #include <sys/srn.h>
180 
181 
182 /*
183  * PM LOCKING
184  *	The list of locks:
185  * Global pm mutex locks.
186  *
187  * pm_scan_lock:
188  *		It protects the timeout id of the scan thread, and the value
189  *		of autopm_enabled and cpupm.  This lock is not held
190  *		concurrently with any other PM locks.
191  *
192  * pm_clone_lock:	Protects the clone list and count of poll events
193  *		pending for the pm driver.
194  *		Lock ordering:
195  *			pm_clone_lock -> pm_pscc_interest_rwlock,
196  *			pm_clone_lock -> pm_pscc_direct_rwlock.
197  *
198  * pm_rsvp_lock:
199  *		Used to synchronize the data structures used for processes
200  *		to rendezvous with state change information when doing
201  *		direct PM.
202  *		Lock ordering:
203  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
204  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
205  *			pm_rsvp_lock -> pm_clone_lock.
206  *
207  * ppm_lock:	protects the list of registered ppm drivers
208  *		Lock ordering:
209  *			ppm_lock -> ppm driver unit_lock
210  *
211  * pm_compcnt_lock:
212  *		Protects count of components that are not at their lowest
213  *		power level.
214  *		Lock ordering:
215  *			pm_compcnt_lock -> ppm_lock.
216  *
217  * pm_dep_thread_lock:
218  *		Protects work list for pm_dep_thread.  Not taken concurrently
219  *		with any other pm lock.
220  *
221  * pm_remdrv_lock:
222  *		Serializes the operation of removing noinvol data structure
223  *		entries for a branch of the tree when a driver has been
224  *		removed from the system (modctl_rem_major).
225  *		Lock ordering:
226  *			pm_remdrv_lock -> pm_noinvol_rwlock.
227  *
228  * pm_cfb_lock: (High level spin lock)
229  *		Protects the count of how many components of the console
230  *		frame buffer are off (so we know if we have to bring up the
231  *		console as a result of a prom_printf, etc.).
232  *		No other locks are taken while holding this lock.
233  *
234  * pm_loan_lock:
235  *		Protects the lock_loan list.  List is used to record that one
236  *		thread has acquired a power lock but has launched another thread
237  *		to complete its processing.  An entry in the list indicates that
238  *		the worker thread can borrow the lock held by the other thread,
239  *		which must block on the completion of the worker.  Use is
240  *		specific to module loading.
241  *		No other locks are taken while holding this lock.
242  *
243  * Global PM rwlocks
244  *
245  * pm_thresh_rwlock:
246  *		Protects the list of thresholds recorded for future use (when
247  *		devices attach).
248  *		Lock ordering:
249  *			pm_thresh_rwlock -> devi_pm_lock
250  *
251  * pm_noinvol_rwlock:
252  *		Protects list of detached nodes that had noinvol registered.
253  *		No other PM locks are taken while holding pm_noinvol_rwlock.
254  *
255  * pm_pscc_direct_rwlock:
256  *		Protects the list that maps devices being directly power
257  *		managed to the processes that manage them.
258  *		Lock ordering:
259  *			pm_pscc_direct_rwlock -> psce_lock
260  *
261  * pm_pscc_interest_rwlock:
262  *		Protects the list that maps state change events to processes
263  *		that want to know about them.
264  *		Lock ordering:
265  *			pm_pscc_interest_rwlock -> psce_lock
266  *
267  * per-dip locks:
268  *
269  * Each node has these per-dip locks, which are only used if the device is
270  * a candidate for power management (e.g. has pm components)
271  *
272  * devi_pm_lock:
273  *		Protects all power management state of the node except for
274  *		power level, which is protected by ndi_devi_enter().
275  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
276  *		Lock ordering:
277  *			devi_pm_lock -> pm_rsvp_lock,
278  *			devi_pm_lock -> pm_dep_thread_lock,
279  *			devi_pm_lock -> pm_noinvol_rwlock,
280  *			devi_pm_lock -> power lock
281  *
282  * power lock (ndi_devi_enter()):
283  *		Since changing power level is possibly a slow operation (30
284  *		seconds to spin up a disk drive), this is locked separately.
285  *		Since a call into the driver to change the power level of one
286  *		component may result in a call back into the framework to change
287  *		the power level of another, this lock allows re-entrancy by
288  *		the same thread (ndi_devi_enter is used for this because
289  *		the USB framework uses ndi_devi_enter in its power entry point,
290  *		and use of any other lock would produce a deadlock).
291  *
292  * devi_pm_busy_lock:
293  *		This lock protects the integrity of the busy count.  It is
294  *		only taken by pm_busy_component(), pm_idle_component() and
295  *		some code that adjusts the busy time after the timer gets set
296  *		up or after a CPR operation.  It is per-dip to keep from
297  *		single-threading all the disk drivers on a system.
298  *		It could be per component instead, but most devices have
299  *		only one component.
300  *		No other PM locks are taken while holding this lock.
301  *
302  */
303 
304 static int stdout_is_framebuffer;
305 static kmutex_t	e_pm_power_lock;
306 static kmutex_t pm_loan_lock;
307 kmutex_t	pm_scan_lock;
308 callb_id_t	pm_cpr_cb_id;
309 callb_id_t	pm_panic_cb_id;
310 callb_id_t	pm_halt_cb_id;
311 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
312 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
313 
314 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
315 clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
316 clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
317 
318 #define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
319 				    pm_default_min_scan)
320 
321 static int pm_busop_set_power(dev_info_t *,
322     void *, pm_bus_power_op_t, void *, void *);
323 static int pm_busop_match_request(dev_info_t *, void *);
324 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
325 static void e_pm_set_max_power(dev_info_t *, int, int);
326 static int e_pm_get_max_power(dev_info_t *, int);
327 
328 /*
329  * Dependency Processing is done thru a separate thread.
330  */
331 kmutex_t	pm_dep_thread_lock;
332 kcondvar_t	pm_dep_thread_cv;
333 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
334 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
335 
336 /*
337  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
338  * power managing things in single user mode that have been suppressed via
339  * power.conf entries.  Protected by pm_scan_lock.
340  */
341 int		autopm_enabled;
342 
343 /*
344  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
345  * to define the power management behavior of CPU devices separate from
346  * autopm. Protected by pm_scan_lock.
347  */
348 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
349 
350 /*
351  * Defines the default mode of operation for CPU power management,
352  * either the polling implementation, or the event based dispatcher driven
353  * implementation.
354  */
355 pm_cpupm_t	cpupm_default_mode = PM_CPUPM_EVENT;
356 
357 /*
358  * AutoS3 depends on autopm being enabled, and must be enabled by
359  * PM_START_AUTOS3 command.
360  */
361 int		autoS3_enabled;
362 
363 #if !defined(__sparc)
364 /*
365  * on sparc these live in fillsysinfo.c
366  *
367  * If this variable is non-zero, cpr should return "not supported" when
368  * it is queried even though it would normally be supported on this platform.
369  */
370 int cpr_supported_override;
371 
372 /*
373  * Some platforms may need to support CPR even in the absence of
374  * having the correct platform id information.  If this
375  * variable is non-zero, cpr should proceed even in the absence
376  * of otherwise being qualified.
377  */
378 int cpr_platform_enable = 0;
379 
380 #endif
381 
382 /*
383  * pm_S3_enabled indicates that we believe the platform can support S3,
384  * which we get from pmconfig(1M)
385  */
386 int		pm_S3_enabled;
387 
388 /*
389  * This flag is true while processes are stopped for a checkpoint/resume.
390  * Controlling processes of direct pm'd devices are not available to
391  * participate in power level changes, so we bypass them when this is set.
392  */
393 static int	pm_processes_stopped;
394 
395 #ifdef	DEBUG
396 
397 /*
398  * see common/sys/epm.h for PMD_* values
399  */
400 
401 uint_t		pm_debug = 0;
402 
403 /*
404  * If pm_divertdebug is set, then no prom_printf calls will be made by
405  * PMD(), which will prevent debug output from bringing up the console
406  * frame buffer.  Clearing this variable before setting pm_debug will result
407  * in PMD output going to the console.
408  *
409  * pm_divertdebug is incremented in pm_set_power() if dip == cfb_dip to avoid
410  * deadlocks and decremented at the end of pm_set_power()
411  */
412 uint_t		pm_divertdebug = 1;
413 volatile uint_t pm_debug_to_console = 0;
414 kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */
415 
416 void prdeps(char *);
417 #endif
418 
419 /* Globals */
420 
421 /*
422  * List of recorded thresholds and dependencies
423  */
424 pm_thresh_rec_t *pm_thresh_head;
425 krwlock_t pm_thresh_rwlock;
426 
427 pm_pdr_t *pm_dep_head;
428 static int pm_unresolved_deps = 0;
429 static int pm_prop_deps = 0;
430 
431 /*
432  * List of devices that exported no-involuntary-power-cycles property
433  */
434 pm_noinvol_t *pm_noinvol_head;
435 
436 /*
437  * Locks used in noinvol processing
438  */
439 krwlock_t pm_noinvol_rwlock;
440 kmutex_t pm_remdrv_lock;
441 
442 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
443 int pm_system_idle_threshold;
444 int pm_cpu_idle_threshold;
445 
446 /*
447  * By default nexus has 0 threshold, and depends on its children to keep it up
448  */
449 int pm_default_nexus_threshold = 0;
450 
451 /*
452  * Data structures shared with common/io/pm.c
453  */
454 kmutex_t	pm_clone_lock;
455 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
456 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
457 unsigned char	pm_interest[PM_MAX_CLONE];
458 struct pollhead	pm_pollhead;
459 
460 /*
461  * Data structures shared with common/io/srn.c
462  */
463 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
464 void (*srn_signal)(int type, int event);
465 int srn_inuse;				/* stop srn detach */
466 
467 extern int	hz;
468 extern char	*platform_module_list[];
469 
470 /*
471  * Wrappers for use in ddi_walk_devs
472  */
473 
474 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
475 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
476 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
477 static int		pm_discard_dep_walk(dev_info_t *, void *);
478 #ifdef DEBUG
479 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
480 #endif
481 
482 /*
483  * Routines for managing noinvol devices
484  */
485 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
486 void			pm_noinvol_update_node(dev_info_t *,
487 			    pm_bp_noinvol_t *req);
488 
489 kmutex_t pm_rsvp_lock;
490 kmutex_t pm_compcnt_lock;
491 krwlock_t pm_pscc_direct_rwlock;
492 krwlock_t pm_pscc_interest_rwlock;
493 
494 #define	PSC_INTEREST	0	/* belongs to interest psc list */
495 #define	PSC_DIRECT	1	/* belongs to direct psc list */
496 
497 pscc_t *pm_pscc_interest;
498 pscc_t *pm_pscc_direct;
499 
500 #define	PM_MAJOR(dip) ddi_driver_major(dip)
501 #define	PM_IS_NEXUS(dip) ((PM_MAJOR(dip) == DDI_MAJOR_T_NONE) ? 0 : \
502 	NEXUS_DRV(devopsp[PM_MAJOR(dip)]))
503 #define	POWERING_ON(old, new) ((old) == 0 && (new) != 0)
504 #define	POWERING_OFF(old, new) ((old) != 0 && (new) == 0)
505 
506 #define	PM_INCR_NOTLOWEST(dip) {					\
507 	mutex_enter(&pm_compcnt_lock);					\
508 	if (!PM_IS_NEXUS(dip) ||					\
509 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
510 		if (pm_comps_notlowest == 0)				\
511 			pm_ppm_notify_all_lowest(dip, PM_NOT_ALL_LOWEST);\
512 		pm_comps_notlowest++;					\
513 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr notlowest->%d\n",\
514 		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
515 	}								\
516 	mutex_exit(&pm_compcnt_lock);					\
517 }
518 #define	PM_DECR_NOTLOWEST(dip) {					\
519 	mutex_enter(&pm_compcnt_lock);					\
520 	if (!PM_IS_NEXUS(dip) ||					\
521 	    (DEVI(dip)->devi_pm_flags & (PMC_DEV_THRESH|PMC_COMP_THRESH))) {\
522 		ASSERT(pm_comps_notlowest);				\
523 		pm_comps_notlowest--;					\
524 		PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr notlowest to "	\
525 			    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))\
526 		if (pm_comps_notlowest == 0)				\
527 			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
528 	}								\
529 	mutex_exit(&pm_compcnt_lock);					\
530 }
531 
532 /*
533  * console frame-buffer power-management is not enabled when
534  * debugging services are present.  to override, set pm_cfb_override
535  * to non-zero.
536  */
537 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
538 kmutex_t pm_cfb_lock;
539 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
540 #ifdef DEBUG
541 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
542 #else
543 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
544 #endif
545 
546 static dev_info_t *cfb_dip = 0;
547 static dev_info_t *cfb_dip_detaching = 0;
548 uint_t cfb_inuse = 0;
549 static ddi_softintr_t pm_soft_id;
550 static clock_t pm_soft_pending;
551 int	pm_scans_disabled = 0;
552 
553 /*
554  * A structure to record the fact that one thread has borrowed a lock held
555  * by another thread.  The context requires that the lender block on the
556  * completion of the borrower.
557  */
558 typedef struct lock_loan {
559 	struct lock_loan	*pmlk_next;
560 	kthread_t		*pmlk_borrower;
561 	kthread_t		*pmlk_lender;
562 	dev_info_t		*pmlk_dip;
563 } lock_loan_t;
564 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
565 
566 #ifdef	DEBUG
567 #ifdef	PMDDEBUG
568 #define	PMD_FUNC(func, name)	char *(func) = (name);
569 #else	/* !PMDDEBUG */
570 #define	PMD_FUNC(func, name)
571 #endif	/* PMDDEBUG */
572 #else	/* !DEBUG */
573 #define	PMD_FUNC(func, name)
574 #endif	/* DEBUG */
575 
576 
577 /*
578  * Must be called before first device (including pseudo) attach
579  */
580 void
581 pm_init_locks(void)
582 {
583 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
584 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
585 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
586 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
587 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
588 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
589 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
590 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
591 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
592 }
593 
594 static int pm_reset_timestamps(dev_info_t *, void *);
595 
596 static boolean_t
597 pm_cpr_callb(void *arg, int code)
598 {
599 	_NOTE(ARGUNUSED(arg))
600 	static int auto_save;
601 	static pm_cpupm_t cpupm_save;
602 
603 	switch (code) {
604 	case CB_CODE_CPR_CHKPT:
605 		/*
606 		 * Cancel scan or wait for scan in progress to finish
607 		 * Other threads may be trying to restart the scan, so we
608 		 * have to keep at it unil it sticks
609 		 */
610 		mutex_enter(&pm_scan_lock);
611 		ASSERT(!pm_scans_disabled);
612 		pm_scans_disabled = 1;
613 		auto_save = autopm_enabled;
614 		autopm_enabled = 0;
615 		cpupm_save = cpupm;
616 		cpupm = PM_CPUPM_NOTSET;
617 		mutex_exit(&pm_scan_lock);
618 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
619 		break;
620 
621 	case CB_CODE_CPR_RESUME:
622 		ASSERT(!autopm_enabled);
623 		ASSERT(cpupm == PM_CPUPM_NOTSET);
624 		ASSERT(pm_scans_disabled);
625 		pm_scans_disabled = 0;
626 		/*
627 		 * Call pm_reset_timestamps to reset timestamps of each
628 		 * device to the time when the system is resumed so that their
629 		 * idleness can be re-calculated. That's to avoid devices from
630 		 * being powered down right after resume if the system was in
631 		 * suspended mode long enough.
632 		 */
633 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
634 
635 		autopm_enabled = auto_save;
636 		cpupm = cpupm_save;
637 		/*
638 		 * If there is any auto-pm device, get the scanning
639 		 * going. Otherwise don't bother.
640 		 */
641 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
642 		break;
643 	}
644 	return (B_TRUE);
645 }
646 
647 /*
648  * This callback routine is called when there is a system panic.  This function
649  * exists for prototype matching.
650  */
651 static boolean_t
652 pm_panic_callb(void *arg, int code)
653 {
654 	_NOTE(ARGUNUSED(arg, code))
655 	void pm_cfb_check_and_powerup(void);
656 	PMD(PMD_CFB, ("pm_panic_callb\n"))
657 	pm_cfb_check_and_powerup();
658 	return (B_TRUE);
659 }
660 
661 static boolean_t
662 pm_halt_callb(void *arg, int code)
663 {
664 	_NOTE(ARGUNUSED(arg, code))
665 	return (B_TRUE);
666 }
667 
668 static void pm_dep_thread(void);
669 
670 /*
671  * This needs to be called after the root and platform drivers are loaded
672  * and be single-threaded with respect to driver attach/detach
673  */
674 void
675 pm_init(void)
676 {
677 	PMD_FUNC(pmf, "pm_init")
678 	char **mod;
679 	extern pri_t minclsyspri;
680 
681 	pm_comps_notlowest = 0;
682 	pm_system_idle_threshold = pm_default_idle_threshold;
683 	pm_cpu_idle_threshold = 0;
684 
685 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
686 	    CB_CL_CPR_PM, "pm_cpr");
687 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
688 	    CB_CL_PANIC, "pm_panic");
689 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
690 	    CB_CL_HALT, "pm_halt");
691 
692 	/*
693 	 * Create a thread to do dependency processing.
694 	 */
695 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
696 	    TS_RUN, minclsyspri);
697 
698 	/*
699 	 * loadrootmodules already loaded these ppm drivers, now get them
700 	 * attached so they can claim the root drivers as they attach
701 	 */
702 	for (mod = platform_module_list; *mod; mod++) {
703 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
704 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
705 			    *mod);
706 		} else {
707 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
708 			    ddi_major_to_name(ddi_name_to_major(*mod))))
709 		}
710 	}
711 }
712 
713 /*
714  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
715  * enabled) when device becomes power managed or after a failed detach and
716  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
717  * a CPR resume to get all the devices scanning again.
718  */
719 void
720 pm_scan_init(dev_info_t *dip)
721 {
722 	PMD_FUNC(pmf, "scan_init")
723 	pm_scan_t	*scanp;
724 
725 	ASSERT(!PM_ISBC(dip));
726 
727 	PM_LOCK_DIP(dip);
728 	scanp = PM_GET_PM_SCAN(dip);
729 	if (!scanp) {
730 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
731 		    pmf, PM_DEVICE(dip)))
732 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
733 		DEVI(dip)->devi_pm_scan = scanp;
734 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
735 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
736 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
737 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
738 	}
739 	PM_UNLOCK_DIP(dip);
740 }
741 
742 /*
743  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
744  */
745 void
746 pm_scan_fini(dev_info_t *dip)
747 {
748 	PMD_FUNC(pmf, "scan_fini")
749 	pm_scan_t	*scanp;
750 
751 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
752 	ASSERT(!PM_ISBC(dip));
753 	PM_LOCK_DIP(dip);
754 	scanp = PM_GET_PM_SCAN(dip);
755 	if (!scanp) {
756 		PM_UNLOCK_DIP(dip);
757 		return;
758 	}
759 
760 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
761 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
762 
763 	kmem_free(scanp, sizeof (pm_scan_t));
764 	DEVI(dip)->devi_pm_scan = NULL;
765 	PM_UNLOCK_DIP(dip);
766 }
767 
768 /*
769  * Given a pointer to a component struct, return the current power level
770  * (struct contains index unless it is a continuous level).
771  * Located here in hopes of getting both this and dev_is_needed into the
772  * cache together
773  */
774 static int
775 cur_power(pm_component_t *cp)
776 {
777 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
778 		return (cp->pmc_cur_pwr);
779 
780 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
781 }
782 
783 static char *
784 pm_decode_direction(int direction)
785 {
786 	switch (direction) {
787 	case PM_LEVEL_UPONLY:
788 		return ("up");
789 
790 	case PM_LEVEL_EXACT:
791 		return ("exact");
792 
793 	case PM_LEVEL_DOWNONLY:
794 		return ("down");
795 
796 	default:
797 		return ("INVALID DIRECTION");
798 	}
799 }
800 
801 char *
802 pm_decode_op(pm_bus_power_op_t op)
803 {
804 	switch (op) {
805 	case BUS_POWER_CHILD_PWRCHG:
806 		return ("CHILD_PWRCHG");
807 	case BUS_POWER_NEXUS_PWRUP:
808 		return ("NEXUS_PWRUP");
809 	case BUS_POWER_PRE_NOTIFICATION:
810 		return ("PRE_NOTIFICATION");
811 	case BUS_POWER_POST_NOTIFICATION:
812 		return ("POST_NOTIFICATION");
813 	case BUS_POWER_HAS_CHANGED:
814 		return ("HAS_CHANGED");
815 	case BUS_POWER_NOINVOL:
816 		return ("NOINVOL");
817 	default:
818 		return ("UNKNOWN OP");
819 	}
820 }
821 
822 /*
823  * Returns true if level is a possible (valid) power level for component
824  */
825 int
826 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
827 {
828 	PMD_FUNC(pmf, "e_pm_valid_power")
829 	pm_component_t *cp = PM_CP(dip, cmpt);
830 	int i;
831 	int *ip = cp->pmc_comp.pmc_lvals;
832 	int limit = cp->pmc_comp.pmc_numlevels;
833 
834 	if (level < 0)
835 		return (0);
836 	for (i = 0; i < limit; i++) {
837 		if (level == *ip++)
838 			return (1);
839 	}
840 #ifdef DEBUG
841 	if (pm_debug & PMD_FAIL) {
842 		ip = cp->pmc_comp.pmc_lvals;
843 
844 		for (i = 0; i < limit; i++)
845 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
846 			    pmf, i, *ip++))
847 	}
848 #endif
849 	return (0);
850 }
851 
852 static int pm_start(dev_info_t *dip);
853 /*
854  * Returns true if device is pm'd (after calling pm_start if need be)
855  */
856 int
857 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
858 {
859 	pm_info_t *info;
860 
861 	/*
862 	 * Check if the device is power managed if not.
863 	 * To make the common case (device is power managed already)
864 	 * fast, we check without the lock.  If device is not already
865 	 * power managed, then we take the lock and the long route through
866 	 * go get it managed.  Devices never go unmanaged until they
867 	 * detach.
868 	 */
869 	info = PM_GET_PM_INFO(dip);
870 	if (!info) {
871 		if (!DEVI_IS_ATTACHING(dip)) {
872 			return (0);
873 		}
874 		if (pm_start(dip) != DDI_SUCCESS) {
875 			return (0);
876 		}
877 		info = PM_GET_PM_INFO(dip);
878 	}
879 	ASSERT(info);
880 	if (infop != NULL)
881 		*infop = info;
882 	return (1);
883 }
884 
885 int
886 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
887 {
888 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
889 		if (cpp != NULL)
890 			*cpp = PM_CP(dip, cmpt);
891 		return (1);
892 	} else {
893 		return (0);
894 	}
895 }
896 
897 /*
898  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
899  */
900 static int
901 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
902 {
903 	PMD_FUNC(pmf, "din")
904 	pm_component_t *cp;
905 	char *pathbuf;
906 	int result;
907 
908 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
909 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
910 	    !e_pm_valid_power(dip, cmpt, level))
911 		return (DDI_FAILURE);
912 
913 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
914 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
915 	    level, cur_power(cp)))
916 
917 	if (pm_set_power(dip, cmpt, level,  direction,
918 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
919 		if (direction == PM_LEVEL_UPONLY) {
920 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
921 			(void) ddi_pathname(dip, pathbuf);
922 			cmn_err(CE_WARN, "Device %s failed to power up.",
923 			    pathbuf);
924 			kmem_free(pathbuf, MAXPATHLEN);
925 		}
926 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
927 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
928 		    pm_decode_direction(direction), level, result))
929 		return (DDI_FAILURE);
930 	}
931 
932 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
933 	    PM_DEVICE(dip)))
934 	pm_rescan(dip);
935 	return (DDI_SUCCESS);
936 }
937 
938 /*
939  * We can get multiple pm_rescan() threads; if one of them discovers
940  * that no scan is running at the moment, it kicks it into action.
941  * Otherwise, it tells the current scanning thread to scan again when
942  * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
943  * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
944  * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t of the device to rescan.  The function takes a
 * void * because it is also used as the timeout() callback that this
 * function and pm_scan() schedule.
945  */
946 void
947 pm_rescan(void *arg)
948 {
949 	PMD_FUNC(pmf, "rescan")
950 	dev_info_t	*dip = (dev_info_t *)arg;
951 	pm_info_t	*info;
952 	pm_scan_t	*scanp;
953 	timeout_id_t	scanid;
954 
955 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
956 	PM_LOCK_DIP(dip);
957 	info = PM_GET_PM_INFO(dip);
958 	scanp = PM_GET_PM_SCAN(dip);
	/*
	 * Nothing to do if scanning is disabled globally, the device is not
	 * scannable or has no pm/scan state, or its scan is being stopped.
	 */
959 	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
960 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
961 		PM_UNLOCK_DIP(dip);
962 		return;
963 	}
964 	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is running; just ask it to go around again */
965 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
966 		PM_UNLOCK_DIP(dip);
967 		return;
968 	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it.  The id is cleared
		 * under the lock, and the lock is dropped across untimeout().
		 */
969 		scanid = scanp->ps_scan_id;
970 		scanp->ps_scan_id = 0;
971 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
972 		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
973 		PM_UNLOCK_DIP(dip);
974 		(void) untimeout(scanid);
975 		PM_LOCK_DIP(dip);
976 	}
977 
978 	/*
979 	 * Dispatching pm_scan during attach time is risky due to the fact that
980 	 * attach might soon fail and dip dissolved, and panic may happen while
981 	 * attempting to stop scan. So schedule a pm_rescan instead.
982 	 * (Note that if either of the first two terms is true, taskq_dispatch
983 	 * will not be invoked).
984 	 *
985 	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
986 	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
987 	 * to regulate the dispatching.
988 	 *
989 	 * Scan is stopped before the device is detached (in pm_detaching())
990 	 * but it may get re-started during the post_detach processing if the
991 	 * driver fails to detach.
992 	 */
993 	if (DEVI_IS_ATTACHING(dip) ||
994 	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
995 	    !taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP)) {
996 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
997 		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
998 		if (scanp->ps_scan_id) {
999 			scanid = scanp->ps_scan_id;
1000 			scanp->ps_scan_id = 0;
1001 			PM_UNLOCK_DIP(dip);
1002 			(void) untimeout(scanid);
1003 			PM_LOCK_DIP(dip);
			/*
			 * The lock was dropped above, so another thread may
			 * have scheduled its own rescan in the meantime; if
			 * so, leave that one in place.
			 */
1004 			if (scanp->ps_scan_id) {
1005 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
1006 				    "thread scheduled pm_rescan, scanid %lx\n",
1007 				    pmf, PM_DEVICE(dip),
1008 				    (ulong_t)scanp->ps_scan_id))
1009 				PM_UNLOCK_DIP(dip);
1010 				return;
1011 			}
1012 		}
		/*
		 * Try again later: after pm_id_ticks when idle-down is in
		 * effect, otherwise after the device's minimum scan interval.
		 */
1013 		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1014 		    (scanp->ps_idle_down ? pm_id_ticks :
1015 		    (PM_MIN_SCAN(dip) * hz)));
1016 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
1017 		    "scanid %lx\n", pmf, PM_DEVICE(dip),
1018 		    (ulong_t)scanp->ps_scan_id))
1019 	} else {
1020 		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
1021 		    pmf, PM_DEVICE(dip)))
		/* pm_scan() clears this flag once it starts running */
1022 		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
1023 	}
1024 	PM_UNLOCK_DIP(dip);
1025 }
1026 
/*
 * Scan the device given by arg (a dev_info_t *); pm_rescan() dispatches this
 * function onto system_taskq.
 *
 * Clears PM_SCAN_DISPATCHED, then, with PM_SCANNING set, calls pm_scan_dev()
 * repeatedly for as long as some other thread sets PM_SCAN_AGAIN.  When done,
 * and unless pm_scan_dev() returned LONG_MAX, schedules the next pm_rescan()
 * via timeout() after the number of seconds pm_scan_dev() returned.
 */
1027 void
1028 pm_scan(void *arg)
1029 {
1030 	PMD_FUNC(pmf, "scan")
1031 	dev_info_t	*dip = (dev_info_t *)arg;
1032 	pm_scan_t	*scanp;
1033 	time_t		nextscan;
1034 
1035 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
1036 
1037 	PM_LOCK_DIP(dip);
1038 	scanp = PM_GET_PM_SCAN(dip);
1039 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1040 
	/* scanning not allowed right now: drop the request entirely */
1041 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1042 	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
1043 		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
1044 		PM_UNLOCK_DIP(dip);
1045 		return;
1046 	}
1047 
1048 	if (scanp->ps_idle_down) {
1049 		/*
1050 		 * make sure we remember idledown was in effect until
1051 		 * we've completed the scan
1052 		 */
1053 		PMID_SET_SCANS(scanp->ps_idle_down)
1054 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
1055 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1056 	}
1057 
1058 	/* possible having two threads running pm_scan() */
1059 	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* let the thread already scanning go around once more */
1060 		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
1061 		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
1062 		    pmf, PM_DEVICE(dip)))
1063 		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
1064 		PM_UNLOCK_DIP(dip);
1065 		return;
1066 	}
1067 
1068 	scanp->ps_scan_flags |= PM_SCANNING;
1069 	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	/*
	 * pm_scan_dev() is called without the dip lock held; repeat while
	 * anyone asked for another pass during the scan.
	 */
1070 	do {
1071 		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
1072 		PM_UNLOCK_DIP(dip);
1073 		nextscan = pm_scan_dev(dip);
1074 		PM_LOCK_DIP(dip);
1075 	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);
1076 
1077 	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
1078 	scanp->ps_scan_flags &= ~PM_SCANNING;
1079 
1080 	if (scanp->ps_idle_down) {
1081 		scanp->ps_idle_down &= ~PMID_SCANS;
1082 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
1083 		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
1084 	}
1085 
1086 	/* schedule for next idle check */
1087 	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot overflow */
1088 		if (nextscan > (LONG_MAX / hz))
1089 			nextscan = (LONG_MAX - 1) / hz;
1090 		if (scanp->ps_scan_id) {
1091 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
1092 			    "another rescan scheduled scanid(%lx)\n", pmf,
1093 			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
1094 			PM_UNLOCK_DIP(dip);
1095 			return;
1096 		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
1097 			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
1098 			    (clock_t)(nextscan * hz));
1099 			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
1100 			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
1101 			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
1102 		}
1103 	}
1104 	PM_UNLOCK_DIP(dip);
1105 }
1106 
1107 void
1108 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1109 {
1110 	int components = PM_NUMCMPTS(dip);
1111 	int i;
1112 
1113 	ASSERT(components > 0);
1114 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1115 	for (i = 0; i < components; i++) {
1116 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1117 	}
1118 	PM_UNLOCK_BUSY(dip);
1119 }
1120 
1121 /*
1122  * Returns true if device needs to be kept up because it exported the
1123  * "no-involuntary-power-cycles" property or we're pretending it did (console
1124  * fb case) or it is an ancestor of such a device and has used up the "one
1125  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1126  * upon detach
 *
 * In terms of the counters kept on the node: returns 1 for the console
 * frame buffer, 0 when devi_pm_noinvolpm is zero, and otherwise 1 exactly
 * when devi_pm_noinvolpm differs from devi_pm_volpmd.
1127  */
1128 int
1129 pm_noinvol(dev_info_t *dip)
1130 {
1131 	PMD_FUNC(pmf, "noinvol")
1132 
1133 	/*
1134 	 * This doesn't change over the life of a driver, so no locking needed
1135 	 */
1136 	if (PM_IS_CFB(dip)) {
1137 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1138 		    pmf, PM_DEVICE(dip)))
1139 		return (1);
1140 	}
1141 	/*
1142 	 * Not an issue if no such kids
1143 	 */
1144 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1145 #ifdef DEBUG
		/*
		 * volpmd should be zero whenever noinvolpm is; if it is not,
		 * log the counts of this node and all its ancestors before
		 * the ASSERT below fires.
		 */
1146 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1147 			dev_info_t *pdip = dip;
1148 			do {
1149 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1150 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1151 				    DEVI(pdip)->devi_pm_noinvolpm,
1152 				    DEVI(pdip)->devi_pm_volpmd))
1153 				pdip = ddi_get_parent(pdip);
1154 			} while (pdip);
1155 		}
1156 #endif
1157 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1158 		return (0);
1159 	}
1160 
1161 	/*
1162 	 * Since we now maintain the counts correct at every node, we no longer
1163 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1164 	 * without the children getting their counts adjusted.
1165 	 */
1166 
1167 #ifdef	DEBUG
1168 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1169 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1170 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1171 		    PM_DEVICE(dip)))
1172 #endif
1173 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1174 }
1175 
1176 static int	cur_threshold(dev_info_t *, int);
1177 static int	pm_next_lower_power(pm_component_t *, int);
1178 
1179 /*
1180  * This function performs the actual scanning of the device.
1181  * It attempts to power off the indicated device's components if they have
1182  * been idle and other restrictions are met.
1183  * pm_scan_dev calculates and returns when the next scan should happen for
1184  * this device.
 *
 * The return value is a number of seconds from now.  LONG_MAX is returned
 * when the device must not be scanned at present or when no component is
 * left that could be lowered further.
1185  */
1186 time_t
1187 pm_scan_dev(dev_info_t *dip)
1188 {
1189 	PMD_FUNC(pmf, "scan_dev")
1190 	pm_scan_t	*scanp;
1191 	time_t		*timestamp, idletime, now, thresh;
1192 	time_t		timeleft = 0;
1193 #ifdef PMDDEBUG
1194 	int		curpwr;
1195 #endif
1196 	int		i, nxtpwr, pwrndx, unused;
1197 	size_t		size;
1198 	pm_component_t	 *cp;
1199 	dev_info_t	*pdip = ddi_get_parent(dip);
1200 	int		circ;
1201 	clock_t		min_scan = pm_default_min_scan;
1202 
1203 	/*
1204 	 * skip attaching device
1205 	 */
1206 	if (DEVI_IS_ATTACHING(dip)) {
1207 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
1208 		    pmf, PM_DEVICE(dip), min_scan))
1209 		return (min_scan);
1210 	}
1211 
1212 	PM_LOCK_DIP(dip);
1213 	scanp = PM_GET_PM_SCAN(dip);
1214 	min_scan = PM_MIN_SCAN(dip);
1215 	ASSERT(scanp && PM_GET_PM_INFO(dip));
1216 
1217 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1218 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
1219 	    PM_KUC(dip)))
1220 
1221 	/* no scan under the following conditions */
1222 	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
1223 	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
1224 	    (PM_KUC(dip) != 0) ||
1225 	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
1226 		PM_UNLOCK_DIP(dip);
1227 		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
1228 		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
1229 		    "kuc(%d), %s directpm, %s pm_noinvol\n",
1230 		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
1231 		    cpupm, PM_KUC(dip),
1232 		    PM_ISDIRECT(dip) ? "is" : "is not",
1233 		    pm_noinvol(dip) ? "is" : "is not"))
1234 		return (LONG_MAX);
1235 	}
1236 	PM_UNLOCK_DIP(dip);
1237 
	/*
	 * The parent is entered with a try-lock only; if that fails, give up
	 * on this pass and ask to be called again in one second.
	 */
1238 	if (!ndi_devi_tryenter(pdip, &circ)) {
1239 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
1240 		    pmf, PM_DEVICE(pdip)))
1241 		return ((time_t)1);
1242 	}
	/* snapshot "now" and every component's last-activity timestamp */
1243 	now = gethrestime_sec();
1244 	size = PM_NUMCMPTS(dip) * sizeof (time_t);
1245 	timestamp = kmem_alloc(size, KM_SLEEP);
1246 	pm_get_timestamps(dip, timestamp);
1247 
1248 	/*
1249 	 * Since we removed support for backwards compatible devices,
1250 	 * (see big comment at top of file)
1251 	 * it is no longer required to deal with component 0 last.
1252 	 */
1253 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
1254 		/*
1255 		 * If already off (an optimization, perhaps)
1256 		 */
1257 		cp = PM_CP(dip, i);
1258 		pwrndx = cp->pmc_cur_pwr;
1259 #ifdef PMDDEBUG
1260 		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
1261 		    PM_LEVEL_UNKNOWN :
1262 		    cp->pmc_comp.pmc_lvals[pwrndx];
1263 #endif
1264 
1265 		if (pwrndx == 0) {
1266 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
1267 			    "lowest\n", pmf, PM_DEVICE(dip), i))
1268 			/* skip device if off or at its lowest */
1269 			continue;
1270 		}
1271 
1272 		thresh = cur_threshold(dip, i);		/* comp i threshold */
1273 		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
1274 			/* were busy or newly became busy by another thread */
			/* look again after max(thresh, min_scan) at the latest */
1275 			if (timeleft == 0)
1276 				timeleft = max(thresh, min_scan);
1277 			else
1278 				timeleft = min(
1279 				    timeleft, max(thresh, min_scan));
1280 			continue;
1281 		}
1282 
1283 		idletime = now - timestamp[i];		/* idle time */
1284 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
1285 		    pmf, PM_DEVICE(dip), i, idletime))
1286 		if (idletime >= thresh || PM_IS_PID(dip)) {
1287 			nxtpwr = pm_next_lower_power(cp, pwrndx);
1288 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
1289 			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
			/*
			 * A pm_set_power() failure only counts as a failure
			 * if the component did not end up at nxtpwr anyway.
			 */
1290 			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
1291 			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
1292 			    PM_CURPOWER(dip, i) != nxtpwr) {
1293 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1294 				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
1295 				    i, curpwr, nxtpwr))
				/*
				 * NOTE(review): this overwrites timeleft
				 * unconditionally, discarding any smaller
				 * value computed for an earlier component --
				 * confirm that is intended.
				 */
1296 				timeleft = min_scan;
1297 				continue;
1298 			} else {
1299 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1300 				    "%d->%d, GOOD curpwr %d\n", pmf,
1301 				    PM_DEVICE(dip), i, curpwr, nxtpwr,
1302 				    cur_power(cp)))
1303 
1304 				if (nxtpwr == 0)	/* component went off */
1305 					continue;
1306 
1307 				/*
1308 				 * scan to next lower level
1309 				 */
1310 				if (timeleft == 0)
1311 					timeleft = max(
1312 					    1, cur_threshold(dip, i));
1313 				else
1314 					timeleft = min(timeleft,
1315 					    max(1, cur_threshold(dip, i)));
1316 				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
1317 				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
1318 				    i, timeleft))
1319 			}
1320 		} else {	/* comp not idle long enough */
			/* come back when the rest of the threshold has elapsed */
1321 			if (timeleft == 0)
1322 				timeleft = thresh - idletime;
1323 			else
1324 				timeleft = min(timeleft, (thresh - idletime));
1325 			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
1326 			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
1327 		}
1328 	}
1329 	ndi_devi_exit(pdip, circ);
1330 	kmem_free(timestamp, size);
1331 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
1332 	    PM_DEVICE(dip), timeleft))
1333 
1334 	/*
1335 	 * if components are already at lowest level, timeleft is left 0
1336 	 */
1337 	return ((timeleft == 0) ? LONG_MAX : timeleft);
1338 }
1339 
1340 /*
1341  * pm_scan_stop - cancel scheduled pm_rescan,
1342  *                wait for termination of dispatched pm_scan thread
1343  *                     and active pm_scan_dev thread.
 *
 * Sets PM_SCAN_STOP in the device's scan flags; this function does not
 * clear it again.  Does nothing if the device has no scan state.
1344  */
1345 void
1346 pm_scan_stop(dev_info_t *dip)
1347 {
1348 	PMD_FUNC(pmf, "scan_stop")
1349 	pm_scan_t	*scanp;
1350 	timeout_id_t	scanid;
1351 
1352 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1353 	PM_LOCK_DIP(dip);
1354 	scanp = PM_GET_PM_SCAN(dip);
1355 	if (!scanp) {
1356 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1357 		    pmf, PM_DEVICE(dip)))
1358 		PM_UNLOCK_DIP(dip);
1359 		return;
1360 	}
1361 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1362 
1363 	/*
	 * cancel any scheduled pm_rescan timeout; loop because the lock is
	 * dropped across untimeout() and a new id may appear meanwhile
	 */
1364 	while (scanp->ps_scan_id) {
1365 		scanid = scanp->ps_scan_id;
1366 		scanp->ps_scan_id = 0;
1367 		PM_UNLOCK_DIP(dip);
1368 		(void) untimeout(scanid);
1369 		PM_LOCK_DIP(dip);
1370 	}
1371 
	/*
	 * Poll, with the lock released around each delay(1), until no scan
	 * is dispatched or running any more.
	 */
1372 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1373 		PM_UNLOCK_DIP(dip);
1374 		delay(1);
1375 		PM_LOCK_DIP(dip);
1376 	}
1377 	PM_UNLOCK_DIP(dip);
1378 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1379 }
1380 
1381 int
1382 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1383 {
1384 	_NOTE(ARGUNUSED(arg))
1385 
1386 	if (!PM_GET_PM_SCAN(dip))
1387 		return (DDI_WALK_CONTINUE);
1388 	ASSERT(!PM_ISBC(dip));
1389 	pm_scan_stop(dip);
1390 	return (DDI_WALK_CONTINUE);
1391 }
1392 
1393 /*
1394  * Converts a power level value to its index
1395  */
1396 static int
1397 power_val_to_index(pm_component_t *cp, int val)
1398 {
1399 	int limit, i, *ip;
1400 
1401 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1402 	    val != PM_LEVEL_EXACT);
1403 	/*  convert power value into index (i) */
1404 	limit = cp->pmc_comp.pmc_numlevels;
1405 	ip = cp->pmc_comp.pmc_lvals;
1406 	for (i = 0; i < limit; i++)
1407 		if (val == *ip++)
1408 			return (i);
1409 	return (-1);
1410 }
1411 
1412 /*
1413  * Converts a numeric power level to a printable string
1414  */
1415 static char *
1416 power_val_to_string(pm_component_t *cp, int val)
1417 {
1418 	int index;
1419 
1420 	if (val == PM_LEVEL_UPONLY)
1421 		return ("<UPONLY>");
1422 
1423 	if (val == PM_LEVEL_UNKNOWN ||
1424 	    (index = power_val_to_index(cp, val)) == -1)
1425 		return ("<LEVEL_UNKNOWN>");
1426 
1427 	return (cp->pmc_comp.pmc_lnames[index]);
1428 }
1429 
1430 /*
1431  * Return true if this node has been claimed by a ppm.
1432  */
1433 static int
1434 pm_ppm_claimed(dev_info_t *dip)
1435 {
1436 	return (PPM(dip) != NULL);
1437 }
1438 
1439 /*
1440  * A node which was voluntarily power managed has just used up its "free cycle"
1441  * and need is volpmd field cleared, and the same done to all its descendents
1442  */
1443 static void
1444 pm_clear_volpm_dip(dev_info_t *dip)
1445 {
1446 	PMD_FUNC(pmf, "clear_volpm_dip")
1447 
1448 	if (dip == NULL)
1449 		return;
1450 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1451 	    PM_DEVICE(dip)))
1452 	DEVI(dip)->devi_pm_volpmd = 0;
1453 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1454 		pm_clear_volpm_dip(dip);
1455 	}
1456 }
1457 
1458 /*
1459  * A node which was voluntarily power managed has used up the "free cycles"
1460  * for the subtree that it is the root of.  Scan through the list of detached
1461  * nodes and adjust the counts of any that are descendents of the node.
1462  */
1463 static void
1464 pm_clear_volpm_list(dev_info_t *dip)
1465 {
1466 	PMD_FUNC(pmf, "clear_volpm_list")
1467 	char	*pathbuf;
1468 	size_t	len;
1469 	pm_noinvol_t *ip;
1470 
1471 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1472 	(void) ddi_pathname(dip, pathbuf);
1473 	len = strlen(pathbuf);
1474 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1475 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1476 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1477 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1478 		    ip->ni_path))
1479 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1480 		    ip->ni_path[len] == '/') {
1481 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1482 			    ip->ni_path))
1483 			ip->ni_volpmd = 0;
1484 			ip->ni_wasvolpmd = 0;
1485 		}
1486 	}
1487 	kmem_free(pathbuf, MAXPATHLEN);
1488 	rw_exit(&pm_noinvol_rwlock);
1489 }
1490 
1491 /*
1492  * Powers a device, suspending or resuming the driver if it is a backward
1493  * compatible device, calling into ppm to change power level.
1494  * Called with the component's power lock held.
1495  */
1496 static int
1497 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1498     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1499 {
1500 	PMD_FUNC(pmf, "power_dev")
1501 	power_req_t power_req;
1502 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1503 	int		resume_needed = 0;
1504 	int		suspended = 0;
1505 	int		result;
1506 #ifdef PMDDEBUG
1507 	struct pm_component *cp = PM_CP(dip, comp);
1508 #endif
1509 	int		bc = PM_ISBC(dip);
1510 	int pm_all_components_off(dev_info_t *);
1511 	int		clearvolpmd = 0;
1512 	char		pathbuf[MAXNAMELEN];
1513 #ifdef PMDDEBUG
1514 	char *ppmname, *ppmaddr;
1515 #endif
1516 	/*
1517 	 * If this is comp 0 of a backwards compat device and we are
1518 	 * going to take the power away, we need to detach it with
1519 	 * DDI_PM_SUSPEND command.
1520 	 */
1521 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1522 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1523 			/* We could not suspend before turning cmpt zero off */
1524 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1525 			    pmf, PM_DEVICE(dip)))
1526 			return (DDI_FAILURE);
1527 		} else {
1528 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1529 			suspended++;
1530 		}
1531 	}
1532 	power_req.request_type = PMR_PPM_SET_POWER;
1533 	power_req.req.ppm_set_power_req.who = dip;
1534 	power_req.req.ppm_set_power_req.cmpt = comp;
1535 	power_req.req.ppm_set_power_req.old_level = old_level;
1536 	power_req.req.ppm_set_power_req.new_level = level;
1537 	power_req.req.ppm_set_power_req.canblock = canblock;
1538 	power_req.req.ppm_set_power_req.cookie = NULL;
1539 #ifdef PMDDEBUG
1540 	if (pm_ppm_claimed(dip)) {
1541 		ppmname = PM_NAME(PPM(dip));
1542 		ppmaddr = PM_ADDR(PPM(dip));
1543 
1544 	} else {
1545 		ppmname = "noppm";
1546 		ppmaddr = "0";
1547 	}
1548 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1549 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1550 	    power_val_to_string(cp, old_level), old_level,
1551 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1552 #endif
1553 	/*
1554 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1555 	 * bc device comp 0 is powering on, then we count it as a power cycle
1556 	 * against its voluntary count.
1557 	 */
1558 	if (DEVI(dip)->devi_pm_volpmd &&
1559 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1560 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1561 		clearvolpmd = 1;
1562 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1563 	    &power_req, &result)) == DDI_SUCCESS) {
1564 		/*
1565 		 * Now do involuntary pm accounting;  If we've just cycled power
1566 		 * on a voluntarily pm'd node, and by inference on its entire
1567 		 * subtree, we need to set the subtree (including those nodes
1568 		 * already detached) volpmd counts to 0, and subtract out the
1569 		 * value of the current node's volpmd count from the ancestors
1570 		 */
1571 		if (clearvolpmd) {
1572 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1573 			pm_clear_volpm_dip(dip);
1574 			pm_clear_volpm_list(dip);
1575 			if (volpmd) {
1576 				(void) ddi_pathname(dip, pathbuf);
1577 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1578 				    volpmd, 0, pathbuf, dip);
1579 			}
1580 		}
1581 	} else {
1582 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1583 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1584 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1585 	}
1586 	/*
1587 	 * If some other devices were also powered up (e.g. other cpus in
1588 	 * the same domain) return a pointer to that list
1589 	 */
1590 	if (devlist) {
1591 		*devlist = (pm_ppm_devlist_t *)
1592 		    power_req.req.ppm_set_power_req.cookie;
1593 	}
1594 	/*
1595 	 * We will have to resume the device if the device is backwards compat
1596 	 * device and either of the following is true:
1597 	 * -This is comp 0 and we have successfully powered it up
1598 	 * -This is comp 0 and we have failed to power it down. Resume is
1599 	 *  needed because we have suspended it above
1600 	 */
1601 
1602 	if (bc && comp == 0) {
1603 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1604 		if (power_op_ret == DDI_SUCCESS) {
1605 			if (POWERING_ON(old_level, level)) {
1606 				/*
1607 				 * It must be either suspended or resumed
1608 				 * via pm_power_has_changed path
1609 				 */
1610 				ASSERT((DEVI(dip)->devi_pm_flags &
1611 				    PMC_SUSPENDED) ||
1612 				    (PM_CP(dip, comp)->pmc_flags &
1613 				    PM_PHC_WHILE_SET_POWER));
1614 
1615 					resume_needed = suspended;
1616 			}
1617 		} else {
1618 			if (POWERING_OFF(old_level, level)) {
1619 				/*
1620 				 * It must be either suspended or resumed
1621 				 * via pm_power_has_changed path
1622 				 */
1623 				ASSERT((DEVI(dip)->devi_pm_flags &
1624 				    PMC_SUSPENDED) ||
1625 				    (PM_CP(dip, comp)->pmc_flags &
1626 				    PM_PHC_WHILE_SET_POWER));
1627 
1628 					resume_needed = suspended;
1629 			}
1630 		}
1631 	}
1632 	if (resume_needed) {
1633 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1634 		/* ppm is not interested in DDI_PM_RESUME */
1635 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1636 		    DDI_SUCCESS) {
1637 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1638 		} else
1639 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1640 			    PM_DEVICE(dip));
1641 	}
1642 	return (power_op_ret);
1643 }
1644 
1645 /*
1646  * Return true if we are the owner or a borrower of the devi lock.  See
1647  * pm_lock_power_single() about borrowing the lock.
1648  */
1649 static int
1650 pm_devi_lock_held(dev_info_t *dip)
1651 {
1652 	lock_loan_t *cur;
1653 
1654 	if (DEVI_BUSY_OWNED(dip))
1655 		return (1);
1656 
1657 	/* return false if no locks borrowed */
1658 	if (lock_loan_head.pmlk_next == NULL)
1659 		return (0);
1660 
1661 	mutex_enter(&pm_loan_lock);
1662 	/* see if our thread is registered as a lock borrower. */
1663 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1664 		if (cur->pmlk_borrower == curthread)
1665 			break;
1666 	mutex_exit(&pm_loan_lock);
1667 
1668 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1669 }
1670 
1671 /*
1672  * pm_set_power: adjusts power level of device.	 Assumes device is power
1673  * manageable & component exists.
1674  *
1675  * Cases which require us to bring up devices we keep up ("wekeepups") for
1676  * backwards compatible devices:
1677  *	component 0 is off and we're bringing it up from 0
1678  *		bring up wekeepup first
1679  *	and recursively when component 0 is off and we bring some other
1680  *	component up from 0
1681  * For devices which are not backward compatible, our dependency notion is much
1682  * simpler.  Unless all components are off, then wekeeps must be on.
1683  * We don't treat component 0 differently.
1684  * Canblock tells how to deal with a direct pm'd device.
1685  * Scan arg tells us if we were called from scan, in which case we don't need
1686  * to go back to the root node and walk down to change power.
 *
 * direction must be PM_LEVEL_UPONLY, PM_LEVEL_DOWNONLY or PM_LEVEL_EXACT.
 * retp is handed to the bus-power operation as its errno pointer; DEBUG
 * kernels dereference it here, so it must not be NULL.  The return value
 * is whatever the bus-power operation returns.
1687  */
1688 int
1689 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1690     pm_canblock_t canblock, int scan, int *retp)
1691 {
1692 	PMD_FUNC(pmf, "set_power")
1693 	char		*pathbuf;
1694 	pm_bp_child_pwrchg_t bpc;
1695 	pm_sp_misc_t	pspm;
1696 	int		ret = DDI_SUCCESS;
1697 	int		unused = DDI_SUCCESS;
1698 	dev_info_t	*pdip = ddi_get_parent(dip);
1699 
1700 #ifdef DEBUG
1701 	int		diverted = 0;
1702 
1703 	/*
1704 	 * This prevents operations on the console from calling prom_printf and
1705 	 * either deadlocking or bringing up the console because of debug
1706 	 * output
1707 	 */
1708 	if (dip == cfb_dip) {
1709 		diverted++;
1710 		mutex_enter(&pm_debug_lock);
1711 		pm_divertdebug++;
1712 		mutex_exit(&pm_debug_lock);
1713 	}
1714 #endif
1715 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1716 	    direction == PM_LEVEL_EXACT);
1717 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1718 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
	/* package the request for the bus-power operation */
1719 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1720 	(void) ddi_pathname(dip, pathbuf);
1721 	bpc.bpc_dip = dip;
1722 	bpc.bpc_path = pathbuf;
1723 	bpc.bpc_comp = comp;
1724 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1725 	bpc.bpc_nlevel = level;
1726 	pspm.pspm_direction = direction;
1727 	pspm.pspm_errnop = retp;
1728 	pspm.pspm_canblock = canblock;
1729 	pspm.pspm_scan = scan;
1730 	bpc.bpc_private = &pspm;
1731 
1732 	/*
1733 	 * If a config operation is being done (we've locked the parent) or
1734 	 * we already hold the power lock (we've locked the node)
1735 	 * then we can operate directly on the node because we have already
1736 	 * brought up all the ancestors, otherwise, we have to go back to the
1737 	 * top of the tree.
	 *
	 * NOTE(review): pdip comes straight from ddi_get_parent(dip) and is
	 * passed to pm_devi_lock_held() unchecked -- confirm this is never
	 * called on the root node.
1738 	 */
1739 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1740 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1741 		    (void *)&bpc, (void *)&unused);
1742 	else
1743 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1744 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1745 #ifdef DEBUG
1746 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1747 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1748 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1749 	}
	/* undo the debug-output diversion set up on entry */
1750 	if (diverted) {
1751 		mutex_enter(&pm_debug_lock);
1752 		pm_divertdebug--;
1753 		mutex_exit(&pm_debug_lock);
1754 	}
1755 #endif
1756 	kmem_free(pathbuf, MAXPATHLEN);
1757 	return (ret);
1758 }
1759 
1760 /*
1761  * If holddip is set, then if a dip is found we return with the node held.
1762  *
1763  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1764  * (resolve_pathname), but it does not drive attach.
1765  */
1766 dev_info_t *
1767 pm_name_to_dip(char *pathname, int holddip)
1768 {
1769 	struct pathname pn;
1770 	char		*component;
1771 	dev_info_t	*parent, *child;
1772 	int		circ;
1773 
1774 	if ((pathname == NULL) || (*pathname != '/'))
1775 		return (NULL);
1776 
1777 	/* setup pathname and allocate component */
1778 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1779 		return (NULL);
1780 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1781 
1782 	/* start at top, process '/' component */
1783 	parent = child = ddi_root_node();
1784 	ndi_hold_devi(parent);
1785 	pn_skipslash(&pn);
1786 	ASSERT(i_ddi_devi_attached(parent));
1787 
1788 	/* process components of pathname */
1789 	while (pn_pathleft(&pn)) {
1790 		(void) pn_getcomponent(&pn, component);
1791 
1792 		/* enter parent and search for component child */
1793 		ndi_devi_enter(parent, &circ);
1794 		child = ndi_devi_findchild(parent, component);
1795 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1796 			child = NULL;
1797 			ndi_devi_exit(parent, circ);
1798 			ndi_rele_devi(parent);
1799 			goto out;
1800 		}
1801 
1802 		/* attached child found, hold child and release parent */
1803 		ndi_hold_devi(child);
1804 		ndi_devi_exit(parent, circ);
1805 		ndi_rele_devi(parent);
1806 
1807 		/* child becomes parent, and process next component */
1808 		parent = child;
1809 		pn_skipslash(&pn);
1810 
1811 		/* loop with active ndi_devi_hold of child->parent */
1812 	}
1813 
1814 out:
1815 	pn_free(&pn);
1816 	kmem_free(component, MAXNAMELEN);
1817 
1818 	/* if we are not asked to return with hold, drop current hold */
1819 	if (child && !holddip)
1820 		ndi_rele_devi(child);
1821 	return (child);
1822 }
1823 
1824 /*
1825  * Search for a dependency and mark it unsatisfied
1826  */
1827 static void
1828 pm_unsatisfy(char *keeper, char *kept)
1829 {
1830 	PMD_FUNC(pmf, "unsatisfy")
1831 	pm_pdr_t *dp;
1832 
1833 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1834 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1835 		if (!dp->pdr_isprop) {
1836 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1837 			    (dp->pdr_kept_count > 0) &&
1838 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1839 				if (dp->pdr_satisfied) {
1840 					dp->pdr_satisfied = 0;
1841 					pm_unresolved_deps++;
1842 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1843 					    "pm_unresolved_deps now %d\n", pmf,
1844 					    pm_unresolved_deps))
1845 				}
1846 			}
1847 		}
1848 	}
1849 }
1850 
1851 /*
1852  * Device dip is being un power managed, it keeps up count other devices.
1853  * We need to release any hold we have on the kept devices, and also
1854  * mark the dependency no longer satisfied.
1855  */
1856 static void
1857 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1858 {
1859 	PMD_FUNC(pmf, "unkeeps")
1860 	int i, j;
1861 	dev_info_t *kept;
1862 	dev_info_t *dip;
1863 	struct pm_component *cp;
1864 	int keeper_on = 0, circ;
1865 
1866 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1867 	    keeper, (void *)keptpaths))
1868 	/*
1869 	 * Try to grab keeper. Keeper may have gone away by now,
1870 	 * in this case, used the passed in value pwr
1871 	 */
1872 	dip = pm_name_to_dip(keeper, 1);
1873 	for (i = 0; i < count; i++) {
1874 		/* Release power hold */
1875 		kept = pm_name_to_dip(keptpaths[i], 1);
1876 		if (kept) {
1877 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1878 			    PM_DEVICE(kept), i))
1879 			/*
1880 			 * We need to check if we skipped a bringup here
1881 			 * because we could have failed the bringup
1882 			 * (ie DIRECT PM device) and have
1883 			 * not increment the count.
1884 			 */
1885 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1886 				keeper_on = 0;
1887 				PM_LOCK_POWER(dip, &circ);
1888 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1889 					cp = &DEVI(dip)->devi_pm_components[j];
1890 					if (cur_power(cp)) {
1891 						keeper_on++;
1892 						break;
1893 					}
1894 				}
1895 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1896 					pm_rele_power(kept);
1897 					DEVI(kept)->devi_pm_flags
1898 					    &= ~PMC_SKIP_BRINGUP;
1899 				}
1900 				PM_UNLOCK_POWER(dip, circ);
1901 			} else if (pwr) {
1902 				if (PM_SKBU(kept) == 0) {
1903 					pm_rele_power(kept);
1904 					DEVI(kept)->devi_pm_flags
1905 					    &= ~PMC_SKIP_BRINGUP;
1906 				}
1907 			}
1908 			ddi_release_devi(kept);
1909 		}
1910 		/*
1911 		 * mark this dependency not satisfied
1912 		 */
1913 		pm_unsatisfy(keeper, keptpaths[i]);
1914 	}
1915 	if (dip)
1916 		ddi_release_devi(dip);
1917 }
1918 
1919 /*
1920  * Device kept is being un power managed, it is kept up by keeper.
1921  * We need to mark the dependency no longer satisfied.
1922  */
1923 static void
1924 pm_unkepts(char *kept, char *keeper)
1925 {
1926 	PMD_FUNC(pmf, "unkepts")
1927 	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
1928 	ASSERT(keeper != NULL);
1929 	/*
1930 	 * mark this dependency not satisfied
1931 	 */
1932 	pm_unsatisfy(keeper, kept);
1933 }
1934 
1935 /*
1936  * Removes dependency information and hold on the kepts, if the path is a
1937  * path of a keeper.
1938  */
1939 static void
1940 pm_free_keeper(char *path, int pwr)
1941 {
1942 	pm_pdr_t *dp;
1943 	int i;
1944 	size_t length;
1945 
1946 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1947 		if (strcmp(dp->pdr_keeper, path) != 0)
1948 			continue;
1949 		/*
1950 		 * Remove all our kept holds and the dependency records,
1951 		 * then free up the kept lists.
1952 		 */
1953 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1954 		if (dp->pdr_kept_count)  {
1955 			for (i = 0; i < dp->pdr_kept_count; i++) {
1956 				length = strlen(dp->pdr_kept_paths[i]);
1957 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1958 			}
1959 			kmem_free(dp->pdr_kept_paths,
1960 			    dp->pdr_kept_count * sizeof (char **));
1961 			dp->pdr_kept_paths = NULL;
1962 			dp->pdr_kept_count = 0;
1963 		}
1964 	}
1965 }
1966 
1967 /*
1968  * Removes the device represented by path from the list of kepts, if the
1969  * path is a path of a kept
1970  */
1971 static void
1972 pm_free_kept(char *path)
1973 {
1974 	pm_pdr_t *dp;
1975 	int i;
1976 	int j, count;
1977 	size_t length;
1978 	char **paths;
1979 
1980 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1981 		if (dp->pdr_kept_count == 0)
1982 			continue;
1983 		count = dp->pdr_kept_count;
1984 		/* Remove this device from the kept path lists */
1985 		for (i = 0; i < count; i++) {
1986 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1987 				pm_unkepts(path, dp->pdr_keeper);
1988 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1989 				kmem_free(dp->pdr_kept_paths[i], length);
1990 				dp->pdr_kept_paths[i] = NULL;
1991 				dp->pdr_kept_count--;
1992 			}
1993 		}
1994 		/* Compact the kept paths array */
1995 		if (dp->pdr_kept_count) {
1996 			length = dp->pdr_kept_count * sizeof (char **);
1997 			paths = kmem_zalloc(length, KM_SLEEP);
1998 			j = 0;
1999 			for (i = 0; i < count; i++) {
2000 				if (dp->pdr_kept_paths[i] != NULL) {
2001 					paths[j] = dp->pdr_kept_paths[i];
2002 					j++;
2003 				}
2004 			}
2005 			ASSERT(j == dp->pdr_kept_count);
2006 		}
2007 		/* Now free the old array and point to the new one */
2008 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
2009 		if (dp->pdr_kept_count)
2010 			dp->pdr_kept_paths = paths;
2011 		else
2012 			dp->pdr_kept_paths = NULL;
2013 	}
2014 }
2015 
2016 /*
2017  * Free the dependency information for a device.
2018  */
2019 void
2020 pm_free_keeps(char *path, int pwr)
2021 {
2022 	PMD_FUNC(pmf, "free_keeps")
2023 
2024 #ifdef DEBUG
2025 	int doprdeps = 0;
2026 	void prdeps(char *);
2027 
2028 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2029 	if (pm_debug & PMD_KEEPS) {
2030 		doprdeps = 1;
2031 		prdeps("pm_free_keeps before");
2032 	}
2033 #endif
2034 	/*
2035 	 * First assume we are a keeper and remove all our kepts.
2036 	 */
2037 	pm_free_keeper(path, pwr);
2038 	/*
2039 	 * Now assume we a kept device, and remove all our records.
2040 	 */
2041 	pm_free_kept(path);
2042 #ifdef	DEBUG
2043 	if (doprdeps) {
2044 		prdeps("pm_free_keeps after");
2045 	}
2046 #endif
2047 }
2048 
2049 static int
2050 pm_is_kept(char *path)
2051 {
2052 	pm_pdr_t *dp;
2053 	int i;
2054 
2055 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2056 		if (dp->pdr_kept_count == 0)
2057 			continue;
2058 		for (i = 0; i < dp->pdr_kept_count; i++) {
2059 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2060 				return (1);
2061 		}
2062 	}
2063 	return (0);
2064 }
2065 
2066 static void
2067 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2068 {
2069 	PMD_FUNC(pmf, "hold_rele_power")
2070 	int circ;
2071 
2072 	if ((dip == NULL) ||
2073 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2074 		return;
2075 
2076 	PM_LOCK_POWER(dip, &circ);
2077 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2078 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2079 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2080 
2081 	PM_KUC(dip) += cnt;
2082 
2083 	ASSERT(PM_KUC(dip) >= 0);
2084 	PM_UNLOCK_POWER(dip, circ);
2085 
2086 	if (cnt < 0 && PM_KUC(dip) == 0)
2087 		pm_rescan(dip);
2088 }
2089 
2090 #define	MAX_PPM_HANDLERS	4
2091 
2092 kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */
2093 
2094 struct	ppm_callbacks {
2095 	int (*ppmc_func)(dev_info_t *);
2096 	dev_info_t	*ppmc_dip;
2097 } ppm_callbacks[MAX_PPM_HANDLERS + 1];
2098 
2099 
2100 /*
2101  * This routine calls into all the registered ppms to notify them
2102  * that either all components of power-managed devices are at their
2103  * lowest levels or no longer all are at their lowest levels.
2104  */
2105 static void
2106 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2107 {
2108 	struct ppm_callbacks *ppmcp;
2109 	power_req_t power_req;
2110 	int result = 0;
2111 
2112 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2113 	power_req.req.ppm_all_lowest_req.mode = mode;
2114 	mutex_enter(&ppm_lock);
2115 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2116 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2117 		    DDI_CTLOPS_POWER, &power_req, &result);
2118 	mutex_exit(&ppm_lock);
2119 	if (mode == PM_ALL_LOWEST) {
2120 		if (autoS3_enabled) {
2121 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
2122 			    "autos3\n"))
2123 			mutex_enter(&srn_clone_lock);
2124 			if (srn_signal) {
2125 				srn_inuse++;
2126 				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
2127 				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
2128 				srn_inuse--;
2129 			} else {
2130 				PMD(PMD_SX, ("srn_signal NULL\n"))
2131 			}
2132 			mutex_exit(&srn_clone_lock);
2133 		} else {
2134 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
2135 			    "disabled\n"));
2136 		}
2137 	}
2138 }
2139 
2140 static void
2141 pm_set_pm_info(dev_info_t *dip, void *value)
2142 {
2143 	DEVI(dip)->devi_pm_info = value;
2144 }
2145 
2146 pm_rsvp_t *pm_blocked_list;
2147 
2148 /*
2149  * Look up an entry in the blocked list by dip and component
2150  */
2151 static pm_rsvp_t *
2152 pm_rsvp_lookup(dev_info_t *dip, int comp)
2153 {
2154 	pm_rsvp_t *p;
2155 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2156 	for (p = pm_blocked_list; p; p = p->pr_next)
2157 		if (p->pr_dip == dip && p->pr_comp == comp) {
2158 			return (p);
2159 		}
2160 	return (NULL);
2161 }
2162 
2163 /*
2164  * Called when a device which is direct power managed (or the parent or
2165  * dependent of such a device) changes power, or when a pm clone is closed
2166  * that was direct power managing a device.  This call results in pm_blocked()
2167  * (below) returning.
2168  */
2169 void
2170 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2171 {
2172 	PMD_FUNC(pmf, "proceed")
2173 	pm_rsvp_t *found = NULL;
2174 	pm_rsvp_t *p;
2175 
2176 	mutex_enter(&pm_rsvp_lock);
2177 	switch (cmd) {
2178 	/*
2179 	 * we're giving up control, let any pending op continue
2180 	 */
2181 	case PMP_RELEASE:
2182 		for (p = pm_blocked_list; p; p = p->pr_next) {
2183 			if (dip == p->pr_dip) {
2184 				p->pr_retval = PMP_RELEASE;
2185 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2186 				    pmf, PM_DEVICE(dip)))
2187 				cv_signal(&p->pr_cv);
2188 			}
2189 		}
2190 		break;
2191 
2192 	/*
2193 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2194 	 * succeed and a non-matching request for the same device fail
2195 	 */
2196 	case PMP_SETPOWER:
2197 		found = pm_rsvp_lookup(dip, comp);
2198 		if (!found)	/* if driver not waiting */
2199 			break;
2200 		/*
2201 		 * This cannot be pm_lower_power, since that can only happen
2202 		 * during detach or probe
2203 		 */
2204 		if (found->pr_newlevel <= newlevel) {
2205 			found->pr_retval = PMP_SUCCEED;
2206 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2207 			    PM_DEVICE(dip)))
2208 		} else {
2209 			found->pr_retval = PMP_FAIL;
2210 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2211 			    PM_DEVICE(dip)))
2212 		}
2213 		cv_signal(&found->pr_cv);
2214 		break;
2215 
2216 	default:
2217 		panic("pm_proceed unknown cmd %d", cmd);
2218 	}
2219 	mutex_exit(&pm_rsvp_lock);
2220 }
2221 
2222 /*
2223  * This routine dispatches new work to the dependency thread. Caller must
2224  * be prepared to block for memory if necessary.
2225  */
2226 void
2227 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2228     int *res, int cached_pwr)
2229 {
2230 	pm_dep_wk_t	*new_work;
2231 
2232 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2233 	new_work->pdw_type = cmd;
2234 	new_work->pdw_wait = wait;
2235 	new_work->pdw_done = 0;
2236 	new_work->pdw_ret = 0;
2237 	new_work->pdw_pwr = cached_pwr;
2238 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2239 	if (keeper != NULL) {
2240 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2241 		    KM_SLEEP);
2242 		(void) strcpy(new_work->pdw_keeper, keeper);
2243 	}
2244 	if (kept != NULL) {
2245 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2246 		(void) strcpy(new_work->pdw_kept, kept);
2247 	}
2248 	mutex_enter(&pm_dep_thread_lock);
2249 	if (pm_dep_thread_workq == NULL) {
2250 		pm_dep_thread_workq = new_work;
2251 		pm_dep_thread_tail = new_work;
2252 		new_work->pdw_next = NULL;
2253 	} else {
2254 		pm_dep_thread_tail->pdw_next = new_work;
2255 		pm_dep_thread_tail = new_work;
2256 		new_work->pdw_next = NULL;
2257 	}
2258 	cv_signal(&pm_dep_thread_cv);
2259 	/* If caller asked for it, wait till it is done. */
2260 	if (wait)  {
2261 		while (!new_work->pdw_done)
2262 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2263 		/*
2264 		 * Pass return status, if any, back.
2265 		 */
2266 		if (res != NULL)
2267 			*res = new_work->pdw_ret;
2268 		/*
2269 		 * If we asked to wait, it is our job to free the request
2270 		 * structure.
2271 		 */
2272 		if (new_work->pdw_keeper)
2273 			kmem_free(new_work->pdw_keeper,
2274 			    strlen(new_work->pdw_keeper) + 1);
2275 		if (new_work->pdw_kept)
2276 			kmem_free(new_work->pdw_kept,
2277 			    strlen(new_work->pdw_kept) + 1);
2278 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2279 	}
2280 	mutex_exit(&pm_dep_thread_lock);
2281 }
2282 
2283 /*
2284  * Release the pm resource for this device.
2285  */
2286 void
2287 pm_rem_info(dev_info_t *dip)
2288 {
2289 	PMD_FUNC(pmf, "rem_info")
2290 	int		i, count = 0;
2291 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2292 	dev_info_t	*pdip = ddi_get_parent(dip);
2293 	char		*pathbuf;
2294 	int		work_type = PM_DEP_WK_DETACH;
2295 
2296 	ASSERT(info);
2297 
2298 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2299 	if (PM_ISDIRECT(dip)) {
2300 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2301 		ASSERT(info->pmi_clone);
2302 		info->pmi_clone = 0;
2303 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2304 	}
2305 	ASSERT(!PM_GET_PM_SCAN(dip));
2306 
2307 	/*
2308 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2309 	 * Others we check all components.  BC node that has already
2310 	 * called pm_destroy_components() has zero component count.
2311 	 * Parents that get notification are not adjusted because their
2312 	 * kidsupcnt is always 0 (or 1 during configuration).
2313 	 */
2314 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2315 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2316 
2317 	/* node is detached, so we can examine power without locking */
2318 	if (PM_ISBC(dip)) {
2319 		count = (PM_CURPOWER(dip, 0) != 0);
2320 	} else {
2321 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2322 			count += (PM_CURPOWER(dip, i) != 0);
2323 	}
2324 
2325 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2326 		e_pm_hold_rele_power(pdip, -count);
2327 
2328 	/* Schedule a request to clean up dependency records */
2329 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2330 	(void) ddi_pathname(dip, pathbuf);
2331 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2332 	    PM_DEP_NOWAIT, NULL, (count > 0));
2333 	kmem_free(pathbuf, MAXPATHLEN);
2334 
2335 	/*
2336 	 * Adjust the pm_comps_notlowest count since this device is
2337 	 * not being power-managed anymore.
2338 	 */
2339 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2340 		if (PM_CURPOWER(dip, i) != 0)
2341 			PM_DECR_NOTLOWEST(dip);
2342 	}
2343 	/*
2344 	 * Once we clear the info pointer, it looks like it is not power
2345 	 * managed to everybody else.
2346 	 */
2347 	pm_set_pm_info(dip, NULL);
2348 	kmem_free(info, sizeof (pm_info_t));
2349 }
2350 
2351 int
2352 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2353 {
2354 	int components = PM_NUMCMPTS(dip);
2355 	int *bufp;
2356 	size_t size;
2357 	int i;
2358 
2359 	if (components <= 0) {
2360 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2361 		    "can't get normal power values\n", PM_DEVICE(dip));
2362 		return (DDI_FAILURE);
2363 	} else {
2364 		size = components * sizeof (int);
2365 		bufp = kmem_alloc(size, KM_SLEEP);
2366 		for (i = 0; i < components; i++) {
2367 			bufp[i] = pm_get_normal_power(dip, i);
2368 		}
2369 	}
2370 	*length = size;
2371 	*valuep = bufp;
2372 	return (DDI_SUCCESS);
2373 }
2374 
2375 static int
2376 pm_reset_timestamps(dev_info_t *dip, void *arg)
2377 {
2378 	_NOTE(ARGUNUSED(arg))
2379 
2380 	int components;
2381 	int	i;
2382 
2383 	if (!PM_GET_PM_INFO(dip))
2384 		return (DDI_WALK_CONTINUE);
2385 	components = PM_NUMCMPTS(dip);
2386 	ASSERT(components > 0);
2387 	PM_LOCK_BUSY(dip);
2388 	for (i = 0; i < components; i++) {
2389 		struct pm_component *cp;
2390 		/*
2391 		 * If the component was not marked as busy,
2392 		 * reset its timestamp to now.
2393 		 */
2394 		cp = PM_CP(dip, i);
2395 		if (cp->pmc_timestamp)
2396 			cp->pmc_timestamp = gethrestime_sec();
2397 	}
2398 	PM_UNLOCK_BUSY(dip);
2399 	return (DDI_WALK_CONTINUE);
2400 }
2401 
2402 /*
2403  * Convert a power level to an index into the levels array (or
2404  * just PM_LEVEL_UNKNOWN in that special case).
2405  */
2406 static int
2407 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2408 {
2409 	PMD_FUNC(pmf, "level_to_index")
2410 	int i;
2411 	int limit = cp->pmc_comp.pmc_numlevels;
2412 	int *ip = cp->pmc_comp.pmc_lvals;
2413 
2414 	if (level == PM_LEVEL_UNKNOWN)
2415 		return (level);
2416 
2417 	for (i = 0; i < limit; i++) {
2418 		if (level == *ip++) {
2419 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2420 			    pmf, PM_DEVICE(dip),
2421 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2422 			return (i);
2423 		}
2424 	}
2425 	panic("pm_level_to_index: level %d not found for device "
2426 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2427 	/*NOTREACHED*/
2428 }
2429 
2430 /*
2431  * Internal function to set current power level
2432  */
2433 static void
2434 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2435 {
2436 	PMD_FUNC(pmf, "set_cur_pwr")
2437 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2438 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2439 
2440 	/*
2441 	 * Nothing to adjust if current & new levels are the same.
2442 	 */
2443 	if (curpwr != PM_LEVEL_UNKNOWN &&
2444 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2445 		return;
2446 
2447 	/*
2448 	 * Keep the count for comps doing transition to/from lowest
2449 	 * level.
2450 	 */
2451 	if (curpwr == 0) {
2452 		PM_INCR_NOTLOWEST(dip);
2453 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2454 		PM_DECR_NOTLOWEST(dip);
2455 	}
2456 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2457 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2458 }
2459 
2460 static int pm_phc_impl(dev_info_t *, int, int, int);
2461 
2462 /*
2463  * This is the default method of setting the power of a device if no ppm
2464  * driver has claimed it.
2465  */
2466 int
2467 pm_power(dev_info_t *dip, int comp, int level)
2468 {
2469 	PMD_FUNC(pmf, "power")
2470 	struct dev_ops	*ops;
2471 	int		(*fn)(dev_info_t *, int, int);
2472 	struct pm_component *cp = PM_CP(dip, comp);
2473 	int retval;
2474 	pm_info_t *info = PM_GET_PM_INFO(dip);
2475 
2476 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2477 	    PM_DEVICE(dip), comp, level))
2478 	if (!(ops = ddi_get_driver(dip))) {
2479 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2480 		    PM_DEVICE(dip)))
2481 		return (DDI_FAILURE);
2482 	}
2483 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2484 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2485 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2486 		    (!fn ? " devo_power NULL" : "")))
2487 		return (DDI_FAILURE);
2488 	}
2489 	cp->pmc_flags |= PM_POWER_OP;
2490 	retval = (*fn)(dip, comp, level);
2491 	cp->pmc_flags &= ~PM_POWER_OP;
2492 	if (retval == DDI_SUCCESS) {
2493 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2494 		return (DDI_SUCCESS);
2495 	}
2496 
2497 	/*
2498 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2499 	 * updated only the power level of the component.  If our attempt to
2500 	 * set the device new to a power level above has failed we sync the
2501 	 * total power state via phc code now.
2502 	 */
2503 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2504 		int phc_lvl =
2505 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2506 
2507 		ASSERT(info);
2508 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2509 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2510 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2511 	}
2512 
2513 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2514 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2515 	    level, power_val_to_string(cp, level)));
2516 	return (DDI_FAILURE);
2517 }
2518 
2519 int
2520 pm_unmanage(dev_info_t *dip)
2521 {
2522 	PMD_FUNC(pmf, "unmanage")
2523 	power_req_t power_req;
2524 	int result, retval = 0;
2525 
2526 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2527 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
2528 	    PM_DEVICE(dip)))
2529 	power_req.request_type = PMR_PPM_UNMANAGE;
2530 	power_req.req.ppm_config_req.who = dip;
2531 	if (pm_ppm_claimed(dip))
2532 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2533 		    &power_req, &result);
2534 #ifdef DEBUG
2535 	else
2536 		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
2537 		    &power_req, &result);
2538 #endif
2539 	ASSERT(retval == DDI_SUCCESS);
2540 	pm_rem_info(dip);
2541 	return (retval);
2542 }
2543 
2544 int
2545 pm_raise_power(dev_info_t *dip, int comp, int level)
2546 {
2547 	if (level < 0)
2548 		return (DDI_FAILURE);
2549 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2550 	    !e_pm_valid_power(dip, comp, level))
2551 		return (DDI_FAILURE);
2552 
2553 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2554 }
2555 
2556 int
2557 pm_lower_power(dev_info_t *dip, int comp, int level)
2558 {
2559 	PMD_FUNC(pmf, "pm_lower_power")
2560 
2561 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2562 	    !e_pm_valid_power(dip, comp, level)) {
2563 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2564 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2565 		return (DDI_FAILURE);
2566 	}
2567 
2568 	if (!DEVI_IS_DETACHING(dip)) {
2569 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2570 		    pmf, PM_DEVICE(dip)))
2571 		return (DDI_FAILURE);
2572 	}
2573 
2574 	/*
2575 	 * If we don't care about saving power, or we're treating this node
2576 	 * specially, then this is a no-op
2577 	 */
2578 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2579 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2580 		    pmf, PM_DEVICE(dip),
2581 		    !autopm_enabled ? "!autopm_enabled " : "",
2582 		    !PM_POLLING_CPUPM ? "!cpupm_polling " : "",
2583 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2584 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2585 		return (DDI_SUCCESS);
2586 	}
2587 
2588 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2589 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2590 		    PM_DEVICE(dip)))
2591 		return (DDI_FAILURE);
2592 	}
2593 	return (DDI_SUCCESS);
2594 }
2595 
2596 /*
2597  * Find the entries struct for a given dip in the blocked list, return it locked
2598  */
2599 static psce_t *
2600 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2601 {
2602 	pscc_t *p;
2603 	psce_t *psce;
2604 
2605 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2606 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2607 		if (p->pscc_dip == dip) {
2608 			*psccp = p;
2609 			psce = p->pscc_entries;
2610 			mutex_enter(&psce->psce_lock);
2611 			ASSERT(psce);
2612 			rw_exit(&pm_pscc_direct_rwlock);
2613 			return (psce);
2614 		}
2615 	}
2616 	rw_exit(&pm_pscc_direct_rwlock);
2617 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2618 	/*NOTREACHED*/
2619 }
2620 
2621 /*
2622  * Write an entry indicating a power level change (to be passed to a process
2623  * later) in the given psce.
2624  * If we were called in the path that brings up the console fb in the
2625  * case of entering the prom, we don't want to sleep.  If the alloc fails, then
2626  * we create a record that has a size of -1, a physaddr of NULL, and that
2627  * has the overflow flag set.
2628  */
2629 static int
2630 psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
2631     int old, int which, pm_canblock_t canblock)
2632 {
2633 	char	buf[MAXNAMELEN];
2634 	pm_state_change_t *p;
2635 	size_t	size;
2636 	caddr_t physpath = NULL;
2637 	int	overrun = 0;
2638 
2639 	ASSERT(MUTEX_HELD(&psce->psce_lock));
2640 	(void) ddi_pathname(dip, buf);
2641 	size = strlen(buf) + 1;
2642 	p = psce->psce_in;
2643 	if (canblock == PM_CANBLOCK_BYPASS) {
2644 		physpath = kmem_alloc(size, KM_NOSLEEP);
2645 		if (physpath == NULL) {
2646 			/*
2647 			 * mark current entry as overrun
2648 			 */
2649 			p->flags |= PSC_EVENT_LOST;
2650 			size = (size_t)-1;
2651 		}
2652 	} else
2653 		physpath = kmem_alloc(size, KM_SLEEP);
2654 	if (p->size) {	/* overflow; mark the next entry */
2655 		if (p->size != (size_t)-1)
2656 			kmem_free(p->physpath, p->size);
2657 		ASSERT(psce->psce_out == p);
2658 		if (p == psce->psce_last) {
2659 			psce->psce_first->flags |= PSC_EVENT_LOST;
2660 			psce->psce_out = psce->psce_first;
2661 		} else {
2662 			(p + 1)->flags |= PSC_EVENT_LOST;
2663 			psce->psce_out = (p + 1);
2664 		}
2665 		overrun++;
2666 	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
2667 		p->flags |= PSC_EVENT_LOST;
2668 		p->size = 0;
2669 		p->physpath = NULL;
2670 	}
2671 	if (which == PSC_INTEREST) {
2672 		mutex_enter(&pm_compcnt_lock);
2673 		if (pm_comps_notlowest == 0)
2674 			p->flags |= PSC_ALL_LOWEST;
2675 		else
2676 			p->flags &= ~PSC_ALL_LOWEST;
2677 		mutex_exit(&pm_compcnt_lock);
2678 	}
2679 	p->event = event;
2680 	p->timestamp = gethrestime_sec();
2681 	p->component = comp;
2682 	p->old_level = old;
2683 	p->new_level = new;
2684 	p->physpath = physpath;
2685 	p->size = size;
2686 	if (physpath != NULL)
2687 		(void) strcpy(p->physpath, buf);
2688 	if (p == psce->psce_last)
2689 		psce->psce_in = psce->psce_first;
2690 	else
2691 		psce->psce_in = ++p;
2692 	mutex_exit(&psce->psce_lock);
2693 	return (overrun);
2694 }
2695 
2696 /*
2697  * Find the next entry on the interest list.  We keep a pointer to the item we
2698  * last returned in the user's cooke.  Returns a locked entries struct.
2699  */
2700 static psce_t *
2701 psc_interest(void **cookie, pscc_t **psccp)
2702 {
2703 	pscc_t *pscc;
2704 	pscc_t **cookiep = (pscc_t **)cookie;
2705 
2706 	if (*cookiep == NULL)
2707 		pscc = pm_pscc_interest;
2708 	else
2709 		pscc = (*cookiep)->pscc_next;
2710 	if (pscc) {
2711 		*cookiep = pscc;
2712 		*psccp = pscc;
2713 		mutex_enter(&pscc->pscc_entries->psce_lock);
2714 		return (pscc->pscc_entries);
2715 	} else {
2716 		return (NULL);
2717 	}
2718 }
2719 
2720 /*
2721  * Create an entry for a process to pick up indicating a power level change.
2722  */
2723 static void
2724 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2725     int newlevel, int oldlevel, pm_canblock_t canblock)
2726 {
2727 	PMD_FUNC(pmf, "enqueue_notify")
2728 	pscc_t	*pscc;
2729 	psce_t	*psce;
2730 	void		*cookie = NULL;
2731 	int	overrun;
2732 
2733 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2734 	switch (cmd) {
2735 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2736 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2737 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2738 		psce = pm_psc_dip_to_direct(dip, &pscc);
2739 		ASSERT(psce);
2740 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2741 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2742 		    pm_poll_cnt[pscc->pscc_clone]))
2743 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2744 		    PSC_DIRECT, canblock);
2745 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2746 		mutex_enter(&pm_clone_lock);
2747 		if (!overrun)
2748 			pm_poll_cnt[pscc->pscc_clone]++;
2749 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2750 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2751 		mutex_exit(&pm_clone_lock);
2752 		break;
2753 	case PSC_HAS_CHANGED:
2754 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2755 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2756 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2757 			psce = pm_psc_dip_to_direct(dip, &pscc);
2758 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2759 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2760 			    pm_poll_cnt[pscc->pscc_clone]))
2761 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2762 			    oldlevel, PSC_DIRECT, canblock);
2763 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2764 			mutex_enter(&pm_clone_lock);
2765 			if (!overrun)
2766 				pm_poll_cnt[pscc->pscc_clone]++;
2767 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2768 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2769 			mutex_exit(&pm_clone_lock);
2770 		}
2771 		mutex_enter(&pm_clone_lock);
2772 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2773 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2774 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2775 			    oldlevel, PSC_INTEREST, canblock);
2776 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2777 		}
2778 		rw_exit(&pm_pscc_interest_rwlock);
2779 		mutex_exit(&pm_clone_lock);
2780 		break;
2781 #ifdef DEBUG
2782 	default:
2783 		ASSERT(0);
2784 #endif
2785 	}
2786 }
2787 
2788 static void
2789 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2790 {
2791 	if (listp) {
2792 		pm_ppm_devlist_t *p, *next = NULL;
2793 
2794 		for (p = *listp; p; p = next) {
2795 			next = p->ppd_next;
2796 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2797 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2798 			    canblock);
2799 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2800 		}
2801 		*listp = NULL;
2802 	}
2803 }
2804 
2805 /*
2806  * Try to get the power locks of the parent node and target (child)
2807  * node.  Return true if successful (with both locks held) or false
2808  * (with no locks held).
2809  */
2810 static int
2811 pm_try_parent_child_locks(dev_info_t *pdip,
2812     dev_info_t *dip, int *pcircp, int *circp)
2813 {
2814 	if (ndi_devi_tryenter(pdip, pcircp))
2815 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2816 			return (1);
2817 		} else {
2818 			ndi_devi_exit(pdip, *pcircp);
2819 		}
2820 	return (0);
2821 }
2822 
2823 /*
2824  * Determine if the power lock owner is blocked by current thread.
2825  * returns :
2826  * 	1 - If the thread owning the effective power lock (the first lock on
2827  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2828  *          a mutex held by the current thread.
2829  *
2830  *	0 - otherwise
2831  *
2832  * Note : This function is called by pm_power_has_changed to determine whether
2833  * it is executing in parallel with pm_set_power.
2834  */
2835 static int
2836 pm_blocked_by_us(dev_info_t *dip)
2837 {
2838 	power_req_t power_req;
2839 	kthread_t *owner;
2840 	int result;
2841 	kmutex_t *mp;
2842 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2843 
2844 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2845 	power_req.req.ppm_power_lock_owner_req.who = dip;
2846 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2847 	    DDI_SUCCESS) {
2848 		/*
2849 		 * It is assumed that if the device is claimed by ppm, ppm
2850 		 * will always implement this request type and it'll always
2851 		 * return success. We panic here, if it fails.
2852 		 */
2853 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2854 		    PM_DEVICE(dip));
2855 		/*NOTREACHED*/
2856 	}
2857 
2858 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2859 	    owner->t_state == TS_SLEEP &&
2860 	    owner->t_sobj_ops &&
2861 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2862 	    (mp = (kmutex_t *)owner->t_wchan) &&
2863 	    mutex_owner(mp) == curthread)
2864 		return (1);
2865 
2866 	return (0);
2867 }
2868 
2869 /*
2870  * Notify parent which wants to hear about a child's power changes.
2871  */
2872 static void
2873 pm_notify_parent(dev_info_t *dip,
2874     dev_info_t *pdip, int comp, int old_level, int level)
2875 {
2876 	pm_bp_has_changed_t bphc;
2877 	pm_sp_misc_t pspm;
2878 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2879 	int result = DDI_SUCCESS;
2880 
2881 	bphc.bphc_dip = dip;
2882 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2883 	bphc.bphc_comp = comp;
2884 	bphc.bphc_olevel = old_level;
2885 	bphc.bphc_nlevel = level;
2886 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2887 	pspm.pspm_scan = 0;
2888 	bphc.bphc_private = &pspm;
2889 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2890 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2891 	kmem_free(pathbuf, MAXPATHLEN);
2892 }
2893 
2894 /*
2895  * Check if we need to resume a BC device, and make the attach call as required.
2896  */
2897 static int
2898 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2899 {
2900 	int ret = DDI_SUCCESS;
2901 
2902 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2903 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2904 		/* ppm is not interested in DDI_PM_RESUME */
2905 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2906 			/* XXX Should we mark it resumed, */
2907 			/* even though it failed? */
2908 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2909 			    PM_NAME(dip), PM_ADDR(dip));
2910 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2911 	}
2912 
2913 	return (ret);
2914 }
2915 
2916 /*
2917  * Tests outside the lock to see if we should bother to enqueue an entry
2918  * for any watching process.  If yes, then caller will take the lock and
2919  * do the full protocol
2920  */
2921 static int
2922 pm_watchers()
2923 {
2924 	if (pm_processes_stopped)
2925 		return (0);
2926 	return (pm_pscc_direct || pm_pscc_interest);
2927 }
2928 
2929 static int pm_phc_impl(dev_info_t *, int, int, int);
2930 
2931 /*
2932  * A driver is reporting that the power of one of its device's components
2933  * has changed.  Update the power state accordingly.
2934  */
2935 int
2936 pm_power_has_changed(dev_info_t *dip, int comp, int level)
2937 {
2938 	PMD_FUNC(pmf, "pm_power_has_changed")
2939 	int ret;
2940 	dev_info_t *pdip = ddi_get_parent(dip);
2941 	struct pm_component *cp;
2942 	int blocked, circ, pcirc, old_level;
2943 
2944 	if (level < 0) {
2945 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
2946 		    PM_DEVICE(dip), level))
2947 		return (DDI_FAILURE);
2948 	}
2949 
2950 	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2951 	    PM_DEVICE(dip), comp, level))
2952 
2953 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
2954 	    !e_pm_valid_power(dip, comp, level))
2955 		return (DDI_FAILURE);
2956 
2957 	/*
2958 	 * A driver thread calling pm_power_has_changed and another thread
2959 	 * calling pm_set_power can deadlock.  The problem is not resolvable
2960 	 * by changing lock order, so we use pm_blocked_by_us() to detect
2961 	 * this specific deadlock.  If we can't get the lock immediately
2962 	 * and we are deadlocked, just update the component's level, do
2963 	 * notifications, and return.  We intend to update the total power
2964 	 * state later (if the other thread fails to set power to the
2965 	 * desired level).  If we were called because of a power change on a
2966 	 * component that isn't involved in a set_power op, update all state
2967 	 * immediately.
2968 	 */
2969 	cp = PM_CP(dip, comp);
2970 	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
2971 		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
2972 		    (cp->pmc_flags & PM_POWER_OP)) {
2973 			if (pm_watchers()) {
2974 				mutex_enter(&pm_rsvp_lock);
2975 				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
2976 				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
2977 				mutex_exit(&pm_rsvp_lock);
2978 			}
2979 			if (pdip && PM_WANTS_NOTIFICATION(pdip))
2980 				pm_notify_parent(dip,
2981 				    pdip, comp, cur_power(cp), level);
2982 			(void) pm_check_and_resume(dip,
2983 			    comp, cur_power(cp), level);
2984 
2985 			/*
2986 			 * Stash the old power index, update curpwr, and flag
2987 			 * that the total power state needs to be synched.
2988 			 */
2989 			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
2990 			/*
2991 			 * Several pm_power_has_changed calls could arrive
2992 			 * while the set power path remains blocked.  Keep the
2993 			 * oldest old power and the newest new power of any
2994 			 * sequence of phc calls which arrive during deadlock.
2995 			 */
2996 			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
2997 				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
2998 			cp->pmc_cur_pwr =
2999 			    pm_level_to_index(dip, cp, level);
3000 			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
3001 			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3002 			return (DDI_SUCCESS);
3003 		} else
3004 			if (blocked) {	/* blocked, but different cmpt? */
3005 				if (!ndi_devi_tryenter(pdip, &pcirc)) {
3006 					cmn_err(CE_NOTE,
3007 					    "!pm: parent kuc not updated due "
3008 					    "to possible deadlock.\n");
3009 					return (pm_phc_impl(dip,
3010 					    comp, level, 1));
3011 				}
3012 				old_level = cur_power(cp);
3013 				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3014 				    (!PM_ISBC(dip) || comp == 0) &&
3015 				    POWERING_ON(old_level, level))
3016 					pm_hold_power(pdip);
3017 				ret = pm_phc_impl(dip, comp, level, 1);
3018 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3019 					if ((!PM_ISBC(dip) ||
3020 					    comp == 0) && level == 0 &&
3021 					    old_level != PM_LEVEL_UNKNOWN)
3022 						pm_rele_power(pdip);
3023 				}
3024 				ndi_devi_exit(pdip, pcirc);
3025 				/* child lock not held: deadlock */
3026 				return (ret);
3027 			}
3028 		delay(1);
3029 		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
3030 	}
3031 
3032 	/* non-deadlock case */
3033 	old_level = cur_power(cp);
3034 	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
3035 	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
3036 		pm_hold_power(pdip);
3037 	ret = pm_phc_impl(dip, comp, level, 1);
3038 	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
3039 		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
3040 		    old_level != PM_LEVEL_UNKNOWN)
3041 			pm_rele_power(pdip);
3042 	}
3043 	PM_UNLOCK_POWER(dip, circ);
3044 	ndi_devi_exit(pdip, pcirc);
3045 	return (ret);
3046 }
3047 
3048 /*
3049  * Account for power changes to a component of the the console frame buffer.
3050  * If lowering power from full (or "unkown", which is treatd as full)
3051  * we will increment the "components off" count of the fb device.
3052  * Subsequent lowering of the same component doesn't affect the count.  If
3053  * raising a component back to full power, we will decrement the count.
3054  *
3055  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3056  */
3057 static int
3058 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3059 {
3060 	struct pm_component *cp = PM_CP(dip, cmpt);
3061 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3062 	int want_normal = (new == cp->pmc_norm_pwr);
3063 	int incr = 0;
3064 
3065 	if (on && !want_normal)
3066 		incr = 1;
3067 	else if (!on && want_normal)
3068 		incr = -1;
3069 	return (incr);
3070 }
3071 
3072 /*
3073  * Adjust the count of console frame buffer components < full power.
3074  */
3075 static void
3076 update_comps_off(int incr, dev_info_t *dip)
3077 {
3078 		mutex_enter(&pm_cfb_lock);
3079 		pm_cfb_comps_off += incr;
3080 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3081 		mutex_exit(&pm_cfb_lock);
3082 }
3083 
3084 /*
3085  * Update the power state in the framework (via the ppm).  The 'notify'
3086  * argument tells whether to notify watchers.  Power lock is already held.
3087  */
3088 static int
3089 pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
3090 {
3091 	PMD_FUNC(pmf, "phc_impl")
3092 	power_req_t power_req;
3093 	int i, dodeps = 0;
3094 	dev_info_t *pdip = ddi_get_parent(dip);
3095 	int result;
3096 	int old_level;
3097 	struct pm_component *cp;
3098 	int incr = 0;
3099 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
3100 	int work_type = 0;
3101 	char *pathbuf;
3102 
3103 	/* Must use "official" power level for this test. */
3104 	cp = PM_CP(dip, comp);
3105 	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
3106 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
3107 	if (old_level != PM_LEVEL_UNKNOWN)
3108 		old_level = cp->pmc_comp.pmc_lvals[old_level];
3109 
3110 	if (level == old_level) {
3111 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
3112 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3113 		return (DDI_SUCCESS);
3114 	}
3115 
3116 	/*
3117 	 * Tell ppm about this.
3118 	 */
3119 	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3120 	power_req.req.ppm_notify_level_req.who = dip;
3121 	power_req.req.ppm_notify_level_req.cmpt = comp;
3122 	power_req.req.ppm_notify_level_req.new_level = level;
3123 	power_req.req.ppm_notify_level_req.old_level = old_level;
3124 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
3125 	    &result) == DDI_FAILURE) {
3126 		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
3127 		    pmf, PM_DEVICE(dip), level))
3128 		return (DDI_FAILURE);
3129 	}
3130 
3131 	if (PM_IS_CFB(dip)) {
3132 		incr = calc_cfb_comps_incr(dip, comp, old_level, level);
3133 
3134 		if (incr) {
3135 			update_comps_off(incr, dip);
3136 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
3137 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
3138 			    comp, old_level, level, pm_cfb_comps_off))
3139 		}
3140 	}
3141 	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
3142 	result = DDI_SUCCESS;
3143 
3144 	if (notify) {
3145 		if (pdip && PM_WANTS_NOTIFICATION(pdip))
3146 			pm_notify_parent(dip, pdip, comp, old_level, level);
3147 		(void) pm_check_and_resume(dip, comp, old_level, level);
3148 	}
3149 
3150 	/*
3151 	 * Decrement the dependency kidsup count if we turn a device
3152 	 * off.
3153 	 */
3154 	if (POWERING_OFF(old_level, level)) {
3155 		dodeps = 1;
3156 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3157 			cp = PM_CP(dip, i);
3158 			if (cur_power(cp)) {
3159 				dodeps = 0;
3160 				break;
3161 			}
3162 		}
3163 		if (dodeps)
3164 			work_type = PM_DEP_WK_POWER_OFF;
3165 	}
3166 
3167 	/*
3168 	 * Increment if we turn it on. Check to see
3169 	 * if other comps are already on, if so,
3170 	 * dont increment.
3171 	 */
3172 	if (POWERING_ON(old_level, level)) {
3173 		dodeps = 1;
3174 		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3175 			cp = PM_CP(dip, i);
3176 			if (comp == i)
3177 				continue;
3178 			/* -1 also treated as 0 in this case */
3179 			if (cur_power(cp) > 0) {
3180 				dodeps = 0;
3181 				break;
3182 			}
3183 		}
3184 		if (dodeps)
3185 			work_type = PM_DEP_WK_POWER_ON;
3186 	}
3187 
3188 	if (dodeps) {
3189 		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3190 		(void) ddi_pathname(dip, pathbuf);
3191 		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
3192 		    PM_DEP_NOWAIT, NULL, 0);
3193 		kmem_free(pathbuf, MAXPATHLEN);
3194 	}
3195 
3196 	if (notify && (level != old_level) && pm_watchers()) {
3197 		mutex_enter(&pm_rsvp_lock);
3198 		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
3199 		    PM_CANBLOCK_BLOCK);
3200 		mutex_exit(&pm_rsvp_lock);
3201 	}
3202 
3203 	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
3204 	pm_rescan(dip);
3205 	return (DDI_SUCCESS);
3206 }
3207 
3208 /*
3209  * This function is called at startup time to notify pm of the existence
3210  * of any platform power managers for this platform.  As a result of
3211  * this registration, each function provided will be called each time
3212  * a device node is attached, until one returns true, and it must claim the
3213  * device node (by returning non-zero) if it wants to be involved in the
3214  * node's power management.  If it does claim the node, then it will
3215  * subsequently be notified of attach and detach events.
3216  *
3217  */
3218 
3219 int
3220 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3221 {
3222 	PMD_FUNC(pmf, "register_ppm")
3223 	struct ppm_callbacks *ppmcp;
3224 	pm_component_t *cp;
3225 	int i, pwr, result, circ;
3226 	power_req_t power_req;
3227 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3228 	void pm_ppm_claim(dev_info_t *);
3229 
3230 	mutex_enter(&ppm_lock);
3231 	ppmcp = ppm_callbacks;
3232 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3233 		if (ppmcp->ppmc_func == NULL) {
3234 			ppmcp->ppmc_func = func;
3235 			ppmcp->ppmc_dip = dip;
3236 			break;
3237 		}
3238 	}
3239 	mutex_exit(&ppm_lock);
3240 
3241 	if (i >= MAX_PPM_HANDLERS)
3242 		return (DDI_FAILURE);
3243 	while ((dip = ddi_get_parent(dip)) != NULL) {
3244 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3245 			continue;
3246 		pm_ppm_claim(dip);
3247 		/* don't bother with the not power-manageable nodes */
3248 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3249 			/*
3250 			 * Tell ppm about this.
3251 			 */
3252 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3253 			p->old_level = PM_LEVEL_UNKNOWN;
3254 			p->who = dip;
3255 			PM_LOCK_POWER(dip, &circ);
3256 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3257 				cp = PM_CP(dip, i);
3258 				pwr = cp->pmc_cur_pwr;
3259 				if (pwr != PM_LEVEL_UNKNOWN) {
3260 					p->cmpt = i;
3261 					p->new_level = cur_power(cp);
3262 					p->old_level = PM_LEVEL_UNKNOWN;
3263 					if (pm_ctlops(PPM(dip), dip,
3264 					    DDI_CTLOPS_POWER, &power_req,
3265 					    &result) == DDI_FAILURE) {
3266 						PMD(PMD_FAIL, ("%s: pc "
3267 						    "%s@%s(%s#%d) to %d "
3268 						    "fails\n", pmf,
3269 						    PM_DEVICE(dip), pwr))
3270 					}
3271 				}
3272 			}
3273 			PM_UNLOCK_POWER(dip, circ);
3274 		}
3275 	}
3276 	return (DDI_SUCCESS);
3277 }
3278 
3279 /*
3280  * Call the ppm's that have registered and adjust the devinfo struct as
3281  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3282  * by each ppm are assumed to be disjoint.
3283  */
3284 void
3285 pm_ppm_claim(dev_info_t *dip)
3286 {
3287 	struct ppm_callbacks *ppmcp;
3288 
3289 	if (PPM(dip)) {
3290 		return;
3291 	}
3292 	mutex_enter(&ppm_lock);
3293 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3294 		if ((*ppmcp->ppmc_func)(dip)) {
3295 			DEVI(dip)->devi_pm_ppm =
3296 			    (struct dev_info *)ppmcp->ppmc_dip;
3297 			mutex_exit(&ppm_lock);
3298 			return;
3299 		}
3300 	}
3301 	mutex_exit(&ppm_lock);
3302 }
3303 
3304 /*
3305  * Node is being detached so stop autopm until we see if it succeeds, in which
3306  * case pm_stop will be called.  For backwards compatible devices we bring the
3307  * device up to full power on the assumption the detach will succeed.
3308  */
3309 void
3310 pm_detaching(dev_info_t *dip)
3311 {
3312 	PMD_FUNC(pmf, "detaching")
3313 	pm_info_t *info = PM_GET_PM_INFO(dip);
3314 	int iscons;
3315 
3316 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3317 	    PM_NUMCMPTS(dip)))
3318 	if (info == NULL)
3319 		return;
3320 	ASSERT(DEVI_IS_DETACHING(dip));
3321 	PM_LOCK_DIP(dip);
3322 	info->pmi_dev_pm_state |= PM_DETACHING;
3323 	PM_UNLOCK_DIP(dip);
3324 	if (!PM_ISBC(dip))
3325 		pm_scan_stop(dip);
3326 
3327 	/*
3328 	 * console and old-style devices get brought up when detaching.
3329 	 */
3330 	iscons = PM_IS_CFB(dip);
3331 	if (iscons || PM_ISBC(dip)) {
3332 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3333 		if (iscons) {
3334 			mutex_enter(&pm_cfb_lock);
3335 			while (cfb_inuse) {
3336 				mutex_exit(&pm_cfb_lock);
3337 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3338 				delay(1);
3339 				mutex_enter(&pm_cfb_lock);
3340 			}
3341 			ASSERT(cfb_dip_detaching == NULL);
3342 			ASSERT(cfb_dip);
3343 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3344 			cfb_dip = NULL;
3345 			mutex_exit(&pm_cfb_lock);
3346 		}
3347 	}
3348 }
3349 
3350 /*
3351  * Node failed to detach.  If it used to be autopm'd, make it so again.
3352  */
3353 void
3354 pm_detach_failed(dev_info_t *dip)
3355 {
3356 	PMD_FUNC(pmf, "detach_failed")
3357 	pm_info_t *info = PM_GET_PM_INFO(dip);
3358 	int pm_all_at_normal(dev_info_t *);
3359 
3360 	if (info == NULL)
3361 		return;
3362 	ASSERT(DEVI_IS_DETACHING(dip));
3363 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3364 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3365 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3366 			/* Make sure the operation is still needed */
3367 			if (!pm_all_at_normal(dip)) {
3368 				if (pm_all_to_normal(dip,
3369 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3370 					PMD(PMD_ERROR, ("%s: could not bring "
3371 					    "%s@%s(%s#%d) to normal\n", pmf,
3372 					    PM_DEVICE(dip)))
3373 				}
3374 			}
3375 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3376 		}
3377 	}
3378 	if (!PM_ISBC(dip)) {
3379 		mutex_enter(&pm_scan_lock);
3380 		if (PM_SCANABLE(dip))
3381 			pm_scan_init(dip);
3382 		mutex_exit(&pm_scan_lock);
3383 		pm_rescan(dip);
3384 	}
3385 }
3386 
3387 /* generic Backwards Compatible component */
3388 static char *bc_names[] = {"off", "on"};
3389 
3390 static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3391 
3392 static void
3393 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3394 {
3395 	pm_comp_t *pmc;
3396 	pmc = &cp->pmc_comp;
3397 	pmc->pmc_numlevels = 2;
3398 	pmc->pmc_lvals[0] = 0;
3399 	pmc->pmc_lvals[1] = norm;
3400 	e_pm_set_cur_pwr(dip, cp, norm);
3401 }
3402 
3403 static void
3404 e_pm_default_components(dev_info_t *dip, int cmpts)
3405 {
3406 	int i;
3407 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3408 
3409 	p = DEVI(dip)->devi_pm_components;
3410 	for (i = 0; i < cmpts; i++, p++) {
3411 		p->pmc_comp = bc_comp;	/* struct assignment */
3412 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3413 		    KM_SLEEP);
3414 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3415 		    KM_SLEEP);
3416 		p->pmc_comp.pmc_numlevels = 2;
3417 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3418 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3419 	}
3420 }
3421 
3422 /*
3423  * Called from functions that require components to exist already to allow
3424  * for their creation by parsing the pm-components property.
3425  * Device will not be power managed as a result of this call
3426  * No locking needed because we're single threaded by the ndi_devi_enter
3427  * done while attaching, and the device isn't visible until after it has
3428  * attached
3429  */
3430 int
3431 pm_premanage(dev_info_t *dip, int style)
3432 {
3433 	PMD_FUNC(pmf, "premanage")
3434 	pm_comp_t	*pcp, *compp;
3435 	int		cmpts, i, norm, error;
3436 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3437 	pm_comp_t *pm_autoconfig(dev_info_t *, int *);
3438 
3439 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3440 	/*
3441 	 * If this dip has already been processed, don't mess with it
3442 	 */
3443 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
3444 		return (DDI_SUCCESS);
3445 	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
3446 		return (DDI_FAILURE);
3447 	}
3448 	/*
3449 	 * Look up pm-components property and create components accordingly
3450 	 * If that fails, fall back to backwards compatibility
3451 	 */
3452 	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
3453 		/*
3454 		 * If error is set, the property existed but was not well formed
3455 		 */
3456 		if (error || (style == PM_STYLE_NEW)) {
3457 			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
3458 			return (DDI_FAILURE);
3459 		}
3460 		/*
3461 		 * If they don't have the pm-components property, then we
3462 		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
3463 		 * behavior driver must have called pm_create_components, and
3464 		 * we need to flesh out dummy components
3465 		 */
3466 		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
3467 			/*
3468 			 * Not really failure, but we don't want the
3469 			 * caller to treat it as success
3470 			 */
3471 			return (DDI_FAILURE);
3472 		}
3473 		DEVI(dip)->devi_pm_flags |= PMC_BC;
3474 		e_pm_default_components(dip, cmpts);
3475 		for (i = 0; i < cmpts; i++) {
3476 			/*
3477 			 * if normal power not set yet, we don't really know
3478 			 * what *ANY* of the power values are.  If normal
3479 			 * power is set, then we assume for this backwards
3480 			 * compatible case that the values are 0, normal power.
3481 			 */
3482 			norm = pm_get_normal_power(dip, i);
3483 			if (norm == (uint_t)-1) {
3484 				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
3485 				    PM_DEVICE(dip), i))
3486 				return (DDI_FAILURE);
3487 			}
3488 			/*
3489 			 * Components of BC devices start at their normal power,
3490 			 * so count them to be not at their lowest power.
3491 			 */
3492 			PM_INCR_NOTLOWEST(dip);
3493 			e_pm_default_levels(dip, PM_CP(dip, i), norm);
3494 		}
3495 	} else {
3496 		/*
3497 		 * e_pm_create_components was called from pm_autoconfig(), it
3498 		 * creates components with no descriptions (or known levels)
3499 		 */
3500 		cmpts = PM_NUMCMPTS(dip);
3501 		ASSERT(cmpts != 0);
3502 		pcp = compp;
3503 		p = DEVI(dip)->devi_pm_components;
3504 		for (i = 0; i < cmpts; i++, p++) {
3505 			p->pmc_comp = *pcp++;   /* struct assignment */
3506 			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
3507 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
3508 		}
3509 		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3510 			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
3511 			    PMC_CPU_THRESH);
3512 		else
3513 			pm_set_device_threshold(dip, pm_system_idle_threshold,
3514 			    PMC_DEF_THRESH);
3515 		kmem_free(compp, cmpts * sizeof (pm_comp_t));
3516 	}
3517 	return (DDI_SUCCESS);
3518 }
3519 
3520 /*
3521  * Called from during or after the device's attach to let us know it is ready
3522  * to play autopm.   Look up the pm model and manage the device accordingly.
3523  * Returns system call errno value.
3524  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3525  * a little cleaner
3526  *
3527  * Called with dip lock held, return with dip lock unheld.
3528  */
3529 
3530 int
3531 e_pm_manage(dev_info_t *dip, int style)
3532 {
3533 	PMD_FUNC(pmf, "e_manage")
3534 	pm_info_t	*info;
3535 	dev_info_t	*pdip = ddi_get_parent(dip);
3536 	int	pm_thresh_specd(dev_info_t *);
3537 	int	count;
3538 	char	*pathbuf;
3539 
3540 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3541 		return (DDI_FAILURE);
3542 	}
3543 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3544 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3545 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3546 
3547 	/*
3548 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3549 	 * out at their normal power, so they are "up", others start out
3550 	 * unknown, which is effectively "up".  Parent which want notification
3551 	 * get kidsupcnt of 0 always.
3552 	 */
3553 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3554 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3555 		e_pm_hold_rele_power(pdip, count);
3556 
3557 	pm_set_pm_info(dip, info);
3558 	/*
3559 	 * Apply any recorded thresholds
3560 	 */
3561 	(void) pm_thresh_specd(dip);
3562 
3563 	/*
3564 	 * Do dependency processing.
3565 	 */
3566 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3567 	(void) ddi_pathname(dip, pathbuf);
3568 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3569 	    PM_DEP_NOWAIT, NULL, 0);
3570 	kmem_free(pathbuf, MAXPATHLEN);
3571 
3572 	if (!PM_ISBC(dip)) {
3573 		mutex_enter(&pm_scan_lock);
3574 		if (PM_SCANABLE(dip)) {
3575 			pm_scan_init(dip);
3576 			mutex_exit(&pm_scan_lock);
3577 			pm_rescan(dip);
3578 		} else {
3579 			mutex_exit(&pm_scan_lock);
3580 		}
3581 	}
3582 	return (0);
3583 }
3584 
3585 /*
3586  * This is the obsolete exported interface for a driver to find out its
3587  * "normal" (max) power.
3588  * We only get components destroyed while no power management is
3589  * going on (and the device is detached), so we don't need a mutex here
3590  */
3591 int
3592 pm_get_normal_power(dev_info_t *dip, int comp)
3593 {
3594 
3595 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3596 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3597 	}
3598 	return (DDI_FAILURE);
3599 }
3600 
3601 /*
3602  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3603  */
3604 int
3605 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3606 {
3607 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3608 		*levelp = PM_CURPOWER(dip, comp);
3609 		return (DDI_SUCCESS);
3610 	}
3611 	return (DDI_FAILURE);
3612 }
3613 
3614 /*
3615  * Returns current threshold of indicated component
3616  */
3617 static int
3618 cur_threshold(dev_info_t *dip, int comp)
3619 {
3620 	pm_component_t *cp = PM_CP(dip, comp);
3621 	int pwr;
3622 
3623 	if (PM_ISBC(dip)) {
3624 		/*
3625 		 * backwards compatible nodes only have one threshold
3626 		 */
3627 		return (cp->pmc_comp.pmc_thresh[1]);
3628 	}
3629 	pwr = cp->pmc_cur_pwr;
3630 	if (pwr == PM_LEVEL_UNKNOWN) {
3631 		int thresh;
3632 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3633 			thresh = pm_default_nexus_threshold;
3634 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3635 			thresh = pm_cpu_idle_threshold;
3636 		else
3637 			thresh = pm_system_idle_threshold;
3638 		return (thresh);
3639 	}
3640 	ASSERT(cp->pmc_comp.pmc_thresh);
3641 	return (cp->pmc_comp.pmc_thresh[pwr]);
3642 }
3643 
3644 /*
3645  * Compute next lower component power level given power index.
3646  */
3647 static int
3648 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3649 {
3650 	int nxt_pwr;
3651 
3652 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3653 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3654 	} else {
3655 		pwrndx--;
3656 		ASSERT(pwrndx >= 0);
3657 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3658 	}
3659 	return (nxt_pwr);
3660 }
3661 
3662 /*
3663  * Update the maxpower (normal) power of a component. Note that the
3664  * component's power level is only changed if it's current power level
3665  * is higher than the new max power.
3666  */
3667 int
3668 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3669 {
3670 	PMD_FUNC(pmf, "update_maxpower")
3671 	int old;
3672 	int result;
3673 
3674 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3675 	    !e_pm_valid_power(dip, comp, level)) {
3676 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3677 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3678 		return (DDI_FAILURE);
3679 	}
3680 	old = e_pm_get_max_power(dip, comp);
3681 	e_pm_set_max_power(dip, comp, level);
3682 
3683 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3684 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3685 		e_pm_set_max_power(dip, comp, old);
3686 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3687 		    PM_DEVICE(dip)))
3688 		return (DDI_FAILURE);
3689 	}
3690 	return (DDI_SUCCESS);
3691 }
3692 
3693 /*
3694  * Bring all components of device to normal power
3695  */
3696 int
3697 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3698 {
3699 	PMD_FUNC(pmf, "all_to_normal")
3700 	int		*normal;
3701 	int		i, ncomps, result;
3702 	size_t		size;
3703 	int		changefailed = 0;
3704 
3705 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3706 	ASSERT(PM_GET_PM_INFO(dip));
3707 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3708 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3709 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3710 		return (DDI_FAILURE);
3711 	}
3712 	ncomps = PM_NUMCMPTS(dip);
3713 	for (i = 0; i < ncomps; i++) {
3714 		if (pm_set_power(dip, i, normal[i],
3715 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3716 			changefailed++;
3717 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3718 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3719 			    PM_DEVICE(dip), i, normal[i], result))
3720 		}
3721 	}
3722 	kmem_free(normal, size);
3723 	if (changefailed) {
3724 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3725 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3726 		return (DDI_FAILURE);
3727 	}
3728 	return (DDI_SUCCESS);
3729 }
3730 
3731 /*
3732  * Returns true if all components of device are at normal power
3733  */
3734 int
3735 pm_all_at_normal(dev_info_t *dip)
3736 {
3737 	PMD_FUNC(pmf, "all_at_normal")
3738 	int		*normal;
3739 	int		i;
3740 	size_t		size;
3741 
3742 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3743 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3744 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3745 		return (DDI_FAILURE);
3746 	}
3747 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3748 		int current = PM_CURPOWER(dip, i);
3749 		if (normal[i] > current) {
3750 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3751 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3752 			    normal[i], current))
3753 			break;
3754 		}
3755 	}
3756 	kmem_free(normal, size);
3757 	if (i != PM_NUMCMPTS(dip)) {
3758 		return (0);
3759 	}
3760 	return (1);
3761 }
3762 
3763 static void bring_pmdep_up(dev_info_t *, int);
3764 
3765 static void
3766 bring_wekeeps_up(char *keeper)
3767 {
3768 	PMD_FUNC(pmf, "bring_wekeeps_up")
3769 	int i;
3770 	pm_pdr_t *dp;
3771 	pm_info_t *wku_info;
3772 	char *kept_path;
3773 	dev_info_t *kept;
3774 
3775 	if (panicstr) {
3776 		return;
3777 	}
3778 	/*
3779 	 * We process the request even if the keeper detaches because
3780 	 * detach processing expects this to increment kidsupcnt of kept.
3781 	 */
3782 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3783 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3784 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3785 			continue;
3786 		for (i = 0; i < dp->pdr_kept_count; i++) {
3787 			kept_path = dp->pdr_kept_paths[i];
3788 			if (kept_path == NULL)
3789 				continue;
3790 			ASSERT(kept_path[0] != '\0');
3791 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3792 				continue;
3793 			wku_info = PM_GET_PM_INFO(kept);
3794 			if (wku_info == NULL) {
3795 				if (kept)
3796 					ddi_release_devi(kept);
3797 				continue;
3798 			}
3799 			/*
3800 			 * Don't mess with it if it is being detached, it isn't
3801 			 * safe to call its power entry point
3802 			 */
3803 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3804 				if (kept)
3805 					ddi_release_devi(kept);
3806 				continue;
3807 			}
3808 			bring_pmdep_up(kept, 1);
3809 			ddi_release_devi(kept);
3810 		}
3811 	}
3812 }
3813 
3814 /*
3815  * Bring up the 'kept' device passed as argument
3816  */
3817 static void
3818 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3819 {
3820 	PMD_FUNC(pmf, "bring_pmdep_up")
3821 	int is_all_at_normal = 0;
3822 
3823 	/*
3824 	 * If the kept device has been unmanaged, do nothing.
3825 	 */
3826 	if (!PM_GET_PM_INFO(kept_dip))
3827 		return;
3828 
3829 	/* Just ignore DIRECT PM device till they are released. */
3830 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3831 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3832 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3833 		    "controlling process did something else\n", pmf,
3834 		    PM_DEVICE(kept_dip)))
3835 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3836 		return;
3837 	}
3838 	/* if we got here the keeper had a transition from OFF->ON */
3839 	if (hold)
3840 		pm_hold_power(kept_dip);
3841 
3842 	if (!is_all_at_normal)
3843 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3844 }
3845 
/*
 * A bunch of stuff that belongs only to the next routine (or two):
 * the constants, list node and character-class macros used while parsing
 * the pm-components property in pm_autoconfig() and pm_parsenum().
 */

/* Prefix that introduces a component name entry in the property. */
static const char namestr[] = "NAME=";
/* Length of the prefix, not counting the terminating NUL. */
static const int nameln = sizeof (namestr) - 1;
/* Name of the property that describes a device's components. */
static const char pmcompstr[] = "pm-components";

/*
 * Node of the singly linked list on which pm_autoconfig() collects the
 * components it has parsed, in property order.
 */
struct pm_comp_pkg {
	pm_comp_t		*comp;	/* component description */
	struct pm_comp_pkg	*next;	/* next component, or NULL */
};

/*
 * Local character-class tests.  Note that both macros evaluate their
 * argument more than once, so it must not have side effects.
 */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))
3863 
/*
 * Rather than duplicate this code ...
 * (this code excerpted from the function that follows it)
 *
 * Completes the component currently being parsed: allocates the level
 * name buffer, level name pointers, level values and thresholds, then
 * copies the collected level data into them.  It relies on the following
 * locals of the expanding function: compp, size, level, tp, j, lvals,
 * lnames and lszs.
 *
 * The body is wrapped in do { } while (0) so that "FINISH_COMP;" is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define	FINISH_COMP do { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	_NOTE(CONSTCOND) \
	} while (0)
3887 
3888 /*
3889  * Create (empty) component data structures.
3890  */
3891 static void
3892 e_pm_create_components(dev_info_t *dip, int num_components)
3893 {
3894 	struct pm_component *compp, *ocompp;
3895 	int i, size = 0;
3896 
3897 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3898 	ASSERT(!DEVI(dip)->devi_pm_components);
3899 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3900 	size = sizeof (struct pm_component) * num_components;
3901 
3902 	compp = kmem_zalloc(size, KM_SLEEP);
3903 	ocompp = compp;
3904 	DEVI(dip)->devi_pm_comp_size = size;
3905 	DEVI(dip)->devi_pm_num_components = num_components;
3906 	PM_LOCK_BUSY(dip);
3907 	for (i = 0; i < num_components;  i++) {
3908 		compp->pmc_timestamp = gethrestime_sec();
3909 		compp->pmc_norm_pwr = (uint_t)-1;
3910 		compp++;
3911 	}
3912 	PM_UNLOCK_BUSY(dip);
3913 	DEVI(dip)->devi_pm_components = ocompp;
3914 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3915 }
3916 
/*
 * Parse hex or decimal value from char string
 *
 * 'cp' must point at a decimal digit, otherwise NULL is returned and
 * *valp is left untouched.  A leading "0x" or "0X" selects hexadecimal;
 * anything else is decimal (a leading '0' does NOT mean octal).  "0x" with
 * no hex digits after it yields 0.
 *
 * On success the value is stored in *valp and a pointer to the first
 * character that is not part of the number is returned.
 *
 * The digits are accumulated in place, so the length of the digit string
 * is not limited by any intermediate buffer.  Accumulation is done in
 * unsigned arithmetic, so an over-long number wraps around instead of
 * invoking signed-overflow undefined behaviour.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	unsigned int value = 0;
	unsigned int base = 10;
	int digit;

	if (!(*cp >= '0' && *cp <= '9'))
		return (NULL);

	/* A leading zero is skipped; "0x"/"0X" also switches to base 16. */
	if (*cp == '0') {
		cp++;
		if (*cp == 'x' || *cp == 'X') {
			base = 16;
			cp++;
		}
	}

	for (;;) {
		int ch = *cp;

		if (ch >= '0' && ch <= '9')
			digit = ch - '0';
		else if (base == 16 && ch >= 'a' && ch <= 'f')
			digit = ch - 'a' + 10;
		else if (base == 16 && ch >= 'A' && ch <= 'F')
			digit = ch - 'A' + 10;
		else
			break;
		value = value * base + (unsigned int)digit;
		cp++;
	}

	*valp = (int)value;
	return (cp);
}
3979 
3980 /*
3981  * Set max (previously documented as "normal") power.
3982  */
3983 static void
3984 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3985 {
3986 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3987 }
3988 
3989 /*
3990  * Get max (previously documented as "normal") power.
3991  */
3992 static int
3993 e_pm_get_max_power(dev_info_t *dip, int component_number)
3994 {
3995 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
3996 }
3997 
3998 /*
3999  * Internal routine for destroying components
4000  * It is called even when there might not be any, so it must be forgiving.
4001  */
4002 static void
4003 e_pm_destroy_components(dev_info_t *dip)
4004 {
4005 	int i;
4006 	struct pm_component *cp;
4007 
4008 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4009 	if (PM_NUMCMPTS(dip) == 0)
4010 		return;
4011 	cp = DEVI(dip)->devi_pm_components;
4012 	ASSERT(cp);
4013 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
4014 		int nlevels = cp->pmc_comp.pmc_numlevels;
4015 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
4016 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
4017 		/*
4018 		 * For BC nodes, the rest is static in bc_comp, so skip it
4019 		 */
4020 		if (PM_ISBC(dip))
4021 			continue;
4022 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4023 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4024 		kmem_free(cp->pmc_comp.pmc_lname_buf,
4025 		    cp->pmc_comp.pmc_lnames_sz);
4026 	}
4027 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4028 	DEVI(dip)->devi_pm_components = NULL;
4029 	DEVI(dip)->devi_pm_num_components = 0;
4030 	DEVI(dip)->devi_pm_flags &=
4031 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4032 }
4033 
4034 /*
4035  * Read the pm-components property (if there is one) and use it to set up
4036  * components.  Returns a pointer to an array of component structures if
4037  * pm-components found and successfully parsed, else returns NULL.
4038  * Sets error return *errp to true to indicate a failure (as opposed to no
4039  * property being present).
4040  */
4041 pm_comp_t *
4042 pm_autoconfig(dev_info_t *dip, int *errp)
4043 {
4044 	PMD_FUNC(pmf, "autoconfig")
4045 	uint_t nelems;
4046 	char **pp;
4047 	pm_comp_t *compp = NULL;
4048 	int i, j, level, components = 0;
4049 	size_t size = 0;
4050 	struct pm_comp_pkg *p, *ptail;
4051 	struct pm_comp_pkg *phead = NULL;
4052 	int *lvals = NULL;
4053 	int *lszs = NULL;
4054 	int *np = NULL;
4055 	int npi = 0;
4056 	char **lnames = NULL;
4057 	char *cp, *tp;
4058 	pm_comp_t *ret = NULL;
4059 
4060 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4061 	*errp = 0;	/* assume success */
4062 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4063 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4064 		return (NULL);
4065 	}
4066 
4067 	if (nelems < 3) {	/* need at least one name and two levels */
4068 		goto errout;
4069 	}
4070 
4071 	/*
4072 	 * pm_create_components is no longer allowed
4073 	 */
4074 	if (PM_NUMCMPTS(dip) != 0) {
4075 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4076 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4077 		goto errout;
4078 	}
4079 
4080 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4081 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4082 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4083 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4084 
4085 	level = 0;
4086 	phead = NULL;
4087 	for (i = 0; i < nelems; i++) {
4088 		cp = pp[i];
4089 		if (!isdigit(*cp)) {	/*  must be name */
4090 			if (strncmp(cp, namestr, nameln) != 0) {
4091 				goto errout;
4092 			}
4093 			if (i != 0) {
4094 				if (level == 0) {	/* no level spec'd */
4095 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4096 					    pmf))
4097 					goto errout;
4098 				}
4099 				np[npi++] = lvals[level - 1];
4100 				/* finish up previous component levels */
4101 				FINISH_COMP;
4102 			}
4103 			cp += nameln;
4104 			if (!*cp) {
4105 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4106 				goto errout;
4107 			}
4108 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4109 			if (phead == NULL) {
4110 				phead = ptail = p;
4111 			} else {
4112 				ptail->next = p;
4113 				ptail = p;
4114 			}
4115 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4116 			    KM_SLEEP);
4117 			compp->pmc_name_sz = strlen(cp) + 1;
4118 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4119 			    KM_SLEEP);
4120 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4121 			components++;
4122 			level = 0;
4123 		} else {	/* better be power level <num>=<name> */
4124 #ifdef DEBUG
4125 			tp = cp;
4126 #endif
4127 			if (i == 0 ||
4128 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4129 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4130 				goto errout;
4131 			}
4132 #ifdef DEBUG
4133 			tp = cp;
4134 #endif
4135 			if (*cp++ != '=' || !*cp) {
4136 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4137 				goto errout;
4138 			}
4139 
4140 			lszs[level] = strlen(cp) + 1;
4141 			size += lszs[level];
4142 			lnames[level] = cp;	/* points into prop string */
4143 			level++;
4144 		}
4145 	}
4146 	np[npi++] = lvals[level - 1];
4147 	if (level == 0) {	/* ended with a name */
4148 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4149 		goto errout;
4150 	}
4151 	FINISH_COMP;
4152 
4153 
4154 	/*
4155 	 * Now we have a list of components--we have to return instead an
4156 	 * array of them, but we can just copy the top level and leave
4157 	 * the rest as is
4158 	 */
4159 	(void) e_pm_create_components(dip, components);
4160 	for (i = 0; i < components; i++)
4161 		e_pm_set_max_power(dip, i, np[i]);
4162 
4163 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4164 	for (i = 0, p = phead; i < components; i++) {
4165 		ASSERT(p);
4166 		/*
4167 		 * Now sanity-check values:  levels must be monotonically
4168 		 * increasing
4169 		 */
4170 		if (p->comp->pmc_numlevels < 2) {
4171 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4172 			    "levels\n", pmf,
4173 			    p->comp->pmc_name, PM_DEVICE(dip),
4174 			    p->comp->pmc_numlevels))
4175 			goto errout;
4176 		}
4177 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4178 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4179 			    (p->comp->pmc_lvals[j] <=
4180 			    p->comp->pmc_lvals[j - 1]))) {
4181 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4182 				    "not mono. incr, %d follows %d\n", pmf,
4183 				    p->comp->pmc_name, PM_DEVICE(dip),
4184 				    p->comp->pmc_lvals[j],
4185 				    p->comp->pmc_lvals[j - 1]))
4186 				goto errout;
4187 			}
4188 		}
4189 		ret[i] = *p->comp;	/* struct assignment */
4190 		for (j = 0; j < i; j++) {
4191 			/*
4192 			 * Test for unique component names
4193 			 */
4194 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4195 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4196 				    "unique\n", pmf, ret[j].pmc_name,
4197 				    PM_DEVICE(dip)))
4198 				goto errout;
4199 			}
4200 		}
4201 		ptail = p;
4202 		p = p->next;
4203 		phead = p;	/* errout depends on phead making sense */
4204 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4205 		kmem_free(ptail, sizeof (*ptail));
4206 	}
4207 out:
4208 	ddi_prop_free(pp);
4209 	if (lvals)
4210 		kmem_free(lvals, nelems * sizeof (int));
4211 	if (lszs)
4212 		kmem_free(lszs, nelems * sizeof (int));
4213 	if (lnames)
4214 		kmem_free(lnames, nelems * sizeof (char *));
4215 	if (np)
4216 		kmem_free(np, nelems * sizeof (int));
4217 	return (ret);
4218 
4219 errout:
4220 	e_pm_destroy_components(dip);
4221 	*errp = 1;	/* signal failure */
4222 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4223 	for (i = 0; i < nelems - 1; i++)
4224 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4225 	if (nelems != 0)
4226 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4227 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4228 	for (p = phead; p; ) {
4229 		pm_comp_t *pp;
4230 		int n;
4231 
4232 		ptail = p;
4233 		/*
4234 		 * Free component data structures
4235 		 */
4236 		pp = p->comp;
4237 		n = pp->pmc_numlevels;
4238 		if (pp->pmc_name_sz) {
4239 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4240 		}
4241 		if (pp->pmc_lnames_sz) {
4242 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4243 		}
4244 		if (pp->pmc_lnames) {
4245 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4246 		}
4247 		if (pp->pmc_thresh) {
4248 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4249 		}
4250 		if (pp->pmc_lvals) {
4251 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4252 		}
4253 		p = ptail->next;
4254 		kmem_free(ptail, sizeof (*ptail));
4255 	}
4256 	if (ret != NULL)
4257 		kmem_free(ret, components * sizeof (pm_comp_t));
4258 	ret = NULL;
4259 	goto out;
4260 }
4261 
/*
 * Set threshold values for a devices components by dividing the target
 * threshold (base) by the number of transitions and assign each transition
 * that threshold.  This will get the entire device down in the target time if
 * all components are idle and even if there are dependencies among components.
 *
 * Devices may well get powered all the way down before the target time, but
 * at least the EPA will be happy.
 *
 * 'base' is the device threshold to distribute (95% of it is actually
 * handed out), 'flag' is the threshold type flag that is recorded in
 * devi_pm_flags afterwards.  The dip lock is taken on entry and dropped
 * before returning.
 */
void
pm_set_device_threshold(dev_info_t *dip, int base, int flag)
{
	PMD_FUNC(pmf, "set_device_threshold")
	int target_threshold = (base * 95) / 100;
	int level, comp;		/* loop counters */
	int transitions = 0;
	int ncomp = PM_NUMCMPTS(dip);
	int thresh;
	int remainder;
	pm_comp_t *pmc;
	int i, circ;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PM_LOCK_DIP(dip);
	/*
	 * First we handle the easy one.  If we're setting the default
	 * threshold for a node with children, then we set it to the
	 * default nexus threshold (currently 0) and mark it as default
	 * nexus threshold instead
	 */
	if (PM_IS_NEXUS(dip)) {
		if (flag == PMC_DEF_THRESH) {
			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
			    PM_DEVICE(dip)))
			thresh = pm_default_nexus_threshold;
			/* level 0 has no threshold, so start at level 1 */
			for (comp = 0; comp < ncomp; comp++) {
				pmc = &PM_CP(dip, comp)->pmc_comp;
				for (level = 1; level < pmc->pmc_numlevels;
				    level++) {
					pmc->pmc_thresh[level] = thresh;
				}
			}
			DEVI(dip)->devi_pm_dev_thresh =
			    pm_default_nexus_threshold;
			/*
			 * If the nexus node is being reconfigured back to
			 * the default threshold, adjust the notlowest count.
			 */
			if (DEVI(dip)->devi_pm_flags &
			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
				PM_LOCK_POWER(dip, &circ);
				/*
				 * Every component that is not at level 0
				 * is taken out of the global count.
				 */
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					if (PM_CURPOWER(dip, i) == 0)
						continue;
					mutex_enter(&pm_compcnt_lock);
					ASSERT(pm_comps_notlowest);
					pm_comps_notlowest--;
					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
					    "notlowest to %d\n", pmf,
					    PM_DEVICE(dip), pm_comps_notlowest))
					if (pm_comps_notlowest == 0)
						pm_ppm_notify_all_lowest(dip,
						    PM_ALL_LOWEST);
					mutex_exit(&pm_compcnt_lock);
				}
				PM_UNLOCK_POWER(dip, circ);
			}
			/* replace the threshold type flags with NEXDEF */
			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
			PM_UNLOCK_DIP(dip);
			return;
		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
			/*
			 * If the nexus node is being configured for a
			 * non-default threshold, include that node in
			 * the notlowest accounting.
			 */
			PM_LOCK_POWER(dip, &circ);
			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
				if (PM_CURPOWER(dip, i) == 0)
					continue;
				mutex_enter(&pm_compcnt_lock);
				/* notify before the count leaves zero */
				if (pm_comps_notlowest == 0)
					pm_ppm_notify_all_lowest(dip,
					    PM_NOT_ALL_LOWEST);
				pm_comps_notlowest++;
				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
				    "notlowest to %d\n", pmf,
				    PM_DEVICE(dip), pm_comps_notlowest))
				mutex_exit(&pm_compcnt_lock);
			}
			PM_UNLOCK_POWER(dip, circ);
		}
	}
	/*
	 * Compute the total number of transitions for all components
	 * of the device.  Distribute the threshold evenly over them
	 */
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		ASSERT(pmc->pmc_numlevels > 1);
		transitions += pmc->pmc_numlevels - 1;
	}
	/*
	 * NOTE(review): a zero transition count is only caught by the
	 * ASSERT; where ASSERT compiles away the division below would
	 * divide by zero.  Presumably callers only pass devices that have
	 * components -- confirm.
	 */
	ASSERT(transitions);
	thresh = target_threshold / transitions;

	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			pmc->pmc_thresh[level] = thresh;
		}
	}

#ifdef DEBUG
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
			    comp, level, pmc->pmc_thresh[level]))
		}
	}
#endif
	/*
	 * Distribute any remainder till they are all gone
	 */
	remainder = target_threshold - thresh * transitions;
	level = 1;
#ifdef DEBUG
	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
	    transitions))
#endif
	/*
	 * Hand out the remainder one unit at a time: first to level 1 of
	 * each component in turn, then level 2, and so on, skipping
	 * components that do not have the current level.
	 */
	while (remainder > 0) {
		comp = 0;
		while (remainder && (comp < ncomp)) {
			pmc = &PM_CP(dip, comp)->pmc_comp;
			if (level < pmc->pmc_numlevels) {
				pmc->pmc_thresh[level] += 1;
				remainder--;
			}
			comp++;
		}
		level++;
	}
#ifdef DEBUG
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
			    comp, level, pmc->pmc_thresh[level]))
		}
	}
#endif
	ASSERT(PM_IAM_LOCKING_DIP(dip));
	/* record the undiminished base and the new threshold type */
	DEVI(dip)->devi_pm_dev_thresh = base;
	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
	DEVI(dip)->devi_pm_flags |= flag;
	PM_UNLOCK_DIP(dip);
}
4423 
/*
 * Called when there is no old-style platform power management driver.
 * It is the initial value of the pm_platform_power function pointer,
 * ignores its request argument and always returns DDI_FAILURE.
 */
static int
ddi_no_platform_power(power_req_t *req)
{
	_NOTE(ARGUNUSED(req))
	return (DDI_FAILURE);
}
4433 
/*
 * This function pointer is the entry point supplied by the platform-specific
 * pm driver to bring the device component 'pm_cmpt' to power level 'pm_level'.
 * Until such a driver installs its own routine it points at
 * ddi_no_platform_power(), which fails every request.
 * Using a global to obtain the function from the platform-specific
 * pm driver is not ideal, but it is simple and efficient.
 * The previous property lookup was being done in the idle loop on swift
 * systems without pmc chips and hurt deskbench performance as well as
 * violating scheduler locking rules.
 */
int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4444 
4445 /*
4446  * Old obsolete interface for a device to request a power change (but only
4447  * an increase in power)
4448  */
4449 int
4450 ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
4451 {
4452 	return (pm_raise_power(dip, cmpt, level));
4453 }
4454 
4455 /*
4456  * The old obsolete interface to platform power management.  Only used by
4457  * Gypsy platform and APM on X86.
4458  */
4459 int
4460 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4461 {
4462 	power_req_t	request;
4463 
4464 	request.request_type = PMR_SET_POWER;
4465 	request.req.set_power_req.who = dip;
4466 	request.req.set_power_req.cmpt = pm_cmpt;
4467 	request.req.set_power_req.level = pm_level;
4468 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4469 }
4470 
/*
 * A driver can invoke this from its detach routine when DDI_SUSPEND is
 * passed.  Returns true if subsequent processing could result in power being
 * removed from the device.  The arg is not currently used because it is
 * implicit in the operation of cpr/DR.
 *
 * The value returned is simply the current value of the global
 * pm_powering_down.
 */
int
ddi_removing_power(dev_info_t *dip)
{
	_NOTE(ARGUNUSED(dip))
	return (pm_powering_down);
}
4483 
4484 /*
4485  * Returns true if a device indicates that its parent handles suspend/resume
4486  * processing for it.
4487  */
4488 int
4489 e_ddi_parental_suspend_resume(dev_info_t *dip)
4490 {
4491 	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
4492 }
4493 
4494 /*
4495  * Called for devices which indicate that their parent does suspend/resume
4496  * handling for them
4497  */
4498 int
4499 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4500 {
4501 	power_req_t	request;
4502 	request.request_type = PMR_SUSPEND;
4503 	request.req.suspend_req.who = dip;
4504 	request.req.suspend_req.cmd = cmd;
4505 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4506 }
4507 
4508 /*
4509  * Called for devices which indicate that their parent does suspend/resume
4510  * handling for them
4511  */
4512 int
4513 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4514 {
4515 	power_req_t	request;
4516 	request.request_type = PMR_RESUME;
4517 	request.req.resume_req.who = dip;
4518 	request.req.resume_req.cmd = cmd;
4519 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4520 }
4521 
4522 /*
4523  * Old obsolete exported interface for drivers to create components.
4524  * This is now handled by exporting the pm-components property.
4525  */
4526 int
4527 pm_create_components(dev_info_t *dip, int num_components)
4528 {
4529 	PMD_FUNC(pmf, "pm_create_components")
4530 
4531 	if (num_components < 1)
4532 		return (DDI_FAILURE);
4533 
4534 	if (!DEVI_IS_ATTACHING(dip)) {
4535 		return (DDI_FAILURE);
4536 	}
4537 
4538 	/* don't need to lock dip because attach is single threaded */
4539 	if (DEVI(dip)->devi_pm_components) {
4540 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4541 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4542 		return (DDI_FAILURE);
4543 	}
4544 	e_pm_create_components(dip, num_components);
4545 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4546 	e_pm_default_components(dip, num_components);
4547 	return (DDI_SUCCESS);
4548 }
4549 
4550 /*
4551  * Obsolete interface previously called by drivers to destroy their components
4552  * at detach time.  This is now done automatically.  However, we need to keep
4553  * this for the old drivers.
4554  */
4555 void
4556 pm_destroy_components(dev_info_t *dip)
4557 {
4558 	PMD_FUNC(pmf, "pm_destroy_components")
4559 	dev_info_t *pdip = ddi_get_parent(dip);
4560 
4561 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4562 	    PM_DEVICE(dip)))
4563 	ASSERT(DEVI_IS_DETACHING(dip));
4564 #ifdef DEBUG
4565 	if (!PM_ISBC(dip))
4566 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4567 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4568 		    PM_ADDR(dip));
4569 #endif
4570 	/*
4571 	 * We ignore this unless this is an old-style driver, except for
4572 	 * printing the message above
4573 	 */
4574 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4575 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4576 		    PM_DEVICE(dip)))
4577 		return;
4578 	}
4579 	ASSERT(PM_GET_PM_INFO(dip));
4580 
4581 	/*
4582 	 * pm_unmanage will clear info pointer later, after dealing with
4583 	 * dependencies
4584 	 */
4585 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4586 	/*
4587 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4588 	 * Parents that get notification are not adjusted because their
4589 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4590 	 */
4591 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4592 		pm_rele_power(pdip);
4593 #ifdef DEBUG
4594 	else {
4595 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4596 		    pmf, PM_DEVICE(dip)))
4597 	}
4598 #endif
4599 	e_pm_destroy_components(dip);
4600 	/*
4601 	 * Forget we ever knew anything about the components of this  device
4602 	 */
4603 	DEVI(dip)->devi_pm_flags &=
4604 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4605 }
4606 
4607 /*
4608  * Exported interface for a driver to set a component busy.
4609  */
4610 int
4611 pm_busy_component(dev_info_t *dip, int cmpt)
4612 {
4613 	struct pm_component *cp;
4614 
4615 	ASSERT(dip != NULL);
4616 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4617 		return (DDI_FAILURE);
4618 	PM_LOCK_BUSY(dip);
4619 	cp->pmc_busycount++;
4620 	cp->pmc_timestamp = 0;
4621 	PM_UNLOCK_BUSY(dip);
4622 	return (DDI_SUCCESS);
4623 }
4624 
4625 /*
4626  * Exported interface for a driver to set a component idle.
4627  */
4628 int
4629 pm_idle_component(dev_info_t *dip, int cmpt)
4630 {
4631 	PMD_FUNC(pmf, "pm_idle_component")
4632 	struct pm_component *cp;
4633 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4634 
4635 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4636 		return (DDI_FAILURE);
4637 
4638 	PM_LOCK_BUSY(dip);
4639 	if (cp->pmc_busycount) {
4640 		if (--(cp->pmc_busycount) == 0)
4641 			cp->pmc_timestamp = gethrestime_sec();
4642 	} else {
4643 		cp->pmc_timestamp = gethrestime_sec();
4644 	}
4645 
4646 	PM_UNLOCK_BUSY(dip);
4647 
4648 	/*
4649 	 * if device becomes idle during idle down period, try scan it down
4650 	 */
4651 	if (scanp && PM_IS_PID(dip)) {
4652 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4653 		    PM_DEVICE(dip)))
4654 		pm_rescan(dip);
4655 		return (DDI_SUCCESS);
4656 	}
4657 
4658 	/*
4659 	 * handle scan not running with nexus threshold == 0
4660 	 */
4661 
4662 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4663 		pm_rescan(dip);
4664 	}
4665 
4666 	return (DDI_SUCCESS);
4667 }
4668 
4669 /*
4670  * This is the old  obsolete interface called by drivers to set their normal
4671  * power.  Thus we can't fix its behavior or return a value.
4672  * This functionality is replaced by the pm-component property.
4673  * We'll only get components destroyed while no power management is
4674  * going on (and the device is detached), so we don't need a mutex here
4675  */
4676 void
4677 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4678 {
4679 	PMD_FUNC(pmf, "set_normal_power")
4680 #ifdef DEBUG
4681 	if (!PM_ISBC(dip))
4682 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4683 		    "(driver exporting pm-components property) ignored",
4684 		    PM_NAME(dip), PM_ADDR(dip));
4685 #endif
4686 	if (PM_ISBC(dip)) {
4687 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4688 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4689 		e_pm_set_max_power(dip, comp, level);
4690 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4691 	}
4692 }
4693 
4694 /*
4695  * Called on a successfully detached driver to free pm resources
4696  */
4697 static void
4698 pm_stop(dev_info_t *dip)
4699 {
4700 	PMD_FUNC(pmf, "stop")
4701 	dev_info_t *pdip = ddi_get_parent(dip);
4702 
4703 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4704 	/* stopping scan, destroy scan data structure */
4705 	if (!PM_ISBC(dip)) {
4706 		pm_scan_stop(dip);
4707 		pm_scan_fini(dip);
4708 	}
4709 
4710 	if (PM_GET_PM_INFO(dip) != NULL) {
4711 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4712 			/*
4713 			 * Old style driver may have called
4714 			 * pm_destroy_components already, but just in case ...
4715 			 */
4716 			e_pm_destroy_components(dip);
4717 		} else {
4718 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4719 			    pmf, PM_DEVICE(dip)))
4720 		}
4721 	} else {
4722 		if (PM_NUMCMPTS(dip))
4723 			e_pm_destroy_components(dip);
4724 		else {
4725 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4726 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4727 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4728 					pm_rele_power(pdip);
4729 				} else if (pdip &&
4730 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4731 					(void) mdi_power(pdip,
4732 					    MDI_PM_RELE_POWER,
4733 					    (void *)dip, NULL, 0);
4734 				}
4735 			}
4736 		}
4737 	}
4738 }
4739 
4740 /*
4741  * The node is the subject of a reparse pm props ioctl. Throw away the old
4742  * info and start over.
4743  */
4744 int
4745 e_new_pm_props(dev_info_t *dip)
4746 {
4747 	if (PM_GET_PM_INFO(dip) != NULL) {
4748 		pm_stop(dip);
4749 
4750 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4751 			return (DDI_FAILURE);
4752 		}
4753 	}
4754 	e_pm_props(dip);
4755 	return (DDI_SUCCESS);
4756 }
4757 
4758 /*
4759  * Device has been attached, so process its pm properties
4760  */
4761 void
4762 e_pm_props(dev_info_t *dip)
4763 {
4764 	char *pp;
4765 	int len;
4766 	int flags = 0;
4767 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4768 
4769 	/*
4770 	 * It doesn't matter if we do this more than once, we should always
4771 	 * get the same answers, and if not, then the last one in is the
4772 	 * best one.
4773 	 */
4774 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4775 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4776 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4777 			flags = PMC_NEEDS_SR;
4778 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4779 			flags = PMC_NO_SR;
4780 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4781 			flags = PMC_PARENTAL_SR;
4782 		} else {
4783 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4784 			    "%s property value '%s'", PM_NAME(dip),
4785 			    PM_ADDR(dip), "pm-hardware-state", pp);
4786 		}
4787 		kmem_free(pp, len);
4788 	}
4789 	/*
4790 	 * This next segment (PMC_WANTS_NOTIFY) is in
4791 	 * support of nexus drivers which will want to be involved in
4792 	 * (or at least notified of) their child node's power level transitions.
4793 	 * "pm-want-child-notification?" is defined by the parent.
4794 	 */
4795 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4796 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4797 		flags |= PMC_WANTS_NOTIFY;
4798 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4799 	    dip, propflag, "pm-want-child-notification?"));
4800 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4801 	    "no-involuntary-power-cycles"))
4802 		flags |= PMC_NO_INVOL;
4803 	/*
4804 	 * Is the device a CPU device?
4805 	 */
4806 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4807 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4808 		if (strcmp(pp, "CPU") == 0) {
4809 			flags |= PMC_CPU_DEVICE;
4810 		} else {
4811 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4812 			    "%s property value '%s'", PM_NAME(dip),
4813 			    PM_ADDR(dip), "pm-class", pp);
4814 		}
4815 		kmem_free(pp, len);
4816 	}
4817 	/* devfs single threads us */
4818 	DEVI(dip)->devi_pm_flags |= flags;
4819 }
4820 
4821 /*
4822  * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
4823  * driver which has claimed a node.
4824  * Sets old_power in arg struct.
4825  */
4826 static int
4827 pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
4828     ddi_ctl_enum_t ctlop, void *arg, void *result)
4829 {
4830 	_NOTE(ARGUNUSED(dip))
4831 	PMD_FUNC(pmf, "ctlops")
4832 	power_req_t *reqp = (power_req_t *)arg;
4833 	int retval;
4834 	dev_info_t *target_dip;
4835 	int new_level, old_level, cmpt;
4836 #ifdef PMDDEBUG
4837 	char *format;
4838 #endif
4839 
4840 	/*
4841 	 * The interface for doing the actual power level changes is now
4842 	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
4843 	 * different platform-specific power control drivers.
4844 	 *
4845 	 * This driver implements the "default" version of this interface.
4846 	 * If no ppm driver has been installed then this interface is called
4847 	 * instead.
4848 	 */
4849 	ASSERT(dip == NULL);
4850 	switch (ctlop) {
4851 	case DDI_CTLOPS_POWER:
4852 		switch (reqp->request_type) {
4853 		case PMR_PPM_SET_POWER:
4854 		{
4855 			target_dip = reqp->req.ppm_set_power_req.who;
4856 			ASSERT(target_dip == rdip);
4857 			new_level = reqp->req.ppm_set_power_req.new_level;
4858 			cmpt = reqp->req.ppm_set_power_req.cmpt;
4859 			/* pass back old power for the PM_LEVEL_UNKNOWN case */
4860 			old_level = PM_CURPOWER(target_dip, cmpt);
4861 			reqp->req.ppm_set_power_req.old_level = old_level;
4862 			retval = pm_power(target_dip, cmpt, new_level);
4863 			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
4864 			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
4865 			    old_level, new_level, (retval == DDI_SUCCESS ?
4866 			    "chd" : "no chg")))
4867 			return (retval);
4868 		}
4869 
4870 		case PMR_PPM_PRE_DETACH:
4871 		case PMR_PPM_POST_DETACH:
4872 		case PMR_PPM_PRE_ATTACH:
4873 		case PMR_PPM_POST_ATTACH:
4874 		case PMR_PPM_PRE_PROBE:
4875 		case PMR_PPM_POST_PROBE:
4876 		case PMR_PPM_PRE_RESUME:
4877 		case PMR_PPM_INIT_CHILD:
4878 		case PMR_PPM_UNINIT_CHILD:
4879 #ifdef PMDDEBUG
4880 			switch (reqp->request_type) {
4881 				case PMR_PPM_PRE_DETACH:
4882 					format = "%s: PMR_PPM_PRE_DETACH "
4883 					    "%s@%s(%s#%d)\n";
4884 					break;
4885 				case PMR_PPM_POST_DETACH:
4886 					format = "%s: PMR_PPM_POST_DETACH "
4887 					    "%s@%s(%s#%d) rets %d\n";
4888 					break;
4889 				case PMR_PPM_PRE_ATTACH:
4890 					format = "%s: PMR_PPM_PRE_ATTACH "
4891 					    "%s@%s(%s#%d)\n";
4892 					break;
4893 				case PMR_PPM_POST_ATTACH:
4894 					format = "%s: PMR_PPM_POST_ATTACH "
4895 					    "%s@%s(%s#%d) rets %d\n";
4896 					break;
4897 				case PMR_PPM_PRE_PROBE:
4898 					format = "%s: PMR_PPM_PRE_PROBE "
4899 					    "%s@%s(%s#%d)\n";
4900 					break;
4901 				case PMR_PPM_POST_PROBE:
4902 					format = "%s: PMR_PPM_POST_PROBE "
4903 					    "%s@%s(%s#%d) rets %d\n";
4904 					break;
4905 				case PMR_PPM_PRE_RESUME:
4906 					format = "%s: PMR_PPM_PRE_RESUME "
4907 					    "%s@%s(%s#%d) rets %d\n";
4908 					break;
4909 				case PMR_PPM_INIT_CHILD:
4910 					format = "%s: PMR_PPM_INIT_CHILD "
4911 					    "%s@%s(%s#%d)\n";
4912 					break;
4913 				case PMR_PPM_UNINIT_CHILD:
4914 					format = "%s: PMR_PPM_UNINIT_CHILD "
4915 					    "%s@%s(%s#%d)\n";
4916 					break;
4917 				default:
4918 					break;
4919 			}
4920 			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
4921 			    reqp->req.ppm_config_req.result))
4922 #endif
4923 			return (DDI_SUCCESS);
4924 
4925 		case PMR_PPM_POWER_CHANGE_NOTIFY:
4926 			/*
4927 			 * Nothing for us to do
4928 			 */
4929 			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
4930 			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
4931 			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
4932 			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
4933 			    reqp->req.ppm_notify_level_req.cmpt,
4934 			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
4935 			    reqp->req.ppm_notify_level_req.cmpt),
4936 			    reqp->req.ppm_notify_level_req.new_level))
4937 			return (DDI_SUCCESS);
4938 
4939 		case PMR_PPM_UNMANAGE:
4940 			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
4941 			    pmf, PM_DEVICE(rdip)))
4942 			return (DDI_SUCCESS);
4943 
4944 		case PMR_PPM_LOCK_POWER:
4945 			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
4946 			    reqp->req.ppm_lock_power_req.circp);
4947 			return (DDI_SUCCESS);
4948 
4949 		case PMR_PPM_UNLOCK_POWER:
4950 			pm_unlock_power_single(
4951 			    reqp->req.ppm_unlock_power_req.who,
4952 			    reqp->req.ppm_unlock_power_req.circ);
4953 			return (DDI_SUCCESS);
4954 
4955 		case PMR_PPM_TRY_LOCK_POWER:
4956 			*(int *)result = pm_try_locking_power_single(
4957 			    reqp->req.ppm_lock_power_req.who,
4958 			    reqp->req.ppm_lock_power_req.circp);
4959 			return (DDI_SUCCESS);
4960 
4961 		case PMR_PPM_POWER_LOCK_OWNER:
4962 			target_dip = reqp->req.ppm_power_lock_owner_req.who;
4963 			ASSERT(target_dip == rdip);
4964 			reqp->req.ppm_power_lock_owner_req.owner =
4965 			    DEVI(rdip)->devi_busy_thread;
4966 			return (DDI_SUCCESS);
4967 		default:
4968 			PMD(PMD_ERROR, ("%s: default!\n", pmf))
4969 			return (DDI_FAILURE);
4970 		}
4971 
4972 	default:
4973 		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
4974 		return (DDI_FAILURE);
4975 	}
4976 }
4977 
4978 /*
4979  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4980  * power_ops struct for this functionality instead?
4981  * However, we only ever do this on a ppm driver.
4982  */
4983 int
4984 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4985 {
4986 	int (*fp)();
4987 
4988 	/* if no ppm handler, call the default routine */
4989 	if (d == NULL) {
4990 		return (pm_default_ctlops(d, r, op, a, v));
4991 	}
4992 	if (!d || !r)
4993 		return (DDI_FAILURE);
4994 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
4995 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
4996 
4997 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
4998 	return ((*fp)(d, r, op, a, v));
4999 }
5000 
5001 /*
5002  * Called on a node when attach completes or the driver makes its first pm
5003  * call (whichever comes first).
5004  * In the attach case, device may not be power manageable at all.
5005  * Don't need to lock the dip because we're single threaded by the devfs code
5006  */
5007 static int
5008 pm_start(dev_info_t *dip)
5009 {
5010 	PMD_FUNC(pmf, "start")
5011 	int ret;
5012 	dev_info_t *pdip = ddi_get_parent(dip);
5013 	int e_pm_manage(dev_info_t *, int);
5014 	void pm_noinvol_specd(dev_info_t *dip);
5015 
5016 	e_pm_props(dip);
5017 	pm_noinvol_specd(dip);
5018 	/*
5019 	 * If this dip has already been processed, don't mess with it
5020 	 * (but decrement the speculative count we did above, as whatever
5021 	 * code put it under pm already will have dealt with it)
5022 	 */
5023 	if (PM_GET_PM_INFO(dip)) {
5024 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5025 		    pmf, PM_DEVICE(dip)))
5026 		return (0);
5027 	}
5028 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5029 
5030 	if (PM_GET_PM_INFO(dip) == NULL) {
5031 		/*
5032 		 * keep the kidsupcount increment as is
5033 		 */
5034 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5035 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5036 			pm_hold_power(pdip);
5037 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5038 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5039 			    (void *)dip, NULL, 0);
5040 		}
5041 
5042 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5043 		    "left up\n", pmf, PM_DEVICE(dip)))
5044 	}
5045 
5046 	return (ret);
5047 }
5048 
5049 /*
5050  * Keep a list of recorded thresholds.  For now we just keep a list and
5051  * search it linearly.  We don't expect too many entries.  Can always hash it
5052  * later if we need to.
5053  */
5054 void
5055 pm_record_thresh(pm_thresh_rec_t *rp)
5056 {
5057 	pm_thresh_rec_t *pptr, *ptr;
5058 
5059 	ASSERT(*rp->ptr_physpath);
5060 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5061 	for (pptr = NULL, ptr = pm_thresh_head;
5062 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5063 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5064 			/* replace this one */
5065 			rp->ptr_next = ptr->ptr_next;
5066 			if (pptr) {
5067 				pptr->ptr_next = rp;
5068 			} else {
5069 				pm_thresh_head = rp;
5070 			}
5071 			rw_exit(&pm_thresh_rwlock);
5072 			kmem_free(ptr, ptr->ptr_size);
5073 			return;
5074 		}
5075 		continue;
5076 	}
5077 	/*
5078 	 * There was not a match in the list, insert this one in front
5079 	 */
5080 	if (pm_thresh_head) {
5081 		rp->ptr_next = pm_thresh_head;
5082 		pm_thresh_head = rp;
5083 	} else {
5084 		rp->ptr_next = NULL;
5085 		pm_thresh_head = rp;
5086 	}
5087 	rw_exit(&pm_thresh_rwlock);
5088 }
5089 
5090 /*
5091  * Create a new dependency record and hang a new dependency entry off of it
5092  */
5093 pm_pdr_t *
5094 newpdr(char *kept, char *keeps, int isprop)
5095 {
5096 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5097 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5098 	p->pdr_size = size;
5099 	p->pdr_isprop = isprop;
5100 	p->pdr_kept_paths = NULL;
5101 	p->pdr_kept_count = 0;
5102 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5103 	(void) strcpy(p->pdr_kept, kept);
5104 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5105 	(void) strcpy(p->pdr_keeper, keeps);
5106 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5107 	    (intptr_t)p + size);
5108 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5109 	    (intptr_t)p + size);
5110 	return (p);
5111 }
5112 
5113 /*
5114  * Keep a list of recorded dependencies.  We only keep the
5115  * keeper -> kept list for simplification. At this point We do not
5116  * care about whether the devices are attached or not yet,
5117  * this would be done in pm_keeper() and pm_kept().
5118  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5119  * and it is up to the user to issue the ioctl again if they want it
5120  * (e.g. pmconfig)
5121  * Returns true if dependency already exists in the list.
5122  */
5123 int
5124 pm_record_keeper(char *kept, char *keeper, int isprop)
5125 {
5126 	PMD_FUNC(pmf, "record_keeper")
5127 	pm_pdr_t *npdr, *ppdr, *pdr;
5128 
5129 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5130 	ASSERT(kept && keeper);
5131 #ifdef DEBUG
5132 	if (pm_debug & PMD_KEEPS)
5133 		prdeps("pm_record_keeper entry");
5134 #endif
5135 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5136 	    ppdr = pdr, pdr = pdr->pdr_next) {
5137 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5138 		    pdr->pdr_keeper))
5139 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5140 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5141 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5142 			return (1);
5143 		}
5144 	}
5145 	/*
5146 	 * We did not find any match, so we have to make an entry
5147 	 */
5148 	npdr = newpdr(kept, keeper, isprop);
5149 	if (ppdr) {
5150 		ASSERT(ppdr->pdr_next == NULL);
5151 		ppdr->pdr_next = npdr;
5152 	} else {
5153 		ASSERT(pm_dep_head == NULL);
5154 		pm_dep_head = npdr;
5155 	}
5156 #ifdef DEBUG
5157 	if (pm_debug & PMD_KEEPS)
5158 		prdeps("pm_record_keeper after new record");
5159 #endif
5160 	if (!isprop)
5161 		pm_unresolved_deps++;
5162 	else
5163 		pm_prop_deps++;
5164 	return (0);
5165 }
5166 
5167 /*
5168  * Look up this device in the set of devices we've seen ioctls for
5169  * to see if we are holding a threshold spec for it.  If so, make it so.
5170  * At ioctl time, we were given the physical path of the device.
5171  */
5172 int
5173 pm_thresh_specd(dev_info_t *dip)
5174 {
5175 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5176 	char *path = 0;
5177 	char pathbuf[MAXNAMELEN];
5178 	pm_thresh_rec_t *rp;
5179 
5180 	path = ddi_pathname(dip, pathbuf);
5181 
5182 	rw_enter(&pm_thresh_rwlock, RW_READER);
5183 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5184 		if (strcmp(rp->ptr_physpath, path) != 0)
5185 			continue;
5186 		pm_apply_recorded_thresh(dip, rp);
5187 		rw_exit(&pm_thresh_rwlock);
5188 		return (1);
5189 	}
5190 	rw_exit(&pm_thresh_rwlock);
5191 	return (0);
5192 }
5193 
5194 static int
5195 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5196 {
5197 	PMD_FUNC(pmf, "set_keeping")
5198 	pm_info_t *kept_info;
5199 	int j, up = 0, circ;
5200 	void prdeps(char *);
5201 
5202 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5203 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5204 #ifdef DEBUG
5205 	if (pm_debug & PMD_KEEPS)
5206 		prdeps("Before PAD\n");
5207 #endif
5208 	ASSERT(keeper != kept);
5209 	if (PM_GET_PM_INFO(keeper) == NULL) {
5210 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5211 		    "%s@%s(%s#%d), but the former is not power managed",
5212 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5213 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5214 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5215 		return (0);
5216 	}
5217 	kept_info = PM_GET_PM_INFO(kept);
5218 	ASSERT(kept_info);
5219 	PM_LOCK_POWER(keeper, &circ);
5220 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5221 		if (PM_CURPOWER(keeper, j)) {
5222 			up++;
5223 			break;
5224 		}
5225 	}
5226 	if (up) {
5227 		/* Bringup and maintain a hold on the kept */
5228 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5229 		    PM_DEVICE(kept)))
5230 		bring_pmdep_up(kept, 1);
5231 	}
5232 	PM_UNLOCK_POWER(keeper, circ);
5233 #ifdef DEBUG
5234 	if (pm_debug & PMD_KEEPS)
5235 		prdeps("After PAD\n");
5236 #endif
5237 	return (1);
5238 }
5239 
5240 /*
5241  * Should this device keep up another device?
5242  * Look up this device in the set of devices we've seen ioctls for
5243  * to see if we are holding a dependency spec for it.  If so, make it so.
5244  * Because we require the kept device to be attached already in order to
5245  * make the list entry (and hold it), we only need to look for keepers.
5246  * At ioctl time, we were given the physical path of the device.
5247  */
5248 int
5249 pm_keeper(char *keeper)
5250 {
5251 	PMD_FUNC(pmf, "keeper")
5252 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5253 	dev_info_t *dip;
5254 	pm_pdr_t *dp;
5255 	dev_info_t *kept = NULL;
5256 	int ret = 0;
5257 	int i;
5258 
5259 	if (!pm_unresolved_deps && !pm_prop_deps)
5260 		return (0);
5261 	ASSERT(keeper != NULL);
5262 	dip = pm_name_to_dip(keeper, 1);
5263 	if (dip == NULL)
5264 		return (0);
5265 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5266 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5267 		if (!dp->pdr_isprop) {
5268 			if (!pm_unresolved_deps)
5269 				continue;
5270 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5271 			if (dp->pdr_satisfied) {
5272 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5273 				continue;
5274 			}
5275 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5276 				ret += pm_apply_recorded_dep(dip, dp);
5277 			}
5278 		} else {
5279 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5280 				continue;
5281 			for (i = 0; i < dp->pdr_kept_count; i++) {
5282 				if (dp->pdr_kept_paths[i] == NULL)
5283 					continue;
5284 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5285 				if (kept == NULL)
5286 					continue;
5287 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5288 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5289 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5290 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5291 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5292 				    dp->pdr_kept_count))
5293 				if (kept != dip) {
5294 					ret += pm_set_keeping(dip, kept);
5295 				}
5296 				ddi_release_devi(kept);
5297 			}
5298 
5299 		}
5300 	}
5301 	ddi_release_devi(dip);
5302 	return (ret);
5303 }
5304 
5305 /*
5306  * Should this device be kept up by another device?
5307  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5308  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5309  * kept device lists.
5310  */
5311 static int
5312 pm_kept(char *keptp)
5313 {
5314 	PMD_FUNC(pmf, "kept")
5315 	pm_pdr_t *dp;
5316 	int found = 0;
5317 	int ret = 0;
5318 	dev_info_t *keeper;
5319 	dev_info_t *kept;
5320 	size_t length;
5321 	int i;
5322 	char **paths;
5323 	char *path;
5324 
5325 	ASSERT(keptp != NULL);
5326 	kept = pm_name_to_dip(keptp, 1);
5327 	if (kept == NULL)
5328 		return (0);
5329 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5330 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5331 		if (dp->pdr_isprop) {
5332 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5333 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5334 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5335 				/*
5336 				 * Dont allow self dependency.
5337 				 */
5338 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5339 					continue;
5340 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5341 				if (keeper == NULL)
5342 					continue;
5343 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5344 				    "%p\n", pmf, (void *)kept))
5345 #ifdef DEBUG
5346 				if (pm_debug & PMD_DEP)
5347 					prdeps("Before Adding from pm_kept\n");
5348 #endif
5349 				/*
5350 				 * Add ourselves to the dip list.
5351 				 */
5352 				if (dp->pdr_kept_count == 0) {
5353 					length = strlen(keptp) + 1;
5354 					path =
5355 					    kmem_alloc(length, KM_SLEEP);
5356 					paths = kmem_alloc(sizeof (char **),
5357 					    KM_SLEEP);
5358 					(void) strcpy(path, keptp);
5359 					paths[0] = path;
5360 					dp->pdr_kept_paths = paths;
5361 					dp->pdr_kept_count++;
5362 				} else {
5363 					/* Check to see if already on list */
5364 					for (i = 0; i < dp->pdr_kept_count;
5365 					    i++) {
5366 						if (strcmp(keptp,
5367 						    dp->pdr_kept_paths[i])
5368 						    == 0) {
5369 							found++;
5370 							break;
5371 						}
5372 					}
5373 					if (found) {
5374 						ddi_release_devi(keeper);
5375 						continue;
5376 					}
5377 					length = dp->pdr_kept_count *
5378 					    sizeof (char **);
5379 					paths = kmem_alloc(
5380 					    length + sizeof (char **),
5381 					    KM_SLEEP);
5382 					if (dp->pdr_kept_count) {
5383 						bcopy(dp->pdr_kept_paths,
5384 						    paths, length);
5385 						kmem_free(dp->pdr_kept_paths,
5386 						    length);
5387 					}
5388 					dp->pdr_kept_paths = paths;
5389 					length = strlen(keptp) + 1;
5390 					path =
5391 					    kmem_alloc(length, KM_SLEEP);
5392 					(void) strcpy(path, keptp);
5393 					dp->pdr_kept_paths[i] = path;
5394 					dp->pdr_kept_count++;
5395 				}
5396 #ifdef DEBUG
5397 				if (pm_debug & PMD_DEP)
5398 					prdeps("After from pm_kept\n");
5399 #endif
5400 				if (keeper) {
5401 					ret += pm_set_keeping(keeper, kept);
5402 					ddi_release_devi(keeper);
5403 				}
5404 			}
5405 		} else {
5406 			/*
5407 			 * pm_keeper would be called later to do
5408 			 * the actual pm_set_keeping.
5409 			 */
5410 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5411 			    pmf, (void *)kept))
5412 #ifdef DEBUG
5413 			if (pm_debug & PMD_DEP)
5414 				prdeps("Before Adding from pm_kept\n");
5415 #endif
5416 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5417 				if (dp->pdr_kept_paths == NULL) {
5418 					length = strlen(keptp) + 1;
5419 					path =
5420 					    kmem_alloc(length, KM_SLEEP);
5421 					paths = kmem_alloc(sizeof (char **),
5422 					    KM_SLEEP);
5423 					(void) strcpy(path, keptp);
5424 					paths[0] = path;
5425 					dp->pdr_kept_paths = paths;
5426 					dp->pdr_kept_count++;
5427 				}
5428 			}
5429 #ifdef DEBUG
5430 			if (pm_debug & PMD_DEP)
5431 				prdeps("After from pm_kept\n");
5432 #endif
5433 		}
5434 	}
5435 	ddi_release_devi(kept);
5436 	return (ret);
5437 }
5438 
5439 /*
5440  * Apply a recorded dependency.  dp specifies the dependency, and
5441  * keeper is already known to be the device that keeps up the other (kept) one.
5442  * We have to the whole tree for the "kept" device, then apply
5443  * the dependency (which may already be applied).
5444  */
5445 int
5446 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5447 {
5448 	PMD_FUNC(pmf, "apply_recorded_dep")
5449 	dev_info_t *kept = NULL;
5450 	int ret = 0;
5451 	char *keptp = NULL;
5452 
5453 	/*
5454 	 * Device to Device dependency can only be 1 to 1.
5455 	 */
5456 	if (dp->pdr_kept_paths == NULL)
5457 		return (0);
5458 	keptp = dp->pdr_kept_paths[0];
5459 	if (keptp == NULL)
5460 		return (0);
5461 	ASSERT(*keptp != '\0');
5462 	kept = pm_name_to_dip(keptp, 1);
5463 	if (kept == NULL)
5464 		return (0);
5465 	if (kept) {
5466 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5467 		    dp->pdr_keeper, keptp))
5468 		if (pm_set_keeping(keeper, kept)) {
5469 			ASSERT(dp->pdr_satisfied == 0);
5470 			dp->pdr_satisfied = 1;
5471 			ASSERT(pm_unresolved_deps);
5472 			pm_unresolved_deps--;
5473 			ret++;
5474 		}
5475 	}
5476 	ddi_release_devi(kept);
5477 
5478 	return (ret);
5479 }
5480 
5481 /*
5482  * Called from common/io/pm.c
5483  */
5484 int
5485 pm_cur_power(pm_component_t *cp)
5486 {
5487 	return (cur_power(cp));
5488 }
5489 
5490 /*
5491  * External interface to sanity-check a power level.
5492  */
5493 int
5494 pm_valid_power(dev_info_t *dip, int comp, int level)
5495 {
5496 	PMD_FUNC(pmf, "valid_power")
5497 
5498 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5499 		return (e_pm_valid_power(dip, comp, level));
5500 	else {
5501 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5502 		    pmf, comp, PM_NUMCMPTS(dip), level))
5503 		return (0);
5504 	}
5505 }
5506 
5507 /*
5508  * Called when a device that is direct power managed needs to change state.
5509  * This routine arranges to block the request until the process managing
5510  * the device makes the change (or some other incompatible change) or
5511  * the process closes /dev/pm.
5512  */
5513 static int
5514 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5515 {
5516 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5517 	int ret = 0;
5518 	void pm_dequeue_blocked(pm_rsvp_t *);
5519 	void pm_enqueue_blocked(pm_rsvp_t *);
5520 
5521 	ASSERT(!pm_processes_stopped);
5522 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5523 	new->pr_dip = dip;
5524 	new->pr_comp = comp;
5525 	new->pr_newlevel = newpower;
5526 	new->pr_oldlevel = oldpower;
5527 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5528 	mutex_enter(&pm_rsvp_lock);
5529 	pm_enqueue_blocked(new);
5530 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5531 	    PM_CANBLOCK_BLOCK);
5532 	PM_UNLOCK_DIP(dip);
5533 	/*
5534 	 * truss may make the cv_wait_sig return prematurely
5535 	 */
5536 	while (ret == 0) {
5537 		/*
5538 		 * Normally there will be no user context involved, but if
5539 		 * there is (e.g. we are here via an ioctl call to a driver)
5540 		 * then we should allow the process to abort the request,
5541 		 * or we get an unkillable process if the same thread does
5542 		 * PM_DIRECT_PM and pm_raise_power
5543 		 */
5544 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5545 			ret = PMP_FAIL;
5546 		} else {
5547 			ret = new->pr_retval;
5548 		}
5549 	}
5550 	pm_dequeue_blocked(new);
5551 	mutex_exit(&pm_rsvp_lock);
5552 	cv_destroy(&new->pr_cv);
5553 	kmem_free(new, sizeof (*new));
5554 	return (ret);
5555 }
5556 
5557 /*
5558  * Returns true if the process is interested in power level changes (has issued
5559  * PM_GET_STATE_CHANGE ioctl).
5560  */
5561 int
5562 pm_interest_registered(int clone)
5563 {
5564 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5565 	return (pm_interest[clone]);
5566 }
5567 
5568 static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5569 
5570 /*
5571  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5572  * watch all state transitions (dip == NULL).  Set up data
5573  * structs to communicate with process about state changes.
5574  */
5575 void
5576 pm_register_watcher(int clone, dev_info_t *dip)
5577 {
5578 	pscc_t	*p;
5579 	psce_t	*psce;
5580 
5581 	/*
5582 	 * We definitely need a control struct, then we have to search to see
5583 	 * there is already an entries struct (in the dip != NULL case).
5584 	 */
5585 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5586 	pscc->pscc_clone = clone;
5587 	pscc->pscc_dip = dip;
5588 
5589 	if (dip) {
5590 		int found = 0;
5591 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5592 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5593 			/*
5594 			 * Already an entry for this clone, so just use it
5595 			 * for the new one (for the case where a single
5596 			 * process is watching multiple devices)
5597 			 */
5598 			if (p->pscc_clone == clone) {
5599 				pscc->pscc_entries = p->pscc_entries;
5600 				pscc->pscc_entries->psce_references++;
5601 				found++;
5602 				break;
5603 			}
5604 		}
5605 		if (!found) {		/* create a new one */
5606 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5607 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5608 			psce->psce_first =
5609 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5610 			    KM_SLEEP);
5611 			psce->psce_in = psce->psce_out = psce->psce_first;
5612 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5613 			psce->psce_references = 1;
5614 			pscc->pscc_entries = psce;
5615 		}
5616 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5617 		rw_exit(&pm_pscc_direct_rwlock);
5618 	} else {
5619 		ASSERT(!pm_interest_registered(clone));
5620 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5621 #ifdef DEBUG
5622 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5623 			/*
5624 			 * Should not be an entry for this clone!
5625 			 */
5626 			ASSERT(p->pscc_clone != clone);
5627 		}
5628 #endif
5629 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5630 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5631 		    PSCCOUNT, KM_SLEEP);
5632 		psce->psce_in = psce->psce_out = psce->psce_first;
5633 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5634 		psce->psce_references = 1;
5635 		pscc->pscc_entries = psce;
5636 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5637 		pm_interest[clone] = 1;
5638 		rw_exit(&pm_pscc_interest_rwlock);
5639 	}
5640 }
5641 
5642 /*
5643  * Remove the given entry from the blocked list
5644  */
5645 void
5646 pm_dequeue_blocked(pm_rsvp_t *p)
5647 {
5648 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5649 	if (pm_blocked_list == p) {
5650 		ASSERT(p->pr_prev == NULL);
5651 		if (p->pr_next != NULL)
5652 			p->pr_next->pr_prev = NULL;
5653 		pm_blocked_list = p->pr_next;
5654 	} else {
5655 		ASSERT(p->pr_prev != NULL);
5656 		p->pr_prev->pr_next = p->pr_next;
5657 		if (p->pr_next != NULL)
5658 			p->pr_next->pr_prev = p->pr_prev;
5659 	}
5660 }
5661 
5662 /*
5663  * Remove the given control struct from the given list
5664  */
5665 static void
5666 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5667 {
5668 	if (*list == p) {
5669 		ASSERT(p->pscc_prev == NULL);
5670 		if (p->pscc_next != NULL)
5671 			p->pscc_next->pscc_prev = NULL;
5672 		*list = p->pscc_next;
5673 	} else {
5674 		ASSERT(p->pscc_prev != NULL);
5675 		p->pscc_prev->pscc_next = p->pscc_next;
5676 		if (p->pscc_next != NULL)
5677 			p->pscc_next->pscc_prev = p->pscc_prev;
5678 	}
5679 }
5680 
5681 /*
5682  * Stick the control struct specified on the front of the list
5683  */
5684 static void
5685 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5686 {
5687 	pscc_t *h;	/* entry at head of list */
5688 	if ((h = *list) == NULL) {
5689 		*list = p;
5690 		ASSERT(p->pscc_next == NULL);
5691 		ASSERT(p->pscc_prev == NULL);
5692 	} else {
5693 		p->pscc_next = h;
5694 		ASSERT(h->pscc_prev == NULL);
5695 		h->pscc_prev = p;
5696 		ASSERT(p->pscc_prev == NULL);
5697 		*list = p;
5698 	}
5699 }
5700 
5701 /*
5702  * If dip is NULL, process is closing "clone" clean up all its registrations.
5703  * Otherwise only clean up those for dip because process is just giving up
5704  * control of a direct device.
5705  */
5706 void
5707 pm_deregister_watcher(int clone, dev_info_t *dip)
5708 {
5709 	pscc_t	*p, *pn;
5710 	psce_t	*psce;
5711 	int found = 0;
5712 
5713 	if (dip == NULL) {
5714 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5715 		for (p = pm_pscc_interest; p; p = pn) {
5716 			pn = p->pscc_next;
5717 			if (p->pscc_clone == clone) {
5718 				pm_dequeue_pscc(p, &pm_pscc_interest);
5719 				psce = p->pscc_entries;
5720 				ASSERT(psce->psce_references == 1);
5721 				mutex_destroy(&psce->psce_lock);
5722 				kmem_free(psce->psce_first,
5723 				    sizeof (pm_state_change_t) * PSCCOUNT);
5724 				kmem_free(psce, sizeof (*psce));
5725 				kmem_free(p, sizeof (*p));
5726 			}
5727 		}
5728 		pm_interest[clone] = 0;
5729 		rw_exit(&pm_pscc_interest_rwlock);
5730 	}
5731 	found = 0;
5732 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5733 	for (p = pm_pscc_direct; p; p = pn) {
5734 		pn = p->pscc_next;
5735 		if ((dip && p->pscc_dip == dip) ||
5736 		    (dip == NULL && clone == p->pscc_clone)) {
5737 			ASSERT(clone == p->pscc_clone);
5738 			found++;
5739 			/*
5740 			 * Remove from control list
5741 			 */
5742 			pm_dequeue_pscc(p, &pm_pscc_direct);
5743 			/*
5744 			 * If we're the last reference, free the
5745 			 * entries struct.
5746 			 */
5747 			psce = p->pscc_entries;
5748 			ASSERT(psce);
5749 			if (psce->psce_references == 1) {
5750 				kmem_free(psce->psce_first,
5751 				    PSCCOUNT * sizeof (pm_state_change_t));
5752 				kmem_free(psce, sizeof (*psce));
5753 			} else {
5754 				psce->psce_references--;
5755 			}
5756 			kmem_free(p, sizeof (*p));
5757 		}
5758 	}
5759 	ASSERT(dip == NULL || found);
5760 	rw_exit(&pm_pscc_direct_rwlock);
5761 }
5762 
5763 /*
5764  * Search the indicated list for an entry that matches clone, and return a
5765  * pointer to it.  To be interesting, the entry must have something ready to
5766  * be passed up to the controlling process.
5767  * The returned entry will be locked upon return from this call.
5768  */
5769 static psce_t *
5770 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5771 {
5772 	pscc_t	*p;
5773 	psce_t	*psce;
5774 	rw_enter(lock, RW_READER);
5775 	for (p = *list; p; p = p->pscc_next) {
5776 		if (clone == p->pscc_clone) {
5777 			psce = p->pscc_entries;
5778 			mutex_enter(&psce->psce_lock);
5779 			if (psce->psce_out->size) {
5780 				rw_exit(lock);
5781 				return (psce);
5782 			} else {
5783 				mutex_exit(&psce->psce_lock);
5784 			}
5785 		}
5786 	}
5787 	rw_exit(lock);
5788 	return (NULL);
5789 }
5790 
5791 static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5792 /*
5793  * Find an entry for a particular clone in the direct list.
5794  */
5795 psce_t *
5796 pm_psc_clone_to_direct(int clone)
5797 {
5798 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5799 	    &pm_pscc_direct_rwlock));
5800 }
5801 
5802 /*
5803  * Find an entry for a particular clone in the interest list.
5804  */
5805 psce_t *
5806 pm_psc_clone_to_interest(int clone)
5807 {
5808 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5809 	    &pm_pscc_interest_rwlock));
5810 }
5811 
5812 /*
5813  * Put the given entry at the head of the blocked list
5814  */
5815 void
5816 pm_enqueue_blocked(pm_rsvp_t *p)
5817 {
5818 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5819 	ASSERT(p->pr_next == NULL);
5820 	ASSERT(p->pr_prev == NULL);
5821 	if (pm_blocked_list != NULL) {
5822 		p->pr_next = pm_blocked_list;
5823 		ASSERT(pm_blocked_list->pr_prev == NULL);
5824 		pm_blocked_list->pr_prev = p;
5825 		pm_blocked_list = p;
5826 	} else {
5827 		pm_blocked_list = p;
5828 	}
5829 }
5830 
5831 /*
5832  * Sets every power managed device back to its default threshold
5833  */
5834 void
5835 pm_all_to_default_thresholds(void)
5836 {
5837 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5838 	    (void *) &pm_system_idle_threshold);
5839 }
5840 
5841 static int
5842 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5843 {
5844 	int thr = (int)(*(int *)arg);
5845 
5846 	if (!PM_GET_PM_INFO(dip))
5847 		return (DDI_WALK_CONTINUE);
5848 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5849 	return (DDI_WALK_CONTINUE);
5850 }
5851 
5852 /*
5853  * Returns the current threshold value (in seconds) for the indicated component
5854  */
5855 int
5856 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5857 {
5858 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5859 		return (DDI_FAILURE);
5860 	} else {
5861 		*threshp = cur_threshold(dip, comp);
5862 		return (DDI_SUCCESS);
5863 	}
5864 }
5865 
5866 /*
5867  * To be called when changing the power level of a component of a device.
5868  * On some platforms, changing power on one device may require that power
5869  * be changed on other, related devices in the same transaction.  Thus, we
5870  * always pass this request to the platform power manager so that all the
5871  * affected devices will be locked.
5872  */
5873 void
5874 pm_lock_power(dev_info_t *dip, int *circp)
5875 {
5876 	power_req_t power_req;
5877 	int result;
5878 
5879 	power_req.request_type = PMR_PPM_LOCK_POWER;
5880 	power_req.req.ppm_lock_power_req.who = dip;
5881 	power_req.req.ppm_lock_power_req.circp = circp;
5882 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5883 }
5884 
5885 /*
5886  * Release the lock (or locks) acquired to change the power of a device.
5887  * See comments for pm_lock_power.
5888  */
5889 void
5890 pm_unlock_power(dev_info_t *dip, int circ)
5891 {
5892 	power_req_t power_req;
5893 	int result;
5894 
5895 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5896 	power_req.req.ppm_unlock_power_req.who = dip;
5897 	power_req.req.ppm_unlock_power_req.circ = circ;
5898 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5899 }
5900 
5901 
5902 /*
5903  * Attempt (without blocking) to acquire the lock(s) needed to change the
5904  * power of a component of a device.  See comments for pm_lock_power.
5905  *
5906  * Return: 1 if lock(s) acquired, 0 if not.
5907  */
5908 int
5909 pm_try_locking_power(dev_info_t *dip, int *circp)
5910 {
5911 	power_req_t power_req;
5912 	int result;
5913 
5914 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5915 	power_req.req.ppm_lock_power_req.who = dip;
5916 	power_req.req.ppm_lock_power_req.circp = circp;
5917 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5918 	return (result);
5919 }
5920 
5921 
5922 /*
5923  * Lock power state of a device.
5924  *
5925  * The implementation handles a special case where another thread may have
5926  * acquired the lock and created/launched this thread to do the work.  If
5927  * the lock cannot be acquired immediately, we check to see if this thread
5928  * is registered as a borrower of the lock.  If so, we may proceed without
5929  * the lock.  This assumes that the lending thread blocks on the completion
5930  * of this thread.
5931  *
5932  * Note 1: for use by ppm only.
5933  *
5934  * Note 2: On failing to get the lock immediately, we search lock_loan list
5935  * for curthread (as borrower of the lock).  On a hit, we check that the
5936  * lending thread already owns the lock we want.  It is safe to compare
5937  * devi_busy_thread and thread id of the lender because in the == case (the
5938  * only one we care about) we know that the owner is blocked.  Similarly,
5939  * If we find that curthread isn't registered as a lock borrower, it is safe
5940  * to use the blocking call (ndi_devi_enter) because we know that if we
5941  * weren't already listed as a borrower (upstream on the call stack) we won't
5942  * become one.
5943  */
5944 void
5945 pm_lock_power_single(dev_info_t *dip, int *circp)
5946 {
5947 	lock_loan_t *cur;
5948 
5949 	/* if the lock is available, we are done. */
5950 	if (ndi_devi_tryenter(dip, circp))
5951 		return;
5952 
5953 	mutex_enter(&pm_loan_lock);
5954 	/* see if our thread is registered as a lock borrower. */
5955 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5956 		if (cur->pmlk_borrower == curthread)
5957 			break;
5958 	mutex_exit(&pm_loan_lock);
5959 
5960 	/* if this thread not already registered, it is safe to block */
5961 	if (cur == NULL)
5962 		ndi_devi_enter(dip, circp);
5963 	else {
5964 		/* registered: does lender own the lock we want? */
5965 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5966 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5967 			cur->pmlk_dip = dip;
5968 		} else /* no: just block for it */
5969 			ndi_devi_enter(dip, circp);
5970 
5971 	}
5972 }
5973 
5974 /*
5975  * Drop the lock on the device's power state.  See comment for
5976  * pm_lock_power_single() for special implementation considerations.
5977  *
5978  * Note: for use by ppm only.
5979  */
5980 void
5981 pm_unlock_power_single(dev_info_t *dip, int circ)
5982 {
5983 	lock_loan_t *cur;
5984 
5985 	/* optimization: mutex not needed to check empty list */
5986 	if (lock_loan_head.pmlk_next == NULL) {
5987 		ndi_devi_exit(dip, circ);
5988 		return;
5989 	}
5990 
5991 	mutex_enter(&pm_loan_lock);
5992 	/* see if our thread is registered as a lock borrower. */
5993 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5994 		if (cur->pmlk_borrower == curthread)
5995 			break;
5996 	mutex_exit(&pm_loan_lock);
5997 
5998 	if (cur == NULL || cur->pmlk_dip != dip)
5999 		/* we acquired the lock directly, so return it */
6000 		ndi_devi_exit(dip, circ);
6001 }
6002 
6003 /*
6004  * Try to take the lock for changing the power level of a component.
6005  *
6006  * Note: for use by ppm only.
6007  */
6008 int
6009 pm_try_locking_power_single(dev_info_t *dip, int *circp)
6010 {
6011 	return (ndi_devi_tryenter(dip, circp));
6012 }
6013 
6014 #ifdef	DEBUG
6015 /*
6016  * The following are used only to print out data structures for debugging
6017  */
6018 void
6019 prdeps(char *msg)
6020 {
6021 
6022 	pm_pdr_t *rp;
6023 	int i;
6024 
6025 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6026 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6027 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6028 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6029 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6030 		    (void *)rp->pdr_next);
6031 		if (rp->pdr_kept_count != 0) {
6032 			pm_log("kept list = ");
6033 			i = 0;
6034 			while (i < rp->pdr_kept_count) {
6035 				pm_log("%s ", rp->pdr_kept_paths[i]);
6036 				i++;
6037 			}
6038 			pm_log("\n");
6039 		}
6040 	}
6041 }
6042 
6043 void
6044 pr_noinvol(char *hdr)
6045 {
6046 	pm_noinvol_t *ip;
6047 
6048 	pm_log("%s\n", hdr);
6049 	rw_enter(&pm_noinvol_rwlock, RW_READER);
6050 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6051 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6052 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6053 	rw_exit(&pm_noinvol_rwlock);
6054 }
6055 #endif
6056 
6057 /*
6058  * Attempt to apply the thresholds indicated by rp to the node specified by
6059  * dip.
6060  */
6061 void
6062 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6063 {
6064 	PMD_FUNC(pmf, "apply_recorded_thresh")
6065 	int i, j;
6066 	int comps = PM_NUMCMPTS(dip);
6067 	struct pm_component *cp;
6068 	pm_pte_t *ep;
6069 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
6070 
6071 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
6072 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
6073 	PM_LOCK_DIP(dip);
6074 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
6075 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
6076 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
6077 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
6078 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
6079 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
6080 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
6081 		PM_UNLOCK_DIP(dip);
6082 		return;
6083 	}
6084 
6085 	ep = rp->ptr_entries;
6086 	/*
6087 	 * Here we do the special case of a device threshold
6088 	 */
6089 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6090 		ASSERT(ep && ep->pte_numthresh == 1);
6091 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6092 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6093 		PM_UNLOCK_DIP(dip);
6094 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6095 		if (PM_SCANABLE(dip))
6096 			pm_rescan(dip);
6097 		return;
6098 	}
6099 	for (i = 0; i < comps; i++) {
6100 		cp = PM_CP(dip, i);
6101 		for (j = 0; j < ep->pte_numthresh; j++) {
6102 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6103 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6104 			    i, ep->pte_thresh[j]))
6105 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6106 		}
6107 		ep++;
6108 	}
6109 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6110 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6111 	PM_UNLOCK_DIP(dip);
6112 
6113 	if (PM_SCANABLE(dip))
6114 		pm_rescan(dip);
6115 }
6116 
6117 /*
6118  * Returns true if the threshold specified by rp could be applied to dip
6119  * (that is, the number of components and transitions are the same)
6120  */
6121 int
6122 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6123 {
6124 	PMD_FUNC(pmf, "valid_thresh")
6125 	int comps, i;
6126 	pm_component_t *cp;
6127 	pm_pte_t *ep;
6128 
6129 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6130 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6131 		    rp->ptr_physpath))
6132 		return (0);
6133 	}
6134 	/*
6135 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6136 	 * an entry with numcomps == 0, (since we don't know how many
6137 	 * components there are in advance).  This is always a valid
6138 	 * spec.
6139 	 */
6140 	if (rp->ptr_numcomps == 0) {
6141 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6142 		return (1);
6143 	}
6144 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6145 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6146 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6147 		return (0);
6148 	}
6149 	ep = rp->ptr_entries;
6150 	for (i = 0; i < comps; i++) {
6151 		cp = PM_CP(dip, i);
6152 		if ((ep + i)->pte_numthresh !=
6153 		    cp->pmc_comp.pmc_numlevels - 1) {
6154 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6155 			    pmf, rp->ptr_physpath, i,
6156 			    cp->pmc_comp.pmc_numlevels - 1,
6157 			    (ep + i)->pte_numthresh))
6158 			return (0);
6159 		}
6160 	}
6161 	return (1);
6162 }
6163 
6164 /*
6165  * Remove any recorded threshold for device physpath
6166  * We know there will be at most one.
6167  */
6168 void
6169 pm_unrecord_threshold(char *physpath)
6170 {
6171 	pm_thresh_rec_t *pptr, *ptr;
6172 
6173 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6174 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6175 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6176 			if (pptr) {
6177 				pptr->ptr_next = ptr->ptr_next;
6178 			} else {
6179 				ASSERT(pm_thresh_head == ptr);
6180 				pm_thresh_head = ptr->ptr_next;
6181 			}
6182 			kmem_free(ptr, ptr->ptr_size);
6183 			break;
6184 		}
6185 		pptr = ptr;
6186 	}
6187 	rw_exit(&pm_thresh_rwlock);
6188 }
6189 
6190 /*
6191  * Discard all recorded thresholds.  We are returning to the default pm state.
6192  */
6193 void
6194 pm_discard_thresholds(void)
6195 {
6196 	pm_thresh_rec_t *rp;
6197 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6198 	while (pm_thresh_head) {
6199 		rp = pm_thresh_head;
6200 		pm_thresh_head = rp->ptr_next;
6201 		kmem_free(rp, rp->ptr_size);
6202 	}
6203 	rw_exit(&pm_thresh_rwlock);
6204 }
6205 
6206 /*
6207  * Discard all recorded dependencies.  We are returning to the default pm state.
6208  */
6209 void
6210 pm_discard_dependencies(void)
6211 {
6212 	pm_pdr_t *rp;
6213 	int i;
6214 	size_t length;
6215 
6216 #ifdef DEBUG
6217 	if (pm_debug & PMD_DEP)
6218 		prdeps("Before discard\n");
6219 #endif
6220 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6221 
6222 #ifdef DEBUG
6223 	if (pm_debug & PMD_DEP)
6224 		prdeps("After discard\n");
6225 #endif
6226 	while (pm_dep_head) {
6227 		rp = pm_dep_head;
6228 		if (!rp->pdr_isprop) {
6229 			ASSERT(rp->pdr_satisfied == 0);
6230 			ASSERT(pm_unresolved_deps);
6231 			pm_unresolved_deps--;
6232 		} else {
6233 			ASSERT(pm_prop_deps);
6234 			pm_prop_deps--;
6235 		}
6236 		pm_dep_head = rp->pdr_next;
6237 		if (rp->pdr_kept_count)  {
6238 			for (i = 0; i < rp->pdr_kept_count; i++) {
6239 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6240 				kmem_free(rp->pdr_kept_paths[i], length);
6241 			}
6242 			kmem_free(rp->pdr_kept_paths,
6243 			    rp->pdr_kept_count * sizeof (char **));
6244 		}
6245 		kmem_free(rp, rp->pdr_size);
6246 	}
6247 }
6248 
6249 
6250 static int
6251 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6252 {
6253 	_NOTE(ARGUNUSED(arg))
6254 	char *pathbuf;
6255 
6256 	if (PM_GET_PM_INFO(dip) == NULL)
6257 		return (DDI_WALK_CONTINUE);
6258 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6259 	(void) ddi_pathname(dip, pathbuf);
6260 	pm_free_keeper(pathbuf, 0);
6261 	kmem_free(pathbuf, MAXPATHLEN);
6262 	return (DDI_WALK_CONTINUE);
6263 }
6264 
6265 static int
6266 pm_kept_walk(dev_info_t *dip, void *arg)
6267 {
6268 	_NOTE(ARGUNUSED(arg))
6269 	char *pathbuf;
6270 
6271 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6272 	(void) ddi_pathname(dip, pathbuf);
6273 	(void) pm_kept(pathbuf);
6274 	kmem_free(pathbuf, MAXPATHLEN);
6275 
6276 	return (DDI_WALK_CONTINUE);
6277 }
6278 
6279 static int
6280 pm_keeper_walk(dev_info_t *dip, void *arg)
6281 {
6282 	_NOTE(ARGUNUSED(arg))
6283 	char *pathbuf;
6284 
6285 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6286 	(void) ddi_pathname(dip, pathbuf);
6287 	(void) pm_keeper(pathbuf);
6288 	kmem_free(pathbuf, MAXPATHLEN);
6289 
6290 	return (DDI_WALK_CONTINUE);
6291 }
6292 
6293 static char *
6294 pdw_type_decode(int type)
6295 {
6296 	switch (type) {
6297 	case PM_DEP_WK_POWER_ON:
6298 		return ("power on");
6299 	case PM_DEP_WK_POWER_OFF:
6300 		return ("power off");
6301 	case PM_DEP_WK_DETACH:
6302 		return ("detach");
6303 	case PM_DEP_WK_REMOVE_DEP:
6304 		return ("remove dep");
6305 	case PM_DEP_WK_BRINGUP_SELF:
6306 		return ("bringup self");
6307 	case PM_DEP_WK_RECORD_KEEPER:
6308 		return ("add dependent");
6309 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6310 		return ("add dependent property");
6311 	case PM_DEP_WK_KEPT:
6312 		return ("kept");
6313 	case PM_DEP_WK_KEEPER:
6314 		return ("keeper");
6315 	case PM_DEP_WK_ATTACH:
6316 		return ("attach");
6317 	case PM_DEP_WK_CHECK_KEPT:
6318 		return ("check kept");
6319 	case PM_DEP_WK_CPR_SUSPEND:
6320 		return ("suspend");
6321 	case PM_DEP_WK_CPR_RESUME:
6322 		return ("resume");
6323 	default:
6324 		return ("unknown");
6325 	}
6326 
6327 }
6328 
6329 static void
6330 pm_rele_dep(char *keeper)
6331 {
6332 	PMD_FUNC(pmf, "rele_dep")
6333 	pm_pdr_t *dp;
6334 	char *kept_path = NULL;
6335 	dev_info_t *kept = NULL;
6336 	int count = 0;
6337 
6338 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6339 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6340 			continue;
6341 		for (count = 0; count < dp->pdr_kept_count; count++) {
6342 			kept_path = dp->pdr_kept_paths[count];
6343 			if (kept_path == NULL)
6344 				continue;
6345 			kept = pm_name_to_dip(kept_path, 1);
6346 			if (kept) {
6347 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6348 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6349 				    keeper))
6350 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6351 				pm_rele_power(kept);
6352 				ddi_release_devi(kept);
6353 			}
6354 		}
6355 	}
6356 }
6357 
6358 /*
6359  * Called when we are just released from direct PM.  Bring ourself up
6360  * if our keeper is up since dependency is not honored while a kept
6361  * device is under direct PM.
6362  */
6363 static void
6364 pm_bring_self_up(char *keptpath)
6365 {
6366 	PMD_FUNC(pmf, "bring_self_up")
6367 	dev_info_t *kept;
6368 	dev_info_t *keeper;
6369 	pm_pdr_t *dp;
6370 	int i, j;
6371 	int up = 0, circ;
6372 
6373 	kept = pm_name_to_dip(keptpath, 1);
6374 	if (kept == NULL)
6375 		return;
6376 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6377 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6378 		if (dp->pdr_kept_count == 0)
6379 			continue;
6380 		for (i = 0; i < dp->pdr_kept_count; i++) {
6381 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6382 				continue;
6383 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6384 			if (keeper) {
6385 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6386 				    pmf, PM_DEVICE(keeper)))
6387 				PM_LOCK_POWER(keeper, &circ);
6388 				for (j = 0; j < PM_NUMCMPTS(keeper);
6389 				    j++) {
6390 					if (PM_CURPOWER(keeper, j)) {
6391 						PMD(PMD_KEEPS, ("%s: comp="
6392 						    "%d is up\n", pmf, j))
6393 						up++;
6394 					}
6395 				}
6396 				if (up) {
6397 					if (PM_SKBU(kept))
6398 						DEVI(kept)->devi_pm_flags &=
6399 						    ~PMC_SKIP_BRINGUP;
6400 					bring_pmdep_up(kept, 1);
6401 				}
6402 				PM_UNLOCK_POWER(keeper, circ);
6403 				ddi_release_devi(keeper);
6404 			}
6405 		}
6406 	}
6407 	ddi_release_devi(kept);
6408 }
6409 
6410 static void
6411 pm_process_dep_request(pm_dep_wk_t *work)
6412 {
6413 	PMD_FUNC(pmf, "dep_req")
6414 	int ret;
6415 
6416 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6417 	    pdw_type_decode(work->pdw_type)))
6418 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6419 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6420 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6421 
6422 	switch (work->pdw_type) {
6423 	case PM_DEP_WK_POWER_ON:
6424 		/* Bring up the kept devices and put a hold on them */
6425 		bring_wekeeps_up(work->pdw_keeper);
6426 		break;
6427 	case PM_DEP_WK_POWER_OFF:
6428 		/* Release the kept devices */
6429 		pm_rele_dep(work->pdw_keeper);
6430 		break;
6431 	case PM_DEP_WK_DETACH:
6432 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6433 		break;
6434 	case PM_DEP_WK_REMOVE_DEP:
6435 		pm_discard_dependencies();
6436 		break;
6437 	case PM_DEP_WK_BRINGUP_SELF:
6438 		/*
6439 		 * We deferred satisfying our dependency till now, so satisfy
6440 		 * it again and bring ourselves up.
6441 		 */
6442 		pm_bring_self_up(work->pdw_kept);
6443 		break;
6444 	case PM_DEP_WK_RECORD_KEEPER:
6445 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6446 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6447 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6448 		break;
6449 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6450 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6451 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6452 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6453 		break;
6454 	case PM_DEP_WK_KEPT:
6455 		ret = pm_kept(work->pdw_kept);
6456 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6457 		    ret))
6458 		break;
6459 	case PM_DEP_WK_KEEPER:
6460 		ret = pm_keeper(work->pdw_keeper);
6461 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6462 		    pmf, ret))
6463 		break;
6464 	case PM_DEP_WK_ATTACH:
6465 		ret = pm_keeper(work->pdw_keeper);
6466 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6467 		    pmf, ret))
6468 		ret = pm_kept(work->pdw_kept);
6469 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6470 		    pmf, ret))
6471 		break;
6472 	case PM_DEP_WK_CHECK_KEPT:
6473 		ret = pm_is_kept(work->pdw_kept);
6474 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6475 		    pmf, work->pdw_kept, ret))
6476 		break;
6477 	case PM_DEP_WK_CPR_SUSPEND:
6478 		pm_discard_dependencies();
6479 		break;
6480 	case PM_DEP_WK_CPR_RESUME:
6481 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6482 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6483 		break;
6484 	default:
6485 		ASSERT(0);
6486 		break;
6487 	}
6488 	/*
6489 	 * Free the work structure if the requester is not waiting
6490 	 * Otherwise it is the requester's responsiblity to free it.
6491 	 */
6492 	if (!work->pdw_wait) {
6493 		if (work->pdw_keeper)
6494 			kmem_free(work->pdw_keeper,
6495 			    strlen(work->pdw_keeper) + 1);
6496 		if (work->pdw_kept)
6497 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6498 		kmem_free(work, sizeof (pm_dep_wk_t));
6499 	} else {
6500 		/*
6501 		 * Notify requester if it is waiting for it.
6502 		 */
6503 		work->pdw_ret = ret;
6504 		work->pdw_done = 1;
6505 		cv_signal(&work->pdw_cv);
6506 	}
6507 }
6508 
6509 /*
6510  * Process PM dependency requests.
6511  */
6512 static void
6513 pm_dep_thread(void)
6514 {
6515 	pm_dep_wk_t *work;
6516 	callb_cpr_t cprinfo;
6517 
6518 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6519 	    "pm_dep_thread");
6520 	for (;;) {
6521 		mutex_enter(&pm_dep_thread_lock);
6522 		if (pm_dep_thread_workq == NULL) {
6523 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6524 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6525 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6526 		}
6527 		work = pm_dep_thread_workq;
6528 		pm_dep_thread_workq = work->pdw_next;
6529 		if (pm_dep_thread_tail == work)
6530 			pm_dep_thread_tail = work->pdw_next;
6531 		mutex_exit(&pm_dep_thread_lock);
6532 		pm_process_dep_request(work);
6533 
6534 	}
6535 	/*NOTREACHED*/
6536 }
6537 
6538 /*
6539  * Set the power level of the indicated device to unknown (if it is not a
6540  * backwards compatible device), as it has just been resumed, and it won't
6541  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6542  */
6543 void
6544 pm_forget_power_level(dev_info_t *dip)
6545 {
6546 	dev_info_t *pdip = ddi_get_parent(dip);
6547 	int i, count = 0;
6548 
6549 	if (!PM_ISBC(dip)) {
6550 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6551 			count += (PM_CURPOWER(dip, i) == 0);
6552 
6553 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6554 			e_pm_hold_rele_power(pdip, count);
6555 
6556 		/*
6557 		 * Count this as a power cycle if we care
6558 		 */
6559 		if (DEVI(dip)->devi_pm_volpmd &&
6560 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6561 			DEVI(dip)->devi_pm_volpmd = 0;
6562 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6563 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6564 	}
6565 }
6566 
6567 /*
6568  * This function advises the caller whether it should make a power-off
6569  * transition at this time or not.  If the transition is not advised
6570  * at this time, the time that the next power-off transition can
6571  * be made from now is returned through "intervalp" pointer.
6572  * This function returns:
6573  *
6574  *  1  power-off advised
6575  *  0  power-off not advised, intervalp will point to seconds from
6576  *	  now that a power-off is advised.  If it is passed the number
6577  *	  of years that policy specifies the device should last,
6578  *	  a large number is returned as the time interval.
6579  *  -1  error
6580  */
6581 int
6582 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6583 {
6584 	PMD_FUNC(pmf, "pm_trans_check")
6585 	char dbuf[DC_SCSI_MFR_LEN];
6586 	struct pm_scsi_cycles *scp;
6587 	int service_years, service_weeks, full_years;
6588 	time_t now, service_seconds, tdiff;
6589 	time_t within_year, when_allowed;
6590 	char *ptr;
6591 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6592 	int cycles_diff, cycles_over;
6593 	struct pm_smart_count *smart_p;
6594 
6595 	if (datap == NULL) {
6596 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6597 		return (-1);
6598 	}
6599 
6600 	if (datap->format == DC_SCSI_FORMAT) {
6601 		/*
6602 		 * Power cycles of the scsi drives are distributed
6603 		 * over 5 years with the following percentage ratio:
6604 		 *
6605 		 *	30%, 25%, 20%, 15%, and 10%
6606 		 *
6607 		 * The power cycle quota for each year is distributed
6608 		 * linearly through out the year.  The equation for
6609 		 * determining the expected cycles is:
6610 		 *
6611 		 *	e = a * (n / y)
6612 		 *
6613 		 * e = expected cycles
6614 		 * a = allocated cycles for this year
6615 		 * n = number of seconds since beginning of this year
6616 		 * y = number of seconds in a year
6617 		 *
6618 		 * Note that beginning of the year starts the day that
6619 		 * the drive has been put on service.
6620 		 *
6621 		 * If the drive has passed its expected cycles, we
6622 		 * can determine when it can start to power cycle
6623 		 * again to keep it on track to meet the 5-year
6624 		 * life expectancy.  The equation for determining
6625 		 * when to power cycle is:
6626 		 *
6627 		 *	w = y * (c / a)
6628 		 *
6629 		 * w = when it can power cycle again
6630 		 * y = number of seconds in a year
6631 		 * c = current number of cycles
6632 		 * a = allocated cycles for the year
6633 		 *
6634 		 */
6635 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6636 
6637 		scp = &datap->un.scsi_cycles;
6638 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6639 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6640 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6641 		if (scp->ncycles < 0 || scp->flag != 0) {
6642 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6643 			return (-1);
6644 		}
6645 
6646 		if (scp->ncycles > scp->lifemax) {
6647 			*intervalp = (LONG_MAX / hz);
6648 			return (0);
6649 		}
6650 
6651 		/*
6652 		 * convert service date to time_t
6653 		 */
6654 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6655 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6656 		ptr = dbuf;
6657 		service_years = stoi(&ptr) - EPOCH_YEAR;
6658 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6659 		    DC_SCSI_WEEK_LEN);
6660 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6661 
6662 		/*
6663 		 * scsi standard does not specify WW data,
6664 		 * could be (00-51) or (01-52)
6665 		 */
6666 		ptr = dbuf;
6667 		service_weeks = stoi(&ptr);
6668 		if (service_years < 0 ||
6669 		    service_weeks < 0 || service_weeks > 52) {
6670 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6671 			    pmf, service_years, service_weeks))
6672 			return (-1);
6673 		}
6674 
6675 		/*
6676 		 * calculate service date in seconds-since-epoch,
6677 		 * adding one day for each leap-year.
6678 		 *
6679 		 * (years-since-epoch + 2) fixes integer truncation,
6680 		 * example: (8) leap-years during [1972, 2000]
6681 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6682 		 */
6683 		service_seconds = (service_years * DC_SPY) +
6684 		    (service_weeks * DC_SPW) +
6685 		    (((service_years + 2) / 4) * DC_SPD);
6686 
6687 		now = gethrestime_sec();
6688 		/*
6689 		 * since the granularity of 'svc_date' is day not second,
6690 		 * 'now' should be rounded up to full day.
6691 		 */
6692 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6693 		if (service_seconds > now) {
6694 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6695 			    "than now (%ld)!\n", pmf, service_seconds, now))
6696 			return (-1);
6697 		}
6698 
6699 		tdiff = now - service_seconds;
6700 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6701 
6702 		/*
6703 		 * NOTE - Leap years are not considered in the calculations
6704 		 * below.
6705 		 */
6706 		full_years = (tdiff / DC_SPY);
6707 		if ((full_years >= DC_SCSI_NPY) &&
6708 		    (scp->ncycles <= scp->lifemax))
6709 			return (1);
6710 
6711 		/*
6712 		 * Determine what is the normal cycle usage for the
6713 		 * device at the beginning and the end of this year.
6714 		 */
6715 		lower_bound_cycles = (!full_years) ? 0 :
6716 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6717 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6718 
6719 		if (scp->ncycles <= lower_bound_cycles)
6720 			return (1);
6721 
6722 		/*
6723 		 * The linear slope that determines how many cycles
6724 		 * are allowed this year is number of seconds
6725 		 * passed this year over total number of seconds in a year.
6726 		 */
6727 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6728 		within_year = (tdiff % DC_SPY);
6729 		cycles_allowed = lower_bound_cycles +
6730 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6731 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6732 		    full_years, within_year))
6733 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6734 		    cycles_allowed))
6735 
6736 		if (scp->ncycles <= cycles_allowed)
6737 			return (1);
6738 
6739 		/*
6740 		 * The transition is not advised now but we can
6741 		 * determine when the next transition can be made.
6742 		 *
6743 		 * Depending on how many cycles the device has been
6744 		 * over-used, we may need to skip years with
6745 		 * different percentage quota in order to determine
6746 		 * when the next transition can be made.
6747 		 */
6748 		cycles_over = (scp->ncycles - lower_bound_cycles);
6749 		while (cycles_over > cycles_diff) {
6750 			full_years++;
6751 			if (full_years >= DC_SCSI_NPY) {
6752 				*intervalp = (LONG_MAX / hz);
6753 				return (0);
6754 			}
6755 			cycles_over -= cycles_diff;
6756 			lower_bound_cycles = upper_bound_cycles;
6757 			upper_bound_cycles =
6758 			    (scp->lifemax * pcnt[full_years]) / 100;
6759 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6760 		}
6761 
6762 		/*
6763 		 * The linear slope that determines when the next transition
6764 		 * can be made is the relative position of used cycles within a
6765 		 * year over total number of cycles within that year.
6766 		 */
6767 		when_allowed = service_seconds + (full_years * DC_SPY) +
6768 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6769 		*intervalp = (when_allowed - now);
6770 		if (*intervalp > (LONG_MAX / hz))
6771 			*intervalp = (LONG_MAX / hz);
6772 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6773 		    *intervalp))
6774 		return (0);
6775 	} else if (datap->format == DC_SMART_FORMAT) {
6776 		/*
6777 		 * power cycles of SATA disks are reported from SMART
6778 		 * attributes.
6779 		 */
6780 		smart_p = &datap->un.smart_count;
6781 		if (smart_p->consumed >= smart_p->allowed) {
6782 			*intervalp = (LONG_MAX / hz);
6783 			PMD(PMD_TCHECK, ("%s: exceeded lifemax cycles.\n", pmf))
6784 			return (0);
6785 		} else
6786 			return (1);
6787 	}
6788 
6789 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6790 	return (-1);
6791 }
6792 
6793 /*
6794  * Nexus drivers call into pm framework to indicate which child driver is about
6795  * to be installed.  In some platforms, ppm may need to configure the hardware
6796  * for successful installation of a driver.
6797  */
6798 int
6799 pm_init_child(dev_info_t *dip)
6800 {
6801 	power_req_t power_req;
6802 
6803 	ASSERT(ddi_binding_name(dip));
6804 	ASSERT(ddi_get_name_addr(dip));
6805 	pm_ppm_claim(dip);
6806 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6807 		power_req.request_type = PMR_PPM_INIT_CHILD;
6808 		power_req.req.ppm_config_req.who = dip;
6809 		ASSERT(PPM(dip) != NULL);
6810 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6811 		    NULL));
6812 	} else {
6813 #ifdef DEBUG
6814 		/* pass it to the default handler so we can debug things */
6815 		power_req.request_type = PMR_PPM_INIT_CHILD;
6816 		power_req.req.ppm_config_req.who = dip;
6817 		(void) pm_ctlops(NULL, dip,
6818 		    DDI_CTLOPS_POWER, &power_req, NULL);
6819 #endif
6820 	}
6821 	return (DDI_SUCCESS);
6822 }
6823 
6824 /*
6825  * Bring parent of a node that is about to be probed up to full power, and
6826  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6827  * it is time to let it go down again
6828  */
6829 void
6830 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6831 {
6832 	int result;
6833 	power_req_t power_req;
6834 
6835 	bzero(cp, sizeof (*cp));
6836 	cp->ppc_dip = dip;
6837 
6838 	pm_ppm_claim(dip);
6839 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6840 		power_req.request_type = PMR_PPM_PRE_PROBE;
6841 		power_req.req.ppm_config_req.who = dip;
6842 		ASSERT(PPM(dip) != NULL);
6843 		(void) pm_ctlops(PPM(dip), dip,
6844 		    DDI_CTLOPS_POWER, &power_req, &result);
6845 		cp->ppc_ppm = PPM(dip);
6846 	} else {
6847 #ifdef DEBUG
6848 		/* pass it to the default handler so we can debug things */
6849 		power_req.request_type = PMR_PPM_PRE_PROBE;
6850 		power_req.req.ppm_config_req.who = dip;
6851 		(void) pm_ctlops(NULL, dip,
6852 		    DDI_CTLOPS_POWER, &power_req, &result);
6853 #endif
6854 		cp->ppc_ppm = NULL;
6855 	}
6856 }
6857 
6858 int
6859 pm_pre_config(dev_info_t *dip, char *devnm)
6860 {
6861 	PMD_FUNC(pmf, "pre_config")
6862 	int ret;
6863 
6864 	if (MDI_VHCI(dip)) {
6865 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6866 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6867 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6868 	} else if (!PM_GET_PM_INFO(dip))
6869 		return (DDI_SUCCESS);
6870 
6871 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6872 	pm_hold_power(dip);
6873 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6874 	if (ret != DDI_SUCCESS)
6875 		pm_rele_power(dip);
6876 	return (ret);
6877 }
6878 
6879 /*
6880  * This routine is called by devfs during its walk to unconfigue a node.
6881  * If the call is due to auto mod_unloads and the dip is not at its
6882  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6883  * return DDI_SUCCESS.
6884  */
6885 int
6886 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6887 {
6888 	PMD_FUNC(pmf, "pre_unconfig")
6889 	int ret;
6890 
6891 	if (MDI_VHCI(dip)) {
6892 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6893 		    PM_DEVICE(dip), flags))
6894 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6895 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6896 	} else if (!PM_GET_PM_INFO(dip))
6897 		return (DDI_SUCCESS);
6898 
6899 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6900 	    flags))
6901 	*held = 0;
6902 
6903 	/*
6904 	 * If the dip is a leaf node, don't power it up.
6905 	 */
6906 	if (!ddi_get_child(dip))
6907 		return (DDI_SUCCESS);
6908 
6909 	/*
6910 	 * Do not power up the node if it is called due to auto-modunload.
6911 	 */
6912 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6913 		return (DDI_FAILURE);
6914 
6915 	pm_hold_power(dip);
6916 	*held = 1;
6917 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6918 	if (ret != DDI_SUCCESS) {
6919 		pm_rele_power(dip);
6920 		*held = 0;
6921 	}
6922 	return (ret);
6923 }
6924 
6925 /*
6926  * Notify ppm of attach action.  Parent is already held at full power by
6927  * probe action.
6928  */
6929 void
6930 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6931 {
6932 	static char *me = "pm_pre_attach";
6933 	power_req_t power_req;
6934 	int result;
6935 
6936 	/*
6937 	 * Initialize and fill in the PPM cookie
6938 	 */
6939 	bzero(cp, sizeof (*cp));
6940 	cp->ppc_cmd = (int)cmd;
6941 	cp->ppc_ppm = PPM(dip);
6942 	cp->ppc_dip = dip;
6943 
6944 	/*
6945 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6946 	 * Power Management stuff. DDI_RESUME also has to purge it's
6947 	 * powerlevel information.
6948 	 */
6949 	switch (cmd) {
6950 	case DDI_ATTACH:
6951 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6952 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6953 			power_req.req.ppm_config_req.who = dip;
6954 			ASSERT(PPM(dip));
6955 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6956 			    &power_req, &result);
6957 		}
6958 #ifdef DEBUG
6959 		else {
6960 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6961 			power_req.req.ppm_config_req.who = dip;
6962 			(void) pm_ctlops(NULL, dip,
6963 			    DDI_CTLOPS_POWER, &power_req, &result);
6964 		}
6965 #endif
6966 		break;
6967 	case DDI_RESUME:
6968 		pm_forget_power_level(dip);
6969 
6970 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6971 			power_req.request_type = PMR_PPM_PRE_RESUME;
6972 			power_req.req.resume_req.who = cp->ppc_dip;
6973 			power_req.req.resume_req.cmd =
6974 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6975 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6976 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6977 			    DDI_CTLOPS_POWER, &power_req, &result);
6978 		}
6979 #ifdef DEBUG
6980 		else {
6981 			power_req.request_type = PMR_PPM_PRE_RESUME;
6982 			power_req.req.resume_req.who = cp->ppc_dip;
6983 			power_req.req.resume_req.cmd =
6984 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6985 			(void) pm_ctlops(NULL, cp->ppc_dip,
6986 			    DDI_CTLOPS_POWER, &power_req, &result);
6987 		}
6988 #endif
6989 		break;
6990 
6991 	case DDI_PM_RESUME:
6992 		break;
6993 
6994 	default:
6995 		panic(me);
6996 	}
6997 }
6998 
6999 /*
7000  * Nexus drivers call into pm framework to indicate which child driver is
7001  * being uninstalled.  In some platforms, ppm may need to reconfigure the
7002  * hardware since the device driver is no longer installed.
7003  */
7004 int
7005 pm_uninit_child(dev_info_t *dip)
7006 {
7007 	power_req_t power_req;
7008 
7009 	ASSERT(ddi_binding_name(dip));
7010 	ASSERT(ddi_get_name_addr(dip));
7011 	pm_ppm_claim(dip);
7012 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
7013 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
7014 		power_req.req.ppm_config_req.who = dip;
7015 		ASSERT(PPM(dip));
7016 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
7017 		    NULL));
7018 	} else {
7019 #ifdef DEBUG
7020 		/* pass it to the default handler so we can debug things */
7021 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
7022 		power_req.req.ppm_config_req.who = dip;
7023 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
7024 #endif
7025 	}
7026 	return (DDI_SUCCESS);
7027 }
7028 /*
7029  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
7030  * Also notify ppm of result of probe if there is a ppm that cares
7031  */
7032 void
7033 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
7034 {
7035 	_NOTE(ARGUNUSED(probe_failed))
7036 	int result;
7037 	power_req_t power_req;
7038 
7039 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7040 		power_req.request_type = PMR_PPM_POST_PROBE;
7041 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7042 		power_req.req.ppm_config_req.result = ret;
7043 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7044 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
7045 		    &power_req, &result);
7046 	}
7047 #ifdef DEBUG
7048 	else {
7049 		power_req.request_type = PMR_PPM_POST_PROBE;
7050 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7051 		power_req.req.ppm_config_req.result = ret;
7052 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
7053 		    &power_req, &result);
7054 	}
7055 #endif
7056 }
7057 
7058 void
7059 pm_post_config(dev_info_t *dip, char *devnm)
7060 {
7061 	PMD_FUNC(pmf, "post_config")
7062 
7063 	if (MDI_VHCI(dip)) {
7064 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7065 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
7066 		return;
7067 	} else if (!PM_GET_PM_INFO(dip))
7068 		return;
7069 
7070 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7071 	pm_rele_power(dip);
7072 }
7073 
7074 void
7075 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
7076 {
7077 	PMD_FUNC(pmf, "post_unconfig")
7078 
7079 	if (MDI_VHCI(dip)) {
7080 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
7081 		    PM_DEVICE(dip), held))
7082 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
7083 		return;
7084 	} else if (!PM_GET_PM_INFO(dip))
7085 		return;
7086 
7087 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
7088 	    held))
7089 	if (!held)
7090 		return;
7091 	/*
7092 	 * We have held power in pre_unconfig, release it here.
7093 	 */
7094 	pm_rele_power(dip);
7095 }
7096 
7097 /*
7098  * Notify ppm of result of attach if there is a ppm that cares
7099  */
7100 void
7101 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7102 {
7103 	int result;
7104 	power_req_t power_req;
7105 	dev_info_t	*dip;
7106 
7107 	if (cp->ppc_cmd != DDI_ATTACH)
7108 		return;
7109 
7110 	dip = cp->ppc_dip;
7111 
7112 	if (ret == DDI_SUCCESS) {
7113 		/*
7114 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7115 		 */
7116 		if (PM_GET_PM_INFO(dip) == NULL)
7117 			(void) pm_start(dip);
7118 	} else {
7119 		/*
7120 		 * Attach may have got pm started before failing
7121 		 */
7122 		pm_stop(dip);
7123 	}
7124 
7125 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7126 		power_req.request_type = PMR_PPM_POST_ATTACH;
7127 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7128 		power_req.req.ppm_config_req.result = ret;
7129 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7130 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7131 		    DDI_CTLOPS_POWER, &power_req, &result);
7132 	}
7133 #ifdef DEBUG
7134 	else {
7135 		power_req.request_type = PMR_PPM_POST_ATTACH;
7136 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7137 		power_req.req.ppm_config_req.result = ret;
7138 		(void) pm_ctlops(NULL, cp->ppc_dip,
7139 		    DDI_CTLOPS_POWER, &power_req, &result);
7140 	}
7141 #endif
7142 }
7143 
7144 /*
7145  * Notify ppm of attach action.  Parent is already held at full power by
7146  * probe action.
7147  */
7148 void
7149 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7150 {
7151 	int result;
7152 	power_req_t power_req;
7153 
7154 	bzero(cp, sizeof (*cp));
7155 	cp->ppc_dip = dip;
7156 	cp->ppc_cmd = (int)cmd;
7157 
7158 	switch (cmd) {
7159 	case DDI_DETACH:
7160 		pm_detaching(dip);		/* suspend pm while detaching */
7161 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7162 			power_req.request_type = PMR_PPM_PRE_DETACH;
7163 			power_req.req.ppm_config_req.who = dip;
7164 			ASSERT(PPM(dip));
7165 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7166 			    &power_req, &result);
7167 			cp->ppc_ppm = PPM(dip);
7168 		} else {
7169 #ifdef DEBUG
7170 			/* pass to the default handler so we can debug things */
7171 			power_req.request_type = PMR_PPM_PRE_DETACH;
7172 			power_req.req.ppm_config_req.who = dip;
7173 			(void) pm_ctlops(NULL, dip,
7174 			    DDI_CTLOPS_POWER, &power_req, &result);
7175 #endif
7176 			cp->ppc_ppm = NULL;
7177 		}
7178 		break;
7179 
7180 	default:
7181 		break;
7182 	}
7183 }
7184 
/*
 * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
 * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
 * make an entry to record the details, which includes certain flag settings.
 *
 * A new pm_noinvol_t is allocated (sleeping if necessary), filled in from the
 * arguments and linked into the pm_noinvol_head list under pm_noinvol_rwlock
 * held as writer.  The entry is placed in front of the first existing entry
 * whose path compares greater (strcmp order), or at the tail if there is
 * none.  Recording a path that is already on the list panics.
 *
 * path is copied, so the caller keeps ownership of its buffer.  If major is
 * DDI_MAJOR_T_NONE the major number is derived from the path instead.
 */
static void
pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
    int wasvolpmd, major_t major)
{
	PMD_FUNC(pmf, "record_invol_path")
	major_t pm_path_to_major(char *);
	size_t plen;
	pm_noinvol_t *ip, *np, *pp;
	pp = NULL;

	/* ni_size remembers the allocation size so ni_path can be freed */
	plen = strlen(path) + 1;
	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
	np->ni_size = plen;
	np->ni_path = kmem_alloc(plen, KM_SLEEP);
	np->ni_noinvolpm = noinvolpm;
	np->ni_volpmd = volpmd;
	np->ni_wasvolpmd = wasvolpmd;
	np->ni_flags = flags;
	(void) strcpy(np->ni_path, path);
	/*
	 * If we haven't actually seen the node attached, it is hard to figure
	 * out its major.  If we could hold the node by path, we would be much
	 * happier here.
	 */
	if (major == DDI_MAJOR_T_NONE) {
		np->ni_major = pm_path_to_major(path);
	} else {
		np->ni_major = major;
	}
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	/* pp trails ip by one entry so we can splice in front of ip */
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		int comp = strcmp(path, ip->ni_path);
		if (comp < 0) {
			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
			    pmf, path, ip->ni_path))
			/* insert before current entry */
			np->ni_next = ip;
			if (pp) {
				pp->ni_next = np;
			} else {
				pm_noinvol_head = np;
			}
			rw_exit(&pm_noinvol_rwlock);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("record_invol_path exit0");
#endif
			return;
		} else if (comp == 0) {
			/* duplicates are a fatal bookkeeping error */
			panic("%s already in pm_noinvol list", path);
		}
	}
	/*
	 * If we did not find an entry in the list that this should go before,
	 * then it must go at the end
	 */
	if (pp) {
		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
		    pp->ni_path))
		ASSERT(pp->ni_next == 0);
		pp->ni_next = np;
	} else {
		/* the list was empty */
		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
		ASSERT(!pm_noinvol_head);
		pm_noinvol_head = np;
	}
	rw_exit(&pm_noinvol_rwlock);
#ifdef DEBUG
	if (pm_debug & PMD_NOINVOL)
		pr_noinvol("record_invol_path exit");
#endif
}
7262 
7263 void
7264 pm_record_invol(dev_info_t *dip)
7265 {
7266 	char *pathbuf;
7267 	int pm_all_components_off(dev_info_t *);
7268 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7269 
7270 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7271 	(void) ddi_pathname(dip, pathbuf);
7272 
7273 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7274 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7275 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7276 
7277 	/*
7278 	 * If this child's detach will be holding up its ancestors, then we
7279 	 * allow for an exception to that if all children of this type have
7280 	 * gone down voluntarily.
7281 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7282 	 */
7283 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7284 	    dip);
7285 	kmem_free(pathbuf, MAXPATHLEN);
7286 }
7287 
/*
 * Complete the pm side of a detach-type operation described by the cookie
 * that pm_pre_detach() filled in; ret is the result of the driver's detach.
 *
 * For DDI_DETACH the ppm (or, on DEBUG kernels, the default handler) is told
 * the result.  On success the node is recorded on the noinvol list if it is
 * subject to no-involuntary-power-cycles handling, its noinvol flags are
 * cleared, console frame buffer detach bookkeeping is finished and pm is
 * stopped on the node.  On failure the console frame buffer pointer is
 * restored if needed and power management of the node is resumed.
 * DDI_SUSPEND and DDI_PM_SUSPEND need nothing; any other command panics on
 * DEBUG kernels and is ignored otherwise.
 */
void
pm_post_detach(pm_ppm_cookie_t *cp, int ret)
{
	dev_info_t *dip = cp->ppc_dip;
	int result;
	power_req_t power_req;

	switch (cp->ppc_cmd) {
	case DDI_DETACH:
		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
			power_req.request_type = PMR_PPM_POST_DETACH;
			power_req.req.ppm_config_req.who = cp->ppc_dip;
			power_req.req.ppm_config_req.result = ret;
			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
			    DDI_CTLOPS_POWER, &power_req, &result);
		}
#ifdef DEBUG
		else {
			power_req.request_type = PMR_PPM_POST_DETACH;
			power_req.req.ppm_config_req.who = cp->ppc_dip;
			power_req.req.ppm_config_req.result = ret;
			(void) pm_ctlops(NULL, cp->ppc_dip,
			    DDI_CTLOPS_POWER, &power_req, &result);
		}
#endif
		if (ret == DDI_SUCCESS) {
			/*
			 * For hotplug detach we assume it is *really* gone
			 *
			 * (The ppc_cmd test below is always true here, since
			 * we are inside the DDI_DETACH case.)
			 */
			if (cp->ppc_cmd == DDI_DETACH &&
			    ((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm))
				pm_record_invol(dip);
			DEVI(dip)->devi_pm_flags &=
			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);

			/*
			 * If console fb is detaching, then we don't need to
			 * worry any more about it going off (pm_detaching has
			 * brought up all components)
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				ASSERT(cfb_dip_detaching);
				ASSERT(cfb_dip == NULL);
				ASSERT(pm_cfb_comps_off == 0);
				cfb_dip_detaching = NULL;
				mutex_exit(&pm_cfb_lock);
			}
			pm_stop(dip);	/* make it permanent */
		} else {
			/*
			 * Detach failed: the console fb (if this is it) is
			 * still with us, so put it back in cfb_dip.
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				ASSERT(cfb_dip_detaching);
				ASSERT(cfb_dip == NULL);
				ASSERT(pm_cfb_comps_off == 0);
				cfb_dip = cfb_dip_detaching;
				cfb_dip_detaching = NULL;
				mutex_exit(&pm_cfb_lock);
			}
			pm_detach_failed(dip);	/* resume power management */
		}
		break;
	case DDI_PM_SUSPEND:
		break;
	case DDI_SUSPEND:
		break;				/* legal, but nothing to do */
	default:
#ifdef DEBUG
		panic("pm_post_detach: unrecognized cmd %d for detach",
		    cp->ppc_cmd);
		/*NOTREACHED*/
#else
		break;
#endif
	}
}
7367 
7368 /*
7369  * Called after vfs_mountroot has got the clock started to fix up timestamps
7370  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7371  * devices look busy but have a 0 busycnt
7372  */
7373 int
7374 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7375 {
7376 	_NOTE(ARGUNUSED(arg))
7377 
7378 	pm_info_t *info = PM_GET_PM_INFO(dip);
7379 	struct pm_component *cp;
7380 	int i;
7381 
7382 	if (!info)
7383 		return (DDI_WALK_CONTINUE);
7384 	PM_LOCK_BUSY(dip);
7385 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7386 		cp = PM_CP(dip, i);
7387 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7388 			cp->pmc_timestamp = gethrestime_sec();
7389 	}
7390 	PM_UNLOCK_BUSY(dip);
7391 	return (DDI_WALK_CONTINUE);
7392 }
7393 
/*
 * Called at attach time to see if the device being attached has a record in
 * the no involuntary power cycles list.  If so, we do some bookkeeping on the
 * parents and set a flag in the dip
 *
 * The work is done at most once per dip (guarded by PMC_NOINVOL_DONE).  The
 * list is first searched under the reader lock; only if the dip's pathname
 * is present is the writer lock taken and the list searched again, this time
 * copying the recorded state into the dip, unlinking and freeing the entry,
 * and propagating the change via pm_noinvol_update().
 */
void
pm_noinvol_specd(dev_info_t *dip)
{
	PMD_FUNC(pmf, "noinvol_specd")
	char *pathbuf;
	pm_noinvol_t *ip, *pp = NULL;
	int wasvolpmd;
	int found = 0;

	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
		return;
	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);

	/* PM_LOCK_DIP is held from here until the entry has been consumed */
	PM_LOCK_DIP(dip);
	DEVI(dip)->devi_pm_volpmd = 0;
	DEVI(dip)->devi_pm_noinvolpm = 0;
	/* first pass: cheap existence check under the reader lock */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			found++;
			break;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	if (!found) {
		PM_UNLOCK_DIP(dip);
		kmem_free(pathbuf, MAXPATHLEN);
		return;
	}
	/*
	 * second pass: the lock was dropped in between, so search again
	 * under the writer lock before modifying the list
	 */
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	pp = NULL;
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
			/*
			 * Handle special case of console fb
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				cfb_dip = dip;
				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
				mutex_exit(&pm_cfb_lock);
			}
			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
			ASSERT((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm);
			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
			    ip->ni_noinvolpm, ip->ni_volpmd,
			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
			/*
			 * free the entry in hopes the list will now be empty
			 * and we won't have to search it any more until the
			 * device detaches
			 */
			if (pp) {
				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
				    pmf, ip->ni_path, pp->ni_path))
				pp->ni_next = ip->ni_next;
			} else {
				PMD(PMD_NOINVOL, ("%s: free %s head\n",
				    pmf, ip->ni_path))
				ASSERT(pm_noinvol_head == ip);
				pm_noinvol_head = ip->ni_next;
			}
			PM_UNLOCK_DIP(dip);
			/* save what we still need before the entry is freed */
			wasvolpmd = ip->ni_wasvolpmd;
			rw_exit(&pm_noinvol_rwlock);
			/* the entry is unlinked, so free it outside the lock */
			kmem_free(ip->ni_path, ip->ni_size);
			kmem_free(ip, sizeof (*ip));
			/*
			 * Now walk up the tree decrementing devi_pm_noinvolpm
			 * (and volpmd if appropriate)
			 */
			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
			    wasvolpmd, pathbuf, dip);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("noinvol_specd exit");
#endif
			kmem_free(pathbuf, MAXPATHLEN);
			return;
		}
	}
	/* the entry vanished between the two passes; nothing to do */
	kmem_free(pathbuf, MAXPATHLEN);
	rw_exit(&pm_noinvol_rwlock);
	PM_UNLOCK_DIP(dip);
}
7497 
7498 int
7499 pm_all_components_off(dev_info_t *dip)
7500 {
7501 	int i;
7502 	pm_component_t *cp;
7503 
7504 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7505 		cp = PM_CP(dip, i);
7506 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7507 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7508 			return (0);
7509 	}
7510 	return (1);	/* all off */
7511 }
7512 
/*
 * Make sure that all "no involuntary power cycles" devices are attached.
 * Called before doing a cpr suspend to make sure the driver has a say about
 * the power cycle
 *
 * Returns 1 when no reattachable entry remains on the list, 0 if some device
 * could not be held or its entry did not go away after being held.  Module
 * unloading is disabled on entry; it is re-enabled here only on the failure
 * paths, so after a successful return it is still disabled.
 */
int
pm_reattach_noinvol(void)
{
	PMD_FUNC(pmf, "reattach_noinvol")
	pm_noinvol_t *ip;
	char *path;
	dev_info_t *dip;

	/*
	 * Prevent the modunload thread from unloading any modules until we
	 * have completely stopped all kernel threads.
	 */
	modunload_disable();
	/*
	 * NOTE(review): this pass walks and writes the list without holding
	 * pm_noinvol_rwlock -- confirm that callers guarantee exclusion.
	 */
	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
		/*
		 * Forget we've ever seen any entry
		 */
		ip->ni_persistent = 0;
	}
restart:
	/*
	 * Each pass handles the first eligible entry and then starts over,
	 * because the list lock is dropped while the device is being held.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
#ifdef PMDDEBUG
		major_t maj;
		maj = ip->ni_major;
#endif
		path = ip->ni_path;
		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
			if (ip->ni_persistent) {
				/*
				 * If we weren't able to make this entry
				 * go away, then we give up, as
				 * holding/attaching the driver ought to have
				 * resulted in this entry being deleted
				 */
				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
				    "(%s|%d)\n", pmf, ip->ni_path,
				    ddi_major_to_name(maj), (int)maj))
				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
				    ip->ni_path);
				modunload_enable();
				rw_exit(&pm_noinvol_rwlock);
				return (0);
			}
			/*
			 * mark the entry as tried (note: written while only
			 * the reader lock is held)
			 */
			ip->ni_persistent++;
			rw_exit(&pm_noinvol_rwlock);
			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
			/*
			 * NOTE(review): path still points into the list entry
			 * although the lock has been dropped -- confirm the
			 * entry cannot be freed while it is in use here.
			 */
			dip = e_ddi_hold_devi_by_path(path, 0);
			if (dip == NULL) {
				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
				    pmf, path, (int)maj))
				cmn_err(CE_WARN, "cpr: unable to hold %s "
				    "driver", path);
				modunload_enable();
				return (0);
			} else {
				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
				/*
				 * Since the modunload thread is stopped, we
				 * don't have to keep the driver held, which
				 * saves a ton of bookkeeping
				 */
				ddi_release_devi(dip);
				goto restart;
			}
		} else {
			/* no path recorded, or the driver has been removed */
			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
			    pmf, ip->ni_path))
			continue;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	return (1);
}
7592 
/*
 * Re-enable module unloading.  pm_reattach_noinvol() disables it and leaves
 * it disabled when it returns success, so this is presumably the matching
 * call made once the caller is done -- confirm against the cpr caller.
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7598 
7599 /*
7600  * Display pm support code
7601  */
7602 
7603 
7604 /*
7605  * console frame-buffer power-mgmt gets enabled when debugging
7606  * services are not present or console fbpm override is set
7607  */
7608 void
7609 pm_cfb_setup(const char *stdout_path)
7610 {
7611 	PMD_FUNC(pmf, "cfb_setup")
7612 	extern int obpdebug;
7613 	char *devname;
7614 	dev_info_t *dip;
7615 	int devname_len;
7616 	extern dev_info_t *fbdip;
7617 
7618 	/*
7619 	 * By virtue of this function being called (from consconfig),
7620 	 * we know stdout is a framebuffer.
7621 	 */
7622 	stdout_is_framebuffer = 1;
7623 
7624 	if (obpdebug || (boothowto & RB_DEBUG)) {
7625 		if (pm_cfb_override == 0) {
7626 			/*
7627 			 * Console is frame buffer, but we want to suppress
7628 			 * pm on it because of debugging setup
7629 			 */
7630 			pm_cfb_enabled = 0;
7631 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7632 			    "console power management.");
7633 			/*
7634 			 * however, we still need to know which is the console
7635 			 * fb in order to suppress pm on it
7636 			 */
7637 		} else {
7638 			cmn_err(CE_WARN, "Kernel debugger present: see "
7639 			    "kmdb(1M) for interaction with power management.");
7640 		}
7641 	}
7642 #ifdef DEBUG
7643 	/*
7644 	 * IF console is fb and is power managed, don't do prom_printfs from
7645 	 * pm debug macro
7646 	 */
7647 	if (pm_cfb_enabled && !pm_debug_to_console) {
7648 		if (pm_debug)
7649 			prom_printf("pm debug output will be to log only\n");
7650 		pm_divertdebug++;
7651 	}
7652 #endif
7653 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7654 	devname_len = strlen(devname) + 1;
7655 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7656 	/* if the driver is attached */
7657 	if ((dip = fbdip) != NULL) {
7658 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7659 		    PM_DEVICE(dip)))
7660 		/*
7661 		 * We set up here as if the driver were power manageable in case
7662 		 * we get a later attach of a pm'able driver (which would result
7663 		 * in a panic later)
7664 		 */
7665 		cfb_dip = dip;
7666 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7667 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7668 		    PM_DEVICE(dip)))
7669 #ifdef DEBUG
7670 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7671 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7672 			    pmf, PM_DEVICE(dip)))
7673 		}
7674 #endif
7675 	} else {
7676 		char *ep;
7677 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7678 		pm_record_invol_path(devname,
7679 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7680 		    DDI_MAJOR_T_NONE);
7681 		for (ep = strrchr(devname, '/'); ep != devname;
7682 		    ep = strrchr(devname, '/')) {
7683 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7684 			*ep = '\0';
7685 			dip = pm_name_to_dip(devname, 0);
7686 			if (dip != NULL) {
7687 				/*
7688 				 * Walk up the tree incrementing
7689 				 * devi_pm_noinvolpm
7690 				 */
7691 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7692 				    0, 0, devname, dip);
7693 				break;
7694 			} else {
7695 				pm_record_invol_path(devname,
7696 				    PMC_NO_INVOL, 1, 0, 0, DDI_MAJOR_T_NONE);
7697 			}
7698 		}
7699 	}
7700 	kmem_free(devname, devname_len);
7701 }
7702 
7703 void
7704 pm_cfb_rele(void)
7705 {
7706 	mutex_enter(&pm_cfb_lock);
7707 	/*
7708 	 * this call isn't using the console any  more, it is ok to take it
7709 	 * down if the count goes to 0
7710 	 */
7711 	cfb_inuse--;
7712 	mutex_exit(&pm_cfb_lock);
7713 }
7714 
7715 /*
7716  * software interrupt handler for fbpm; this function exists because we can't
7717  * bring up the frame buffer power from above lock level.  So if we need to,
7718  * we instead schedule a softint that runs this routine and takes us into
7719  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7720  */
7721 static uint_t
7722 pm_cfb_softint(caddr_t int_handler_arg)
7723 {
7724 	_NOTE(ARGUNUSED(int_handler_arg))
7725 	int rval = DDI_INTR_UNCLAIMED;
7726 
7727 	mutex_enter(&pm_cfb_lock);
7728 	if (pm_soft_pending) {
7729 		mutex_exit(&pm_cfb_lock);
7730 		debug_enter((char *)NULL);
7731 		/* acquired in debug_enter before calling pm_cfb_trigger */
7732 		pm_cfb_rele();
7733 		mutex_enter(&pm_cfb_lock);
7734 		pm_soft_pending = 0;
7735 		mutex_exit(&pm_cfb_lock);
7736 		rval = DDI_INTR_CLAIMED;
7737 	} else
7738 		mutex_exit(&pm_cfb_lock);
7739 
7740 	return (rval);
7741 }
7742 
7743 void
7744 pm_cfb_setup_intr(void)
7745 {
7746 	PMD_FUNC(pmf, "cfb_setup_intr")
7747 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7748 	void pm_cfb_check_and_powerup(void);
7749 
7750 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7751 #ifdef PMDDEBUG
7752 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7753 #endif
7754 
7755 	if (!stdout_is_framebuffer) {
7756 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7757 		return;
7758 	}
7759 
7760 	/*
7761 	 * setup software interrupt handler
7762 	 */
7763 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7764 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7765 		panic("pm: unable to register soft intr.");
7766 
7767 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7768 }
7769 
7770 /*
7771  * Checks to see if it is safe to write to the console wrt power management
7772  * (i.e. if the console is a framebuffer, then it must be at full power)
7773  * returns 1 when power is off (power-up is needed)
7774  * returns 0 when power is on (power-up not needed)
7775  */
7776 int
7777 pm_cfb_check_and_hold(void)
7778 {
7779 	/*
7780 	 * cfb_dip is set iff console is a power manageable frame buffer
7781 	 * device
7782 	 */
7783 	extern int modrootloaded;
7784 
7785 	mutex_enter(&pm_cfb_lock);
7786 	cfb_inuse++;
7787 	ASSERT(cfb_inuse);	/* wrap? */
7788 	if (modrootloaded && cfb_dip) {
7789 		/*
7790 		 * don't power down the frame buffer, the prom is using it
7791 		 */
7792 		if (pm_cfb_comps_off) {
7793 			mutex_exit(&pm_cfb_lock);
7794 			return (1);
7795 		}
7796 	}
7797 	mutex_exit(&pm_cfb_lock);
7798 	return (0);
7799 }
7800 
7801 /*
7802  * turn on cfb power (which is known to be off).
7803  * Must be called below lock level!
7804  */
7805 void
7806 pm_cfb_powerup(void)
7807 {
7808 	pm_info_t *info;
7809 	int norm;
7810 	int ccount, ci;
7811 	int unused;
7812 #ifdef DEBUG
7813 	/*
7814 	 * Can't reenter prom_prekern, so suppress pm debug messages
7815 	 * (still go to circular buffer).
7816 	 */
7817 	mutex_enter(&pm_debug_lock);
7818 	pm_divertdebug++;
7819 	mutex_exit(&pm_debug_lock);
7820 #endif
7821 	info = PM_GET_PM_INFO(cfb_dip);
7822 	ASSERT(info);
7823 
7824 	ccount = PM_NUMCMPTS(cfb_dip);
7825 	for (ci = 0; ci < ccount; ci++) {
7826 		norm = pm_get_normal_power(cfb_dip, ci);
7827 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7828 		    PM_CANBLOCK_BYPASS, 0, &unused);
7829 	}
7830 #ifdef DEBUG
7831 	mutex_enter(&pm_debug_lock);
7832 	pm_divertdebug--;
7833 	mutex_exit(&pm_debug_lock);
7834 #endif
7835 }
7836 
/*
 * Check if the console framebuffer is powered up.  If not power it up.
 * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
 * must be released by calling pm_cfb_rele when the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	/* the hold is taken whether or not a power-up turns out to be needed */
	if (pm_cfb_check_and_hold() == 0)
		return;

	pm_cfb_powerup();
}
7849 
7850 /*
7851  * Trigger a low level interrupt to power up console frame buffer.
7852  */
7853 void
7854 pm_cfb_trigger(void)
7855 {
7856 	if (cfb_dip == NULL)
7857 		return;
7858 
7859 	mutex_enter(&pm_cfb_lock);
7860 	/*
7861 	 * If machine appears to be hung, pulling the keyboard connector of
7862 	 * the console will cause a high level interrupt and go to debug_enter.
7863 	 * But, if the fb is powered down, this routine will be called to bring
7864 	 * it up (by generating a softint to do the work).  If soft interrupts
7865 	 * are not running, and the keyboard connector is pulled again, the
7866 	 * following code detects this condition and calls panic which allows
7867 	 * the fb to be brought up from high level.
7868 	 *
7869 	 * If two nearly simultaneous calls to debug_enter occur (both from
7870 	 * high level) the code described above will cause a panic.
7871 	 */
7872 	if (lbolt <= pm_soft_pending) {
7873 		panicstr = "pm_cfb_trigger: lbolt not advancing";
7874 		panic(panicstr);	/* does a power up at any intr level */
7875 		/* NOTREACHED */
7876 	}
7877 	pm_soft_pending = lbolt;
7878 	mutex_exit(&pm_cfb_lock);
7879 	ddi_trigger_softintr(pm_soft_id);
7880 }
7881 
7882 static major_t i_path_to_major(char *, char *);
7883 
7884 major_t
7885 pm_path_to_major(char *path)
7886 {
7887 	PMD_FUNC(pmf, "path_to_major")
7888 	char *np, *ap, *bp;
7889 	major_t ret;
7890 	size_t len;
7891 
7892 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7893 
7894 	np = strrchr(path, '/');
7895 	if (np != NULL)
7896 		np++;
7897 	else
7898 		np = path;
7899 	len = strlen(np) + 1;
7900 	bp = kmem_alloc(len, KM_SLEEP);
7901 	(void) strcpy(bp, np);
7902 	if ((ap = strchr(bp, '@')) != NULL) {
7903 		*ap = '\0';
7904 	}
7905 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7906 	ret = i_path_to_major(path, np);
7907 	kmem_free(bp, len);
7908 	return (ret);
7909 }
7910 
#ifdef DEBUG
#ifndef sparc
/* NOTE(review): not referenced by pm_log() below; purpose not visible here */
clock_t pt_sleep = 1;
#endif

/*
 * State of the in-memory pm debug log written by pm_log():
 *   pm_msgbuf - start of the log buffer, allocated on first use
 *   pm_bufend - last byte of the log buffer
 *   pm_msgp   - where the next message will be written
 */
char	*pm_msgp;
char	*pm_bufend;
char	*pm_msgbuf = NULL;
/* size of the log buffer, in pages */
int	pm_logpages = 0x100;
#include <sys/sunldi.h>
#include <sys/uio.h>
/* delay, passed to drv_usecwait(), that pm_log() makes after each message */
clock_t	pm_log_sleep = 1000;
/* if set, pm_log() follows each message sent via pm_printf() with "\r" */
int	pm_extra_cr = 1;
/* if set, pm_log() also sends each message to pm_printf() (x86 only) */
volatile int pm_tty = 1;

#define	PMLOGPGS	pm_logpages

#if defined(__x86)
void pm_printf(char *s);
#endif
7931 
7932 /*PRINTFLIKE1*/
7933 void
7934 pm_log(const char *fmt, ...)
7935 {
7936 	va_list adx;
7937 	size_t size;
7938 
7939 	mutex_enter(&pm_debug_lock);
7940 	if (pm_msgbuf == NULL) {
7941 		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
7942 		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
7943 		pm_msgp = pm_msgbuf;
7944 	}
7945 	va_start(adx, fmt);
7946 	size = vsnprintf(NULL, 0, fmt, adx) + 1;
7947 	va_end(adx);
7948 	va_start(adx, fmt);
7949 	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
7950 		bzero(pm_msgp, pm_bufend - pm_msgp);
7951 		(void) vsnprintf(pm_msgbuf, size, fmt, adx);
7952 		if (!pm_divertdebug)
7953 			prom_printf("%s", pm_msgp);
7954 #if defined(__x86)
7955 		if (pm_tty) {
7956 			pm_printf(pm_msgp);
7957 			if (pm_extra_cr)
7958 				pm_printf("\r");
7959 		}
7960 #endif
7961 		pm_msgp = pm_msgbuf + size;
7962 	} else {
7963 		(void) vsnprintf(pm_msgp, size, fmt, adx);
7964 #if defined(__x86)
7965 		if (pm_tty) {
7966 			pm_printf(pm_msgp);
7967 			if (pm_extra_cr)
7968 				pm_printf("\r");
7969 		}
7970 #endif
7971 		if (!pm_divertdebug)
7972 			prom_printf("%s", pm_msgp);
7973 		pm_msgp += size;
7974 	}
7975 	va_end(adx);
7976 	mutex_exit(&pm_debug_lock);
7977 	drv_usecwait((clock_t)pm_log_sleep);
7978 }
7979 #endif	/* DEBUG */
7980 
7981 /*
7982  * We want to save the state of any directly pm'd devices over the suspend/
7983  * resume process so that we can put them back the way the controlling
7984  * process left them.
7985  */
7986 void
7987 pm_save_direct_levels(void)
7988 {
7989 	pm_processes_stopped = 1;
7990 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7991 }
7992 
7993 static int
7994 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
7995 {
7996 	_NOTE(ARGUNUSED(arg))
7997 	int i;
7998 	int *ip;
7999 	pm_info_t *info = PM_GET_PM_INFO(dip);
8000 
8001 	if (!info)
8002 		return (DDI_WALK_CONTINUE);
8003 
8004 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8005 		if (PM_NUMCMPTS(dip) > 2) {
8006 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
8007 			    sizeof (int), KM_SLEEP);
8008 			ip = info->pmi_lp;
8009 		} else {
8010 			ip = info->pmi_levels;
8011 		}
8012 		/* autopm and processes are stopped, ok not to lock power */
8013 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
8014 			*ip++ = PM_CURPOWER(dip, i);
8015 		/*
8016 		 * There is a small window between stopping the
8017 		 * processes and setting pm_processes_stopped where
8018 		 * a driver could get hung up in a pm_raise_power()
8019 		 * call.  Free any such driver now.
8020 		 */
8021 		pm_proceed(dip, PMP_RELEASE, -1, -1);
8022 	}
8023 
8024 	return (DDI_WALK_CONTINUE);
8025 }
8026 
8027 void
8028 pm_restore_direct_levels(void)
8029 {
8030 	/*
8031 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
8032 	 * threads failed) then we don't want to try to restore them
8033 	 */
8034 	if (!pm_processes_stopped)
8035 		return;
8036 
8037 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
8038 	pm_processes_stopped = 0;
8039 }
8040 
8041 static int
8042 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
8043 {
8044 	_NOTE(ARGUNUSED(arg))
8045 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
8046 	int i, nc, result;
8047 	int *ip;
8048 
8049 	pm_info_t *info = PM_GET_PM_INFO(dip);
8050 	if (!info)
8051 		return (DDI_WALK_CONTINUE);
8052 
8053 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8054 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
8055 			ip = &info->pmi_lp[nc - 1];
8056 		} else {
8057 			ip = &info->pmi_levels[nc - 1];
8058 		}
8059 		/*
8060 		 * Because fb drivers fail attempts to turn off the
8061 		 * fb when the monitor is on, but treat a request to
8062 		 * turn on the monitor as a request to turn on the
8063 		 * fb too, we process components in descending order
8064 		 * Because autopm is disabled and processes aren't
8065 		 * running, it is ok to examine current power outside
8066 		 * of the power lock
8067 		 */
8068 		for (i = nc - 1; i >= 0; i--, ip--) {
8069 			if (PM_CURPOWER(dip, i) == *ip)
8070 				continue;
8071 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
8072 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
8073 				cmn_err(CE_WARN, "cpr: unable "
8074 				    "to restore power level of "
8075 				    "component %d of directly "
8076 				    "power manged device %s@%s"
8077 				    " to %d",
8078 				    i, PM_NAME(dip),
8079 				    PM_ADDR(dip), *ip);
8080 				PMD(PMD_FAIL, ("%s: failed to restore "
8081 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
8082 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
8083 				    PM_CURPOWER(dip, i), *ip, result))
8084 			}
8085 		}
8086 		if (nc > 2) {
8087 			kmem_free(info->pmi_lp, nc * sizeof (int));
8088 			info->pmi_lp = NULL;
8089 		}
8090 	}
8091 	return (DDI_WALK_CONTINUE);
8092 }
8093 
8094 /*
8095  * Stolen from the bootdev module
8096  * attempt to convert a path to a major number
8097  */
8098 static major_t
8099 i_path_to_major(char *path, char *leaf_name)
8100 {
8101 	extern major_t path_to_major(char *pathname);
8102 	major_t maj;
8103 
8104 	if ((maj = path_to_major(path)) == DDI_MAJOR_T_NONE) {
8105 		maj = ddi_name_to_major(leaf_name);
8106 	}
8107 
8108 	return (maj);
8109 }
8110 
8111 static void i_pm_driver_removed(major_t major);
8112 
8113 /*
8114  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
8115  * An entry in the list means that the device is detached, so we need to
8116  * adjust its ancestors as if they had just seen this attach, and any detached
8117  * ancestors need to have their list entries adjusted.
8118  */
8119 void
8120 pm_driver_removed(major_t major)
8121 {
8122 
8123 	/*
8124 	 * Serialize removal of drivers. This is to keep ancestors of
8125 	 * a node that is being deleted from getting deleted and added back
8126 	 * with different counters.
8127 	 */
8128 	mutex_enter(&pm_remdrv_lock);
8129 	i_pm_driver_removed(major);
8130 	mutex_exit(&pm_remdrv_lock);
8131 }
8132 
8133 static void adjust_ancestors(char *, int);
8134 static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8135 static void pm_noinvol_process_ancestors(char *);
8136 
8137 /*
8138  * This routine is called recursively by pm_noinvol_process_ancestors()
8139  */
8140 static void
8141 i_pm_driver_removed(major_t major)
8142 {
8143 	PMD_FUNC(pmf, "driver_removed")
8144 	pm_noinvol_t *ip, *pp = NULL;
8145 	int wasvolpmd;
8146 	ASSERT(major != DDI_MAJOR_T_NONE);
8147 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8148 again:
8149 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8150 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8151 		if (major != ip->ni_major)
8152 			continue;
8153 		/*
8154 		 * If it is an ancestor of no-invol node, which is
8155 		 * not removed, skip it. This is to cover the case of
8156 		 * ancestor removed without removing its descendants.
8157 		 */
8158 		if (pm_is_noinvol_ancestor(ip)) {
8159 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8160 			continue;
8161 		}
8162 		wasvolpmd = ip->ni_wasvolpmd;
8163 		/*
8164 		 * remove the entry from the list
8165 		 */
8166 		if (pp) {
8167 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8168 			    pmf, ip->ni_path, pp->ni_path))
8169 			pp->ni_next = ip->ni_next;
8170 		} else {
8171 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8172 			    ip->ni_path))
8173 			ASSERT(pm_noinvol_head == ip);
8174 			pm_noinvol_head = ip->ni_next;
8175 		}
8176 		rw_exit(&pm_noinvol_rwlock);
8177 		adjust_ancestors(ip->ni_path, wasvolpmd);
8178 		/*
8179 		 * Had an ancestor been removed before this node, it would have
8180 		 * been skipped. Adjust the no-invol counters for such skipped
8181 		 * ancestors.
8182 		 */
8183 		pm_noinvol_process_ancestors(ip->ni_path);
8184 		kmem_free(ip->ni_path, ip->ni_size);
8185 		kmem_free(ip, sizeof (*ip));
8186 		goto again;
8187 	}
8188 	rw_exit(&pm_noinvol_rwlock);
8189 }
8190 
8191 /*
8192  * returns 1, if *aip is a ancestor of a no-invol node
8193  *	   0, otherwise
8194  */
8195 static int
8196 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8197 {
8198 	pm_noinvol_t *ip;
8199 
8200 	ASSERT(strlen(aip->ni_path) != 0);
8201 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8202 		if (ip == aip)
8203 			continue;
8204 		/*
8205 		 * To be an ancestor, the path must be an initial substring of
8206 		 * the descendent, and end just before a '/' in the
8207 		 * descendent's path.
8208 		 */
8209 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8210 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8211 			return (1);
8212 	}
8213 	return (0);
8214 }
8215 
/*
 * Adjust the no-involuntary-power-cycles accounting of the ancestors of the
 * node named by path.
 *
 * path is copied into a private buffer, which is cut at its last '/' to
 * form the parent's path; a path with no '/' has no ancestors and nothing
 * is done.
 *
 * If the parent has a dev_info node (pm_name_to_dip() finds it), the update
 * is handed to pm_noinvol_update() with PM_BP_NOINVOL_REMDRV.  Otherwise
 * the pm_noinvol list is searched for an entry with exactly the parent's
 * path; its ni_noinvolpm count (and, when wasvolpmd is set and ni_volpmd is
 * nonzero, its ni_volpmd count) is decremented, the entry is freed when it
 * has no no-invol descendants left and is not itself flagged PMC_NO_INVOL,
 * and the function recurses on the parent's path to reach the next
 * ancestor.
 *
 * NOTE(review): this comment used to say the routine "modifies string
 * *path".  The code visible here writes only to its private copies; path is
 * additionally passed to pm_noinvol_update(), which is not visible here --
 * confirm whether that callee writes through it.
 */
static void
adjust_ancestors(char *path, int wasvolpmd)
{
	PMD_FUNC(pmf, "adjust_ancestors")
	char *cp;
	pm_noinvol_t *lp;
	pm_noinvol_t *pp = NULL;
	major_t locked = DDI_MAJOR_T_NONE;
	dev_info_t *dip;
	char	*pathbuf;
	size_t pathbuflen = strlen(path) + 1;

	/*
	 * First we look up the ancestor's dip.  If we find it, then we
	 * adjust counts up the tree
	 */
	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
	(void) strcpy(pathbuf, path);
	cp = strrchr(pathbuf, '/');
	if (cp == NULL)	{
		/* if no ancestors, then nothing to do */
		kmem_free(pathbuf, pathbuflen);
		return;
	}
	/* pathbuf now names the parent of path */
	*cp = '\0';
	/* presumably the second argument asks for a hold on the dip */
	dip = pm_name_to_dip(pathbuf, 1);
	if (dip != NULL) {
		locked = PM_MAJOR(dip);

		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
		    path, dip);

		/* the dip is only released when it reports a valid major */
		if (locked != DDI_MAJOR_T_NONE)
			ddi_release_devi(dip);
	} else {
		char *apath;
		size_t len = strlen(pathbuf) + 1;
		int  lock_held = 1;

		/*
		 * Now check for ancestors that exist only in the list
		 */
		apath = kmem_alloc(len, KM_SLEEP);
		(void) strcpy(apath, pathbuf);
		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
			/*
			 * This can only happen once.  Since we have to drop
			 * the lock, we need to extract the relevant info.
			 */
			if (strcmp(pathbuf, lp->ni_path) == 0) {
				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
				    lp->ni_path, lp->ni_noinvolpm,
				    lp->ni_noinvolpm - 1))
				lp->ni_noinvolpm--;
				if (wasvolpmd && lp->ni_volpmd) {
					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
					    "%d\n", pmf, lp->ni_path,
					    lp->ni_volpmd, lp->ni_volpmd - 1))
					lp->ni_volpmd--;
				}
				/*
				 * remove the entry from the list, if there
				 * are no more no-invol descendants and node
				 * itself is not a no-invol node.
				 */
				if (!(lp->ni_noinvolpm ||
				    (lp->ni_flags & PMC_NO_INVOL))) {
					ASSERT(lp->ni_volpmd == 0);
					if (pp) {
						PMD(PMD_NOINVOL, ("%s: freeing "
						    "%s, prev is %s\n", pmf,
						    lp->ni_path, pp->ni_path))
						pp->ni_next = lp->ni_next;
					} else {
						PMD(PMD_NOINVOL, ("%s: free %s "
						    "head\n", pmf, lp->ni_path))
						ASSERT(pm_noinvol_head == lp);
						pm_noinvol_head = lp->ni_next;
					}
					/*
					 * lp is unlinked; drop the lock before
					 * recursing, and remember that it is
					 * no longer held.
					 */
					lock_held = 0;
					rw_exit(&pm_noinvol_rwlock);
					adjust_ancestors(apath, wasvolpmd);
					/* restore apath */
					(void) strcpy(apath, pathbuf);
					kmem_free(lp->ni_path, lp->ni_size);
					kmem_free(lp, sizeof (*lp));
				}
				break;
			}
		}
		if (lock_held)
			rw_exit(&pm_noinvol_rwlock);
		/*
		 * NOTE(review): when the entry was freed in the loop above,
		 * adjust_ancestors(apath, ...) has already been called once
		 * and is called again here -- confirm that the second pass
		 * over the same ancestors is intended.
		 */
		adjust_ancestors(apath, wasvolpmd);
		kmem_free(apath, len);
	}
	kmem_free(pathbuf, pathbuflen);
}
8319 
8320 /*
8321  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8322  * which were skipped even though their drivers were removed.
8323  */
8324 static void
8325 pm_noinvol_process_ancestors(char *path)
8326 {
8327 	pm_noinvol_t *lp;
8328 
8329 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8330 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8331 		if (strstr(path, lp->ni_path) &&
8332 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8333 			rw_exit(&pm_noinvol_rwlock);
8334 			i_pm_driver_removed(lp->ni_major);
8335 			return;
8336 		}
8337 	}
8338 	rw_exit(&pm_noinvol_rwlock);
8339 }
8340 
8341 /*
8342  * Returns true if (detached) device needs to be kept up because it exported the
8343  * "no-involuntary-power-cycles" property or we're pretending it did (console
8344  * fb case) or it is an ancestor of such a device and has used up the "one
8345  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8346  * upon detach.  In any event, we need an exact hit on the path or we return
8347  * false.
8348  */
8349 int
8350 pm_noinvol_detached(char *path)
8351 {
8352 	PMD_FUNC(pmf, "noinvol_detached")
8353 	pm_noinvol_t *ip;
8354 	int ret = 0;
8355 
8356 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8357 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8358 		if (strcmp(path, ip->ni_path) == 0) {
8359 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8360 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8361 				    "%s\n", pmf, path))
8362 				ret = 1;
8363 				break;
8364 			}
8365 #ifdef	DEBUG
8366 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8367 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8368 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8369 				    path))
8370 #endif
8371 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8372 			break;
8373 		}
8374 	}
8375 	rw_exit(&pm_noinvol_rwlock);
8376 	return (ret);
8377 }
8378 
8379 int
8380 pm_is_cfb(dev_info_t *dip)
8381 {
8382 	return (dip == cfb_dip);
8383 }
8384 
#ifdef	DEBUG
/*
 * Tell whether every component of the console frame buffer is at "normal"
 * power, i.e. fully on, which is the case when pm_cfb_comps_off is zero.
 * True is also returned when the console is not a framebuffer.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8397 
8398 /*
8399  * Preventing scan from powering down the node by incrementing the
8400  * kidsupcnt.
8401  */
8402 void
8403 pm_hold_power(dev_info_t *dip)
8404 {
8405 	e_pm_hold_rele_power(dip, 1);
8406 }
8407 
8408 /*
8409  * Releasing the hold by decrementing the kidsupcnt allowing scan
8410  * to power down the node if all conditions are met.
8411  */
8412 void
8413 pm_rele_power(dev_info_t *dip)
8414 {
8415 	e_pm_hold_rele_power(dip, -1);
8416 }
8417 
8418 /*
8419  * A wrapper of pm_all_to_normal() to power up a dip
8420  * to its normal level
8421  */
8422 int
8423 pm_powerup(dev_info_t *dip)
8424 {
8425 	PMD_FUNC(pmf, "pm_powerup")
8426 
8427 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8428 	ASSERT(!(servicing_interrupt()));
8429 
8430 	/*
8431 	 * in case this node is not already participating pm
8432 	 */
8433 	if (!PM_GET_PM_INFO(dip)) {
8434 		if (!DEVI_IS_ATTACHING(dip))
8435 			return (DDI_SUCCESS);
8436 		if (pm_start(dip) != DDI_SUCCESS)
8437 			return (DDI_FAILURE);
8438 		if (!PM_GET_PM_INFO(dip))
8439 			return (DDI_SUCCESS);
8440 	}
8441 
8442 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8443 }
8444 
8445 int
8446 pm_rescan_walk(dev_info_t *dip, void *arg)
8447 {
8448 	_NOTE(ARGUNUSED(arg))
8449 
8450 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8451 		return (DDI_WALK_CONTINUE);
8452 
8453 	/*
8454 	 * Currently pm_cpr_callb/resume code is the only caller
8455 	 * and it needs to make sure that stopped scan get
8456 	 * reactivated. Otherwise, rescan walk needn't reactive
8457 	 * stopped scan.
8458 	 */
8459 	pm_scan_init(dip);
8460 
8461 	(void) pm_rescan(dip);
8462 	return (DDI_WALK_CONTINUE);
8463 }
8464 
8465 static dev_info_t *
8466 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8467 {
8468 	dev_info_t *wdip, *pdip;
8469 
8470 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8471 		pdip = ddi_get_parent(wdip);
8472 		if (pdip == dip)
8473 			return (wdip);
8474 	}
8475 	return (NULL);
8476 }
8477 
/*
 * Carry out the bus power operation op at nexus dip.
 *
 * Three operations are handled; any other value of op is logged and
 * DDI_FAILURE is returned.
 *
 * BUS_POWER_CHILD_PWRCHG (arg is a pm_bp_child_pwrchg_t):
 *	Work down the tree from dip towards the target bpc_dip.  At the
 *	target, the change is made by pm_busop_set_power().  At an
 *	intermediate child, either that child's own bus_power function is
 *	called (when it wants notification), or the child is held, brought
 *	to normal power if it has pm info, and this function recurses on it.
 *
 * BUS_POWER_NEXUS_PWRUP (arg is a pm_bp_nexus_pwrup_t):
 *	Power up component bpn_comp of dip itself to bpn_level, by building
 *	a child power change request with direction PM_LEVEL_UPONLY and
 *	passing it to pm_busop_set_power().
 *
 * BUS_POWER_NOINVOL (arg is a pm_bp_noinvol_t):
 *	Work down the tree towards bpni_dip, calling
 *	pm_noinvol_update_node() on the nodes passed on the way.
 *
 * impl_arg and result are passed on to whatever is called.  Returns the
 * DDI status of the operation.
 */
int
pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	PMD_FUNC(pmf, "bp_bus_power")
	dev_info_t	*cdip;
	pm_info_t	*cinfo;
	pm_bp_child_pwrchg_t	*bpc;
	pm_sp_misc_t		*pspm;
	pm_bp_nexus_pwrup_t *bpn;
	pm_bp_child_pwrchg_t new_bpc;
	pm_bp_noinvol_t *bpi;
	dev_info_t *tdip;
	char *pathbuf;
	int		ret = DDI_SUCCESS;
	int		errno = 0;
	pm_component_t *cp;

	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
	    pm_decode_op(op)))
	switch (op) {
	case BUS_POWER_CHILD_PWRCHG:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		pspm = (pm_sp_misc_t *)bpc->bpc_private;
		tdip = bpc->bpc_dip;
		/* cdip: the child of dip on the path down to the target */
		cdip = pm_get_next_descendent(dip, tdip);
		/*
		 * NOTE(review): unlike the BUS_POWER_NOINVOL case below,
		 * cdip is used here without a NULL check, although
		 * pm_get_next_descendent() returns NULL when tdip == dip --
		 * presumably callers never pass the nexus itself as target;
		 * confirm.
		 */
		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			/*
			 * If the node is an involved parent, it needs to
			 * power up the node as it is needed.  There is nothing
			 * else the framework can do here.
			 */
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_SET, ("%s: call bus_power for "
				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
				    impl_arg, op, arg, result));
			}
			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
			    pspm->pspm_direction == PM_LEVEL_EXACT);
			/*
			 * we presume that the parent needs to be up in
			 * order for the child to change state (either
			 * because it must already be on if the child is on
			 * (and the pm_all_to_normal_nexus() will be a nop)
			 * or because it will need to be on for the child
			 * to come on; so we make the call regardless
			 */
			pm_hold_power(cdip);
			if (cinfo) {
				pm_canblock_t canblock = pspm->pspm_canblock;
				ret = pm_all_to_normal_nexus(cdip, canblock);
				if (ret != DDI_SUCCESS) {
					/* undo the hold taken above */
					pm_rele_power(cdip);
					return (ret);
				}
			}
			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
			    PM_DEVICE(cdip)))
			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
			    result);
			pm_rele_power(cdip);
		} else {
			/* cdip is the target: make the power change itself */
			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
			    result);
		}
		return (ret);

	case BUS_POWER_NEXUS_PWRUP:
		bpn = (pm_bp_nexus_pwrup_t *)arg;
		pspm = (pm_sp_misc_t *)bpn->bpn_private;

		/* reject a request naming an invalid device/component/level */
		if (!e_pm_valid_info(dip, NULL) ||
		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
			    pmf, PM_DEVICE(dip)))
			*pspm->pspm_errnop = EIO;
			*(int *)result = DDI_FAILURE;
			return (DDI_FAILURE);
		}

		ASSERT(bpn->bpn_dip == dip);
		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
		    PM_DEVICE(dip)))
		/* recast the request as a child power change on dip itself */
		new_bpc.bpc_dip = dip;
		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
		new_bpc.bpc_comp = bpn->bpn_comp;
		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
		new_bpc.bpc_nlevel = bpn->bpn_level;
		/* the caller's private pm_sp_misc_t is shared, not copied */
		new_bpc.bpc_private = bpn->bpn_private;
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
		    PM_LEVEL_UPONLY;
		/*
		 * NOTE(review): this points the caller's pspm_errnop at the
		 * local variable errno.  The value stored there by
		 * pm_busop_set_power() is not looked at below, and the
		 * pointer is left in the caller's structure after this
		 * function returns -- confirm no caller dereferences it
		 * afterwards.
		 */
		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
		    &errno;
		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
		    (void *)&new_bpc, result);
		kmem_free(pathbuf, MAXPATHLEN);
		return (ret);

	case BUS_POWER_NOINVOL:
		bpi = (pm_bp_noinvol_t *)arg;
		tdip = bpi->bpni_dip;
		cdip = pm_get_next_descendent(dip, tdip);

		/* In case of rem_drv, the leaf node has been removed */
		if (cdip == NULL)
			return (DDI_SUCCESS);

		cinfo = PM_GET_PM_INFO(cdip);
		if (cdip != tdip) {
			if (PM_WANTS_NOTIFICATION(cdip)) {
				PMD(PMD_NOINVOL,
				    ("%s: call bus_power for %s@%s(%s#%d)\n",
				    pmf, PM_DEVICE(cdip)))
				ret = (*PM_BUS_POWER_FUNC(cdip))
				    (cdip, NULL, op, arg, result);
				/* update this node once the levels below succeeded */
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			} else {
				PMD(PMD_NOINVOL,
				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
				    PM_DEVICE(cdip)))
				ret = pm_busop_bus_power(cdip, NULL, op,
				    arg, result);
				/*
				 * Update the current node.
				 */
				if ((cinfo) && (ret == DDI_SUCCESS))
					(void) pm_noinvol_update_node(cdip,
					    bpi);
				return (ret);
			}
		} else {
			/*
			 * For attach, detach, power up:
			 * Do nothing for leaf node since its
			 * counts are already updated.
			 * For CFB and driver removal, since the
			 * path and the target dip passed in is up to and incl.
			 * the immediate ancestor, need to do the update.
			 */
			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
			    "reached\n", pmf, PM_DEVICE(cdip)))
			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
				(void) pm_noinvol_update_node(cdip, bpi);
			return (DDI_SUCCESS);
		}

	/* every path of the case above returns; there is no fall through */
	default:
		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
		return (DDI_FAILURE);
	}
}
8638 
8639 static int
8640 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8641     void *arg, void *resultp)
8642 {
8643 	_NOTE(ARGUNUSED(impl_arg))
8644 	PMD_FUNC(pmf, "bp_set_power")
8645 	pm_ppm_devlist_t *devl = NULL;
8646 	int clevel, circ;
8647 #ifdef	DEBUG
8648 	int circ_db, ccirc_db;
8649 #endif
8650 	int ret = DDI_SUCCESS;
8651 	dev_info_t *cdip;
8652 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8653 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8654 	pm_canblock_t canblock = pspm->pspm_canblock;
8655 	int scan = pspm->pspm_scan;
8656 	int comp = bpc->bpc_comp;
8657 	int olevel = bpc->bpc_olevel;
8658 	int nlevel = bpc->bpc_nlevel;
8659 	int comps_off_incr = 0;
8660 	dev_info_t *pdip = ddi_get_parent(dip);
8661 	int dodeps;
8662 	int direction = pspm->pspm_direction;
8663 	int *errnop = pspm->pspm_errnop;
8664 #ifdef PMDDEBUG
8665 	char *dir = pm_decode_direction(direction);
8666 #endif
8667 	int *iresp = (int *)resultp;
8668 	time_t	idletime, thresh;
8669 	pm_component_t *cp = PM_CP(dip, comp);
8670 	int work_type;
8671 
8672 	*iresp = DDI_SUCCESS;
8673 	*errnop = 0;
8674 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8675 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8676 	    pm_decode_op(op)))
8677 
8678 	/*
8679 	 * The following set of conditions indicate we are here to handle a
8680 	 * driver's pm_[raise|lower]_power request, but the device is being
8681 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8682 	 * we want to pm_block and pass a status back to the caller based
8683 	 * on whether the controlling process's next activity on the device
8684 	 * matches the current request or not.  This distinction tells
8685 	 * downstream functions to avoid calling into a driver or changing
8686 	 * the framework's power state.  To actually block, we need:
8687 	 *
8688 	 * PM_ISDIRECT(dip)
8689 	 *	no reason to block unless a process is directly controlling dev
8690 	 * direction != PM_LEVEL_EXACT
8691 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8692 	 * !pm_processes_stopped
8693 	 *	don't block if controlling proc already be stopped for cpr
8694 	 * canblock != PM_CANBLOCK_BYPASS
8695 	 *	our caller must not have explicitly prevented blocking
8696 	 */
8697 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8698 		PM_LOCK_DIP(dip);
8699 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8700 			/* releases dip lock */
8701 			ret = pm_busop_match_request(dip, bpc);
8702 			if (ret == EAGAIN) {
8703 				PM_LOCK_DIP(dip);
8704 				continue;
8705 			}
8706 			return (*iresp = ret);
8707 		}
8708 		PM_UNLOCK_DIP(dip);
8709 	}
8710 	/* BC device is never scanned, so power will stick until we are done */
8711 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8712 	    direction != PM_LEVEL_DOWNONLY) {
8713 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8714 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8715 		    canblock, 0, resultp) != DDI_SUCCESS) {
8716 			/* *resultp set by pm_set_power */
8717 			return (DDI_FAILURE);
8718 		}
8719 	}
8720 	if (PM_WANTS_NOTIFICATION(pdip)) {
8721 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8722 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8723 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8724 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8725 		if (ret != DDI_SUCCESS) {
8726 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8727 			    pmf, PM_DEVICE(pdip)))
8728 			return (DDI_FAILURE);
8729 		}
8730 	} else {
8731 		/*
8732 		 * Since we don't know what the actual power level is,
8733 		 * we place a power hold on the parent no matter what
8734 		 * component and level is changing.
8735 		 */
8736 		pm_hold_power(pdip);
8737 	}
8738 	PM_LOCK_POWER(dip, &circ);
8739 	clevel = PM_CURPOWER(dip, comp);
8740 	/*
8741 	 * It's possible that a call was made to pm_update_maxpower()
8742 	 * on another thread before we took the lock above. So, we need to
8743 	 * make sure that this request isn't processed after the
8744 	 * change of power executed on behalf of pm_update_maxpower().
8745 	 */
8746 	if (nlevel > pm_get_normal_power(dip, comp)) {
8747 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8748 		    pmf))
8749 		ret = DDI_FAILURE;
8750 		*iresp = DDI_FAILURE;
8751 		goto post_notify;
8752 	}
8753 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8754 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8755 	    clevel, dir))
8756 	switch (direction) {
8757 	case PM_LEVEL_UPONLY:
8758 		/* Powering up */
8759 		if (clevel >= nlevel) {
8760 			PMD(PMD_SET, ("%s: current level is already "
8761 			    "at or above the requested level.\n", pmf))
8762 			*iresp = DDI_SUCCESS;
8763 			ret = DDI_SUCCESS;
8764 			goto post_notify;
8765 		}
8766 		break;
8767 	case PM_LEVEL_EXACT:
8768 		/* specific level request */
8769 		if (clevel == nlevel && !PM_ISBC(dip)) {
8770 			PMD(PMD_SET, ("%s: current level is already "
8771 			    "at the requested level.\n", pmf))
8772 			*iresp = DDI_SUCCESS;
8773 			ret = DDI_SUCCESS;
8774 			goto post_notify;
8775 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8776 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8777 			if (!pm_cfb_enabled) {
8778 				PMD(PMD_ERROR | PMD_CFB,
8779 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8780 				*errnop = EINVAL;
8781 				*iresp = DDI_FAILURE;
8782 				ret = DDI_FAILURE;
8783 				goto post_notify;
8784 			}
8785 			mutex_enter(&pm_cfb_lock);
8786 			while (cfb_inuse) {
8787 				mutex_exit(&pm_cfb_lock);
8788 				if (delay_sig(1) == EINTR) {
8789 					ret = DDI_FAILURE;
8790 					*iresp = DDI_FAILURE;
8791 					*errnop = EINTR;
8792 					goto post_notify;
8793 				}
8794 				mutex_enter(&pm_cfb_lock);
8795 			}
8796 			mutex_exit(&pm_cfb_lock);
8797 		}
8798 		break;
8799 	case PM_LEVEL_DOWNONLY:
8800 		/* Powering down */
8801 		thresh = cur_threshold(dip, comp);
8802 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8803 		if (scan && ((PM_KUC(dip) != 0) ||
8804 		    (cp->pmc_busycount > 0) ||
8805 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8806 #ifdef	DEBUG
8807 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8808 				PMD(PMD_SET, ("%s: scan failed: "
8809 				    "kidsupcnt != 0\n", pmf))
8810 			if (cp->pmc_busycount > 0)
8811 				PMD(PMD_SET, ("%s: scan failed: "
8812 				    "device become busy\n", pmf))
8813 			if (idletime < thresh)
8814 				PMD(PMD_SET, ("%s: scan failed: device "
8815 				    "hasn't been idle long enough\n", pmf))
8816 #endif
8817 			*iresp = DDI_FAILURE;
8818 			*errnop = EBUSY;
8819 			ret = DDI_FAILURE;
8820 			goto post_notify;
8821 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8822 			PMD(PMD_SET, ("%s: current level is already at "
8823 			    "or below the requested level.\n", pmf))
8824 			*iresp = DDI_SUCCESS;
8825 			ret = DDI_SUCCESS;
8826 			goto post_notify;
8827 		}
8828 		break;
8829 	}
8830 
8831 	if (PM_IS_CFB(dip) && (comps_off_incr =
8832 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8833 		/*
8834 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8835 		 * component from full power.  Remember that we tried to
8836 		 * lower power in case it fails and we need to back out
8837 		 * the adjustment.
8838 		 */
8839 		update_comps_off(comps_off_incr, dip);
8840 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8841 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8842 		    pm_cfb_comps_off))
8843 	}
8844 
8845 	if ((*iresp = power_dev(dip,
8846 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8847 #ifdef DEBUG
8848 		/*
8849 		 * All descendents of this node should already be powered off.
8850 		 */
8851 		if (PM_CURPOWER(dip, comp) == 0) {
8852 			pm_desc_pwrchk_t pdpchk;
8853 			pdpchk.pdpc_dip = dip;
8854 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8855 			ndi_devi_enter(dip, &circ_db);
8856 			for (cdip = ddi_get_child(dip); cdip != NULL;
8857 			    cdip = ddi_get_next_sibling(cdip)) {
8858 				ndi_devi_enter(cdip, &ccirc_db);
8859 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8860 				    (void *)&pdpchk);
8861 				ndi_devi_exit(cdip, ccirc_db);
8862 			}
8863 			ndi_devi_exit(dip, circ_db);
8864 		}
8865 #endif
8866 		/*
8867 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8868 		 * back up to full power.
8869 		 */
8870 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8871 			update_comps_off(comps_off_incr, dip);
8872 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8873 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8874 			    comp, clevel, nlevel, pm_cfb_comps_off))
8875 		}
8876 		dodeps = 0;
8877 		if (POWERING_OFF(clevel, nlevel)) {
8878 			if (PM_ISBC(dip)) {
8879 				dodeps = (comp == 0);
8880 			} else {
8881 				int i;
8882 				dodeps = 1;
8883 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8884 					/* if some component still on */
8885 					if (PM_CURPOWER(dip, i)) {
8886 						dodeps = 0;
8887 						break;
8888 					}
8889 				}
8890 			}
8891 			if (dodeps)
8892 				work_type = PM_DEP_WK_POWER_OFF;
8893 		} else if (POWERING_ON(clevel, nlevel)) {
8894 			if (PM_ISBC(dip)) {
8895 				dodeps = (comp == 0);
8896 			} else {
8897 				int i;
8898 				dodeps = 1;
8899 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8900 					if (i == comp)
8901 						continue;
8902 					if (PM_CURPOWER(dip, i) > 0) {
8903 						dodeps = 0;
8904 						break;
8905 					}
8906 				}
8907 			}
8908 			if (dodeps)
8909 				work_type = PM_DEP_WK_POWER_ON;
8910 		}
8911 
8912 		if (dodeps) {
8913 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8914 
8915 			(void) ddi_pathname(dip, pathbuf);
8916 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8917 			    PM_DEP_NOWAIT, NULL, 0);
8918 			kmem_free(pathbuf, MAXPATHLEN);
8919 		}
8920 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8921 			int old;
8922 
8923 			/* If old power cached during deadlock, use it. */
8924 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8925 			    cp->pmc_phc_pwr : olevel);
8926 			mutex_enter(&pm_rsvp_lock);
8927 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8928 			    old, canblock);
8929 			pm_enqueue_notify_others(&devl, canblock);
8930 			mutex_exit(&pm_rsvp_lock);
8931 		} else {
8932 			pm_ppm_devlist_t *p;
8933 			pm_ppm_devlist_t *next;
8934 			for (p = devl; p != NULL; p = next) {
8935 				next = p->ppd_next;
8936 				kmem_free(p, sizeof (pm_ppm_devlist_t));
8937 			}
8938 			devl = NULL;
8939 		}
8940 
8941 		/*
8942 		 * If we are coming from a scan, don't do it again,
8943 		 * else we can have infinite loops.
8944 		 */
8945 		if (!scan)
8946 			pm_rescan(dip);
8947 	} else {
8948 		/* if we incremented pm_comps_off_count, but failed */
8949 		if (comps_off_incr > 0) {
8950 			update_comps_off(-comps_off_incr, dip);
8951 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8952 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8953 			    comp, clevel, nlevel, pm_cfb_comps_off))
8954 		}
8955 		*errnop = EIO;
8956 	}
8957 
8958 post_notify:
8959 	/*
8960 	 * This thread may have been in deadlock with pm_power_has_changed.
8961 	 * Before releasing power lock, clear the flag which marks this
8962 	 * condition.
8963 	 */
8964 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8965 
8966 	/*
8967 	 * Update the old power level in the bus power structure with the
8968 	 * actual power level before the transition was made to the new level.
8969 	 * Some involved parents depend on this information to keep track of
8970 	 * their children's power transition.
8971 	 */
8972 	if (*iresp != DDI_FAILURE)
8973 		bpc->bpc_olevel = clevel;
8974 
8975 	if (PM_WANTS_NOTIFICATION(pdip)) {
8976 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8977 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8978 		PM_UNLOCK_POWER(dip, circ);
8979 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8980 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8981 		    PM_DEVICE(dip), ret))
8982 	} else {
8983 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8984 		PM_UNLOCK_POWER(dip, circ);
8985 		/*
8986 		 * Now that we know what power transition has occurred
8987 		 * (if any), release the power hold.  Leave the hold
8988 		 * in effect in the case of OFF->ON transition.
8989 		 */
8990 		if (!(clevel == 0 && nlevel > 0 &&
8991 		    (!PM_ISBC(dip) || comp == 0)))
8992 			pm_rele_power(pdip);
8993 		/*
8994 		 * If the power transition was an ON->OFF transition,
8995 		 * remove the power hold from the parent.
8996 		 */
8997 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
8998 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
8999 			pm_rele_power(pdip);
9000 	}
9001 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
9002 		return (DDI_FAILURE);
9003 	else
9004 		return (DDI_SUCCESS);
9005 }
9006 
9007 /*
9008  * If an app (SunVTS or Xsun) has taken control, then block until it
9009  * gives it up or makes the requested power level change, unless
9010  * we have other instructions about blocking.  Returns DDI_SUCCESS,
9011  * DDI_FAILURE or EAGAIN (owner released device from directpm).
9012  */
9013 static int
9014 pm_busop_match_request(dev_info_t *dip, void *arg)
9015 {
9016 	PMD_FUNC(pmf, "bp_match_request")
9017 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
9018 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
9019 	int comp = bpc->bpc_comp;
9020 	int nlevel = bpc->bpc_nlevel;
9021 	pm_canblock_t canblock = pspm->pspm_canblock;
9022 	int direction = pspm->pspm_direction;
9023 	int clevel, circ;
9024 
9025 	ASSERT(PM_IAM_LOCKING_DIP(dip));
9026 	PM_LOCK_POWER(dip, &circ);
9027 	clevel = PM_CURPOWER(dip, comp);
9028 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
9029 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
9030 	if (direction == PM_LEVEL_UPONLY) {
9031 		if (clevel >= nlevel) {
9032 			PM_UNLOCK_POWER(dip, circ);
9033 			PM_UNLOCK_DIP(dip);
9034 			return (DDI_SUCCESS);
9035 		}
9036 	} else if (clevel == nlevel) {
9037 		PM_UNLOCK_POWER(dip, circ);
9038 		PM_UNLOCK_DIP(dip);
9039 		return (DDI_SUCCESS);
9040 	}
9041 	if (canblock == PM_CANBLOCK_FAIL) {
9042 		PM_UNLOCK_POWER(dip, circ);
9043 		PM_UNLOCK_DIP(dip);
9044 		return (DDI_FAILURE);
9045 	}
9046 	if (canblock == PM_CANBLOCK_BLOCK) {
9047 		/*
9048 		 * To avoid a deadlock, we must not hold the
9049 		 * power lock when we pm_block.
9050 		 */
9051 		PM_UNLOCK_POWER(dip, circ);
9052 		PMD(PMD_SET, ("%s: blocking\n", pmf))
9053 		/* pm_block releases dip lock */
9054 		switch (pm_block(dip, comp, nlevel, clevel)) {
9055 		case PMP_RELEASE:
9056 			return (EAGAIN);
9057 		case PMP_SUCCEED:
9058 			return (DDI_SUCCESS);
9059 		case PMP_FAIL:
9060 			return (DDI_FAILURE);
9061 		}
9062 	} else {
9063 		ASSERT(0);
9064 	}
9065 	_NOTE(NOTREACHED);
9066 	return (DDI_FAILURE);	/* keep gcc happy */
9067 }
9068 
9069 static int
9070 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
9071 {
9072 	PMD_FUNC(pmf, "all_to_normal_nexus")
9073 	int		*normal;
9074 	int		i, ncomps;
9075 	size_t		size;
9076 	int		changefailed = 0;
9077 	int		ret, result = DDI_SUCCESS;
9078 	pm_bp_nexus_pwrup_t	bpn;
9079 	pm_sp_misc_t	pspm;
9080 
9081 	ASSERT(PM_GET_PM_INFO(dip));
9082 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9083 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
9084 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
9085 		return (DDI_FAILURE);
9086 	}
9087 	ncomps = PM_NUMCMPTS(dip);
9088 	for (i = 0; i < ncomps; i++) {
9089 		bpn.bpn_dip = dip;
9090 		bpn.bpn_comp = i;
9091 		bpn.bpn_level = normal[i];
9092 		pspm.pspm_canblock = canblock;
9093 		pspm.pspm_scan = 0;
9094 		bpn.bpn_private = &pspm;
9095 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
9096 		    (void *)&bpn, (void *)&result);
9097 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
9098 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
9099 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
9100 			    i, normal[i], result))
9101 			changefailed++;
9102 		}
9103 	}
9104 	kmem_free(normal, size);
9105 	if (changefailed) {
9106 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
9107 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
9108 		return (DDI_FAILURE);
9109 	}
9110 	return (DDI_SUCCESS);
9111 }
9112 
9113 int
9114 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
9115     dev_info_t *tdip)
9116 {
9117 	PMD_FUNC(pmf, "noinvol_update")
9118 	pm_bp_noinvol_t args;
9119 	int ret;
9120 	int result = DDI_SUCCESS;
9121 
9122 	args.bpni_path = path;
9123 	args.bpni_dip = tdip;
9124 	args.bpni_cmd = subcmd;
9125 	args.bpni_wasvolpmd = wasvolpmd;
9126 	args.bpni_volpmd = volpmd;
9127 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
9128 	    "volpmd %d wasvolpmd %d\n", pmf,
9129 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
9130 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
9131 	    &args, &result);
9132 	return (ret);
9133 }
9134 
9135 void
9136 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9137 {
9138 	PMD_FUNC(pmf, "noinvol_update_node")
9139 
9140 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9141 	switch (req->bpni_cmd) {
9142 	case PM_BP_NOINVOL_ATTACH:
9143 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
9144 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9145 		    DEVI(dip)->devi_pm_noinvolpm,
9146 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9147 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9148 		PM_LOCK_DIP(dip);
9149 		DEVI(dip)->devi_pm_noinvolpm--;
9150 		if (req->bpni_wasvolpmd) {
9151 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9152 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9153 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9154 			    DEVI(dip)->devi_pm_volpmd - 1))
9155 			if (DEVI(dip)->devi_pm_volpmd)
9156 				DEVI(dip)->devi_pm_volpmd--;
9157 		}
9158 		PM_UNLOCK_DIP(dip);
9159 		break;
9160 
9161 	case PM_BP_NOINVOL_DETACH:
9162 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9163 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9164 		    DEVI(dip)->devi_pm_noinvolpm,
9165 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9166 		PM_LOCK_DIP(dip);
9167 		DEVI(dip)->devi_pm_noinvolpm++;
9168 		if (req->bpni_wasvolpmd) {
9169 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9170 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9171 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9172 			    DEVI(dip)->devi_pm_volpmd + 1))
9173 			DEVI(dip)->devi_pm_volpmd++;
9174 		}
9175 		PM_UNLOCK_DIP(dip);
9176 		break;
9177 
9178 	case PM_BP_NOINVOL_REMDRV:
9179 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9180 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9181 		    DEVI(dip)->devi_pm_noinvolpm,
9182 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9183 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9184 		PM_LOCK_DIP(dip);
9185 		DEVI(dip)->devi_pm_noinvolpm--;
9186 		if (req->bpni_wasvolpmd) {
9187 			PMD(PMD_NOINVOL,
9188 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9189 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9190 			    DEVI(dip)->devi_pm_volpmd,
9191 			    DEVI(dip)->devi_pm_volpmd - 1))
9192 			/*
9193 			 * A power up could come in between and
9194 			 * clear the volpmd, if that's the case,
9195 			 * volpmd would be clear.
9196 			 */
9197 			if (DEVI(dip)->devi_pm_volpmd)
9198 				DEVI(dip)->devi_pm_volpmd--;
9199 		}
9200 		PM_UNLOCK_DIP(dip);
9201 		break;
9202 
9203 	case PM_BP_NOINVOL_CFB:
9204 		PMD(PMD_NOINVOL,
9205 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9206 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9207 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9208 		PM_LOCK_DIP(dip);
9209 		DEVI(dip)->devi_pm_noinvolpm++;
9210 		PM_UNLOCK_DIP(dip);
9211 		break;
9212 
9213 	case PM_BP_NOINVOL_POWER:
9214 		PMD(PMD_NOINVOL,
9215 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9216 		    pmf, PM_DEVICE(dip),
9217 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9218 		    req->bpni_volpmd))
9219 		PM_LOCK_DIP(dip);
9220 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9221 		PM_UNLOCK_DIP(dip);
9222 		break;
9223 
9224 	default:
9225 		break;
9226 	}
9227 
9228 }
9229 
#ifdef DEBUG
/*
 * DEBUG-only device tree walk callback (passed to ddi_walk_devs()).  arg is
 * a pm_desc_pwrchk_t naming the device that is being powered off
 * (pdpc_dip).  For each component of the visited node that is at a non-zero
 * power level, complain via cmn_err(): CE_PANIC when pdpc_par_involved is 0,
 * CE_WARN otherwise.  Nodes without pm info are skipped.
 *
 * Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	pm_info_t *pminfo = PM_GET_PM_INFO(dip);
	int comp;
	/* LINTED */
	int level, severity;

	if (pminfo == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (comp = 0; comp < PM_NUMCMPTS(dip); comp++) {
		/* LINTED */
		level = PM_CURPOWER(dip, comp);
		if (level != 0) {
			/* E_FUNC_SET_NOT_USED */
			if (chk->pdpc_par_involved == 0)
				severity = CE_PANIC;
			else
				severity = CE_WARN;
			PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while "
			    "desc %s@%s(%s#%d)[%d] is at %d\n", pmf,
			    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), comp,
			    level))
			cmn_err(severity, "!device %s@%s(%s#%d) is powered "
			    "on, while its ancestor, %s@%s(%s#%d), is "
			    "powering off!",
			    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
		}
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9262 
9263 /*
9264  * Record the fact that one thread is borrowing the lock on a device node.
9265  * Use is restricted to the case where the lending thread will block until
9266  * the borrowing thread (always curthread) completes.
9267  */
9268 void
9269 pm_borrow_lock(kthread_t *lender)
9270 {
9271 	lock_loan_t *prev = &lock_loan_head;
9272 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9273 
9274 	cur->pmlk_borrower = curthread;
9275 	cur->pmlk_lender = lender;
9276 	mutex_enter(&pm_loan_lock);
9277 	cur->pmlk_next = prev->pmlk_next;
9278 	prev->pmlk_next = cur;
9279 	mutex_exit(&pm_loan_lock);
9280 }
9281 
9282 /*
9283  * Return the borrowed lock.  A thread can borrow only one.
9284  */
9285 void
9286 pm_return_lock(void)
9287 {
9288 	lock_loan_t *cur;
9289 	lock_loan_t *prev = &lock_loan_head;
9290 
9291 	mutex_enter(&pm_loan_lock);
9292 	ASSERT(prev->pmlk_next != NULL);
9293 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9294 		if (cur->pmlk_borrower == curthread)
9295 			break;
9296 
9297 	ASSERT(cur != NULL);
9298 	prev->pmlk_next = cur->pmlk_next;
9299 	mutex_exit(&pm_loan_lock);
9300 	kmem_free(cur, sizeof (*cur));
9301 }
9302 
#if defined(__x86)

/*
 * Polled character I/O through fixed I/O ports, x86 only.
 * NOTE(review): the port numbers and status bits below look like the legacy
 * first serial port (16550-style UART: data at 0x3f8, line status at 0x3fd,
 * bit 0 = receive data ready, bit 5 = transmit holding register empty) --
 * confirm against the platform documentation.
 */
#define	CPR_RXR	0x1		/* status bit tested before reading */
#define	CPR_TXR	0x20		/* status bit tested before writing */
#define	CPR_DATAREG	0x3f8	/* port read/written for character data */
#define	CPR_LSTAT	0x3fd	/* port polled for CPR_RXR / CPR_TXR */
#define	CPR_INTRCTL	0x3f9	/* not referenced by the routines below */

/*
 * Poll CPR_LSTAT, waiting 10 microseconds between polls, until CPR_RXR is
 * set; then return the byte read from CPR_DATAREG.
 */
char
pm_getchar(void)
{
	for (;;) {
		if ((inb(CPR_LSTAT) & CPR_RXR) == CPR_RXR)
			break;
		drv_usecwait(10);
	}

	return (inb(CPR_DATAREG));
}

/*
 * Poll CPR_LSTAT, waiting 10 microseconds between polls, until CPR_TXR is
 * set; then write c to CPR_DATAREG.
 */
void
pm_putchar(char c)
{
	for (;;) {
		if ((inb(CPR_LSTAT) & CPR_TXR) != 0)
			break;
		drv_usecwait(10);
	}

	outb(CPR_DATAREG, c);
}

/*
 * Emit the NUL-terminated string s one character at a time via
 * pm_putchar().  No formatting is performed.
 */
void
pm_printf(char *s)
{
	for (; *s != '\0'; s++)
		pm_putchar(*s);
}

#endif
9339 
9340 int
9341 pm_ppm_searchlist(pm_searchargs_t *sp)
9342 {
9343 	power_req_t power_req;
9344 	int result = 0;
9345 	/* LINTED */
9346 	int ret;
9347 
9348 	power_req.request_type = PMR_PPM_SEARCH_LIST;
9349 	power_req.req.ppm_search_list_req.searchlist = sp;
9350 	ASSERT(DEVI(ddi_root_node())->devi_pm_ppm);
9351 	ret = pm_ctlops((dev_info_t *)DEVI(ddi_root_node())->devi_pm_ppm,
9352 	    ddi_root_node(), DDI_CTLOPS_POWER, &power_req, &result);
9353 	PMD(PMD_SX, ("pm_ppm_searchlist returns %d, result %d\n",
9354 	    ret, result))
9355 	return (result);
9356 }
9357