xref: /illumos-gate/usr/src/uts/common/os/sunpm.c (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
27  */
28 
29 /*
30  * sunpm.c builds sunpm.o	"power management framework"
31  *	kernel-resident power management code.  Implements power management
32  *	policy
33  *	Assumes: all backwards compat. device components wake up on &
34  *		 the pm_info pointer in dev_info is initially NULL
35  *
36  * PM - (device) Power Management
37  *
38  * Each device may have 0 or more components.  If a device has no components,
39  * then it can't be power managed.  Each component has 2 or more
40  * power states.
41  *
42  * "Backwards Compatible" (bc) devices:
43  * There are two different types of devices from the point of view of this
44  * code.  The original type, left over from the original PM implementation on
45  * the voyager platform are known in this code as "backwards compatible"
46  * devices (PM_ISBC(dip) returns true).
47  * They are recognized by the pm code by the lack of a pm-components property
48  * and a call made by the driver to pm_create_components(9F).
49  * For these devices, component 0 is special, and represents the power state
50  * of the device.  If component 0 is to be set to power level 0 (off), then
51  * the framework must first call into the driver's detach(9E) routine with
52  * DDI_PM_SUSPEND, to get the driver to save the hardware state of the device.
53  * After setting component 0 from 0 to a non-zero power level, a call must be
54  * made into the driver's attach(9E) routine with DDI_PM_RESUME.
55  *
56  * Currently, the only way to get a bc device power managed is via a set of
57  * ioctls (PM_DIRECT_PM, PM_SET_CURRENT_POWER) issued to /dev/pm.
58  *
59  * For non-bc devices, the driver describes the components by exporting a
60  * pm-components(9P) property that tells how many components there are,
61  * tells what each component's power state values are, and provides human
62  * readable strings (currently unused) for each component name and power state.
63  * Devices which export pm-components(9P) are automatically power managed
64  * whenever autopm is enabled (via PM_START_PM ioctl issued by pmconfig(1M)
65  * after parsing power.conf(4)). The exception to this rule is that power
66  * manageable CPU devices may be automatically managed independently of autopm
67  * by either enabling or disabling (via PM_START_CPUPM and PM_STOP_CPUPM
68  * ioctls) cpupm. If the CPU devices are not managed independently, then they
69  * are managed by autopm. In either case, for automatically power managed
70  * devices, all components are considered independent of each other, and it is
71  * up to the driver to decide when a transition requires saving or restoring
72  * hardware state.
73  *
74  * Each device component also has a threshold time associated with each power
75  * transition (see power.conf(4)), and a busy/idle state maintained by the
76  * driver calling pm_idle_component(9F) and pm_busy_component(9F).
77  * Components are created idle.
78  *
79  * The PM framework provides several functions:
80  * -implement PM policy as described in power.conf(4)
81  *  Policy is set by pmconfig(1M) issuing pm ioctls based on power.conf(4).
82  *  Policies consist of:
83  *    -set threshold values (defaults if none provided by pmconfig)
84  *    -set dependencies among devices
85  *    -enable/disable autopm
86  *    -enable/disable cpupm
87  *    -turn down idle components based on thresholds (if autopm or cpupm is
88  *     enabled) (aka scanning)
89  *    -maintain power states based on dependencies among devices
90  *    -upon request, or when the frame buffer powers off, attempt to turn off
91  *     all components that are idle or become idle over the next (10 sec)
92  *     period in an attempt to get down to an EnergyStar compliant state
93  *    -prevent powering off of a device which exported the
94  *     pm-no-involuntary-power-cycles property without active involvement of
95  *     the device's driver (so no removing power when the device driver is
96  *     not attached)
97  * -provide a mechanism for a device driver to request that a device's component
98  *  be brought back to the power level necessary for the use of the device
99  * -allow a process to directly control the power levels of device components
100  *  (via ioctls issued to /dev/pm--see usr/src/uts/common/io/pm.c)
101  * -ensure that the console frame buffer is powered up before being referenced
102  *  via prom_printf() or other prom calls that might generate console output
103  * -maintain implicit dependencies (e.g. parent must be powered up if child is)
104  * -provide "backwards compatible" behavior for devices without pm-components
105  *  property
106  *
107  * Scanning:
108  * Whenever autopm or cpupm  is enabled, the framework attempts to bring each
109  * component of each managed device to its lowest power based on the threshold
110  * of idleness associated with each transition and the busy/idle state of the
111  * component.
112  *
113  * The actual work of this is done by pm_scan_dev(), which cycles through each
114  * component of a device, checking its idleness against its current threshold,
115  * and calling pm_set_power() as appropriate to change the power level.
116  * This function also indicates when it would next be profitable to scan the
117  * device again, and a new scan is scheduled after that time.
118  *
119  * Dependencies:
120  * It is possible to establish a dependency between the power states of two
121  * otherwise unrelated devices.  This is currently done to ensure that the
122  * cdrom is always up whenever the console framebuffer is up, so that the user
123  * can insert a cdrom and see a popup as a result.
124  *
125  * The dependency terminology used in power.conf(4) is not easy to understand,
126  * so we've adopted a different terminology in the implementation.  We write
127  * of a "keeps up" and a "kept up" device.  A relationship can be established
128  * where one device keeps up another.  That means that if the keepsup device
129  * has any component that is at a non-zero power level, all components of the
130  * "kept up" device must be brought to full power.  This relationship is
131  * asynchronous.  When the keeping device is powered up, a request is queued
132  * to a worker thread to bring up the kept device.  The caller does not wait.
133  * Scan will not turn down a kept up device.
134  *
135  * Direct PM:
136  * A device may be directly power managed by a process.  If a device is
137  * directly pm'd, then it will not be scanned, and dependencies will not be
138  * enforced.  If a directly pm'd device's driver requests a power change (via
139  * pm_raise_power(9F)), then the request is blocked and notification is sent
140  * to the controlling process, which must issue the requested power change for
141  * the driver to proceed.
142  *
143  */
144 
145 #include <sys/types.h>
146 #include <sys/errno.h>
147 #include <sys/callb.h>		/* callback registration during CPR */
148 #include <sys/conf.h>		/* driver flags and functions */
149 #include <sys/open.h>		/* OTYP_CHR definition */
150 #include <sys/stat.h>		/* S_IFCHR definition */
151 #include <sys/pathname.h>	/* name -> dev_info xlation */
152 #include <sys/ddi_impldefs.h>	/* dev_info node fields */
153 #include <sys/kmem.h>		/* memory alloc stuff */
154 #include <sys/debug.h>
155 #include <sys/archsystm.h>
156 #include <sys/pm.h>
157 #include <sys/ddi.h>
158 #include <sys/sunddi.h>
159 #include <sys/sunndi.h>
160 #include <sys/sunpm.h>
161 #include <sys/epm.h>
162 #include <sys/vfs.h>
163 #include <sys/mode.h>
164 #include <sys/mkdev.h>
165 #include <sys/promif.h>
166 #include <sys/consdev.h>
167 #include <sys/esunddi.h>
168 #include <sys/modctl.h>
169 #include <sys/fs/ufs_fs.h>
170 #include <sys/note.h>
171 #include <sys/taskq.h>
172 #include <sys/bootconf.h>
173 #include <sys/reboot.h>
174 #include <sys/spl.h>
175 #include <sys/disp.h>
176 #include <sys/sobject.h>
177 #include <sys/sunmdi.h>
178 #include <sys/systm.h>
179 #include <sys/cpuvar.h>
180 #include <sys/cyclic.h>
181 #include <sys/uadmin.h>
182 #include <sys/srn.h>
183 
184 
185 /*
186  * PM LOCKING
187  *	The list of locks:
188  * Global pm mutex locks.
189  *
190  * pm_scan_lock:
191  *		It protects the timeout id of the scan thread, and the value
192  *		of autopm_enabled and cpupm.  This lock is not held
193  *		concurrently with any other PM locks.
194  *
195  * pm_clone_lock:	Protects the clone list and count of poll events
196  *		pending for the pm driver.
197  *		Lock ordering:
198  *			pm_clone_lock -> pm_pscc_interest_rwlock,
199  *			pm_clone_lock -> pm_pscc_direct_rwlock.
200  *
201  * pm_rsvp_lock:
202  *		Used to synchronize the data structures used for processes
203  *		to rendezvous with state change information when doing
204  *		direct PM.
205  *		Lock ordering:
206  *			pm_rsvp_lock -> pm_pscc_interest_rwlock,
207  *			pm_rsvp_lock -> pm_pscc_direct_rwlock,
208  *			pm_rsvp_lock -> pm_clone_lock.
209  *
210  * ppm_lock:	protects the list of registered ppm drivers
211  *		Lock ordering:
212  *			ppm_lock -> ppm driver unit_lock
213  *
214  * pm_compcnt_lock:
215  *		Protects count of components that are not at their lowest
216  *		power level.
217  *		Lock ordering:
218  *			pm_compcnt_lock -> ppm_lock.
219  *
220  * pm_dep_thread_lock:
221  *		Protects work list for pm_dep_thread.  Not taken concurrently
222  *		with any other pm lock.
223  *
224  * pm_remdrv_lock:
225  *		Serializes the operation of removing noinvol data structure
226  *		entries for a branch of the tree when a driver has been
227  *		removed from the system (modctl_rem_major).
228  *		Lock ordering:
229  *			pm_remdrv_lock -> pm_noinvol_rwlock.
230  *
231  * pm_cfb_lock: (High level spin lock)
232  *		Protects the count of how many components of the console
233  *		frame buffer are off (so we know if we have to bring up the
234  *		console as a result of a prom_printf, etc.).
235  *		No other locks are taken while holding this lock.
236  *
237  * pm_loan_lock:
238  *		Protects the lock_loan list.  List is used to record that one
239  *		thread has acquired a power lock but has launched another thread
240  *		to complete its processing.  An entry in the list indicates that
241  *		the worker thread can borrow the lock held by the other thread,
242  *		which must block on the completion of the worker.  Use is
243  *		specific to module loading.
244  *		No other locks are taken while holding this lock.
245  *
246  * Global PM rwlocks
247  *
248  * pm_thresh_rwlock:
249  *		Protects the list of thresholds recorded for future use (when
250  *		devices attach).
251  *		Lock ordering:
252  *			pm_thresh_rwlock -> devi_pm_lock
253  *
254  * pm_noinvol_rwlock:
255  *		Protects list of detached nodes that had noinvol registered.
256  *		No other PM locks are taken while holding pm_noinvol_rwlock.
257  *
258  * pm_pscc_direct_rwlock:
259  *		Protects the list that maps devices being directly power
260  *		managed to the processes that manage them.
261  *		Lock ordering:
262  *			pm_pscc_direct_rwlock -> psce_lock
263  *
264  * pm_pscc_interest_rwlock:
265  *		Protects the list that maps state change events to processes
266  *		that want to know about them.
267  *		Lock ordering:
268  *			pm_pscc_interest_rwlock -> psce_lock
269  *
270  * per-dip locks:
271  *
272  * Each node has these per-dip locks, which are only used if the device is
273  * a candidate for power management (e.g. has pm components)
274  *
275  * devi_pm_lock:
276  *		Protects all power management state of the node except for
277  *		power level, which is protected by ndi_devi_enter().
278  *		Encapsulated in macros PM_LOCK_DIP()/PM_UNLOCK_DIP().
279  *		Lock ordering:
280  *			devi_pm_lock -> pm_rsvp_lock,
281  *			devi_pm_lock -> pm_dep_thread_lock,
282  *			devi_pm_lock -> pm_noinvol_rwlock,
283  *			devi_pm_lock -> power lock
284  *
285  * power lock (ndi_devi_enter()):
286  *		Since changing power level is possibly a slow operation (30
287  *		seconds to spin up a disk drive), this is locked separately.
288  *		Since a call into the driver to change the power level of one
289  *		component may result in a call back into the framework to change
290  *		the power level of another, this lock allows re-entrancy by
291  *		the same thread (ndi_devi_enter is used for this because
292  *		the USB framework uses ndi_devi_enter in its power entry point,
293  *		and use of any other lock would produce a deadlock).
294  *
295  * devi_pm_busy_lock:
296  *		This lock protects the integrity of the busy count.  It is
297  *		only taken by pm_busy_component() and pm_idle_component and
298  *		some code that adjust the busy time after the timer gets set
299  *		up or after a CPR operation.  It is per-dip to keep from
300  *		single-threading all the disk drivers on a system.
301  *		It could be per component instead, but most devices have
302  *		only one component.
303  *		No other PM locks are taken while holding this lock.
304  *
305  */
306 
307 static int stdout_is_framebuffer;
308 static kmutex_t	e_pm_power_lock;
309 static kmutex_t pm_loan_lock;
310 kmutex_t	pm_scan_lock;
311 callb_id_t	pm_cpr_cb_id;
312 callb_id_t	pm_panic_cb_id;
313 callb_id_t	pm_halt_cb_id;
314 int		pm_comps_notlowest;	/* no. of comps not at lowest power */
315 int		pm_powering_down;	/* cpr is source of DDI_SUSPEND calls */
316 
317 clock_t pm_id_ticks = 5;	/* ticks to wait before scan during idle-down */
318 clock_t pm_default_min_scan = PM_DEFAULT_MIN_SCAN;
319 clock_t pm_cpu_min_scan = PM_CPU_MIN_SCAN;
320 
321 #define	PM_MIN_SCAN(dip)	(PM_ISCPU(dip) ? pm_cpu_min_scan : \
322 				    pm_default_min_scan)
323 
324 static int pm_busop_set_power(dev_info_t *,
325     void *, pm_bus_power_op_t, void *, void *);
326 static int pm_busop_match_request(dev_info_t *, void *);
327 static int pm_all_to_normal_nexus(dev_info_t *, pm_canblock_t);
328 static void e_pm_set_max_power(dev_info_t *, int, int);
329 static int e_pm_get_max_power(dev_info_t *, int);
330 
331 /*
332  * Dependency Processing is done thru a seperate thread.
333  */
334 kmutex_t	pm_dep_thread_lock;
335 kcondvar_t	pm_dep_thread_cv;
336 pm_dep_wk_t	*pm_dep_thread_workq = NULL;
337 pm_dep_wk_t	*pm_dep_thread_tail = NULL;
338 
339 /*
340  * Autopm  must be turned on by a PM_START_PM ioctl, so we don't end up
341  * power managing things in single user mode that have been suppressed via
342  * power.conf entries.  Protected by pm_scan_lock.
343  */
344 int		autopm_enabled;
345 
346 /*
347  * cpupm is turned on and off, by the PM_START_CPUPM and PM_STOP_CPUPM ioctls,
348  * to define the power management behavior of CPU devices separate from
349  * autopm. Protected by pm_scan_lock.
350  */
351 pm_cpupm_t	cpupm = PM_CPUPM_NOTSET;
352 
353 /*
354  * Defines the default mode of operation for CPU power management,
355  * either the polling implementation, or the event based dispatcher driven
356  * implementation.
357  */
358 pm_cpupm_t	cpupm_default_mode = PM_CPUPM_EVENT;
359 
360 /*
361  * AutoS3 depends on autopm being enabled, and must be enabled by
362  * PM_START_AUTOS3 command.
363  */
364 int		autoS3_enabled;
365 
366 #if !defined(__sparc)
367 /*
368  * on sparc these live in fillsysinfo.c
369  *
370  * If this variable is non-zero, cpr should return "not supported" when
371  * it is queried even though it would normally be supported on this platform.
372  */
373 int cpr_supported_override;
374 
375 /*
376  * Some platforms may need to support CPR even in the absence of
377  * having the correct platform id information.  If this
378  * variable is non-zero, cpr should proceed even in the absence
379  * of otherwise being qualified.
380  */
381 int cpr_platform_enable = 0;
382 
383 #endif
384 
385 /*
386  * pm_S3_enabled indicates that we believe the platform can support S3,
387  * which we get from pmconfig(1M)
388  */
389 int		pm_S3_enabled;
390 
391 /*
392  * This flag is true while processes are stopped for a checkpoint/resume.
393  * Controlling processes of direct pm'd devices are not available to
394  * participate in power level changes, so we bypass them when this is set.
395  */
396 static int	pm_processes_stopped;
397 
#ifdef	DEBUG

/*
 * The PMD_* bit values that go into pm_debug are in common/sys/epm.h
 */
uint_t		pm_debug = 0;

/*
 * While pm_divertdebug is set PMD() makes no prom_printf calls, so debug
 * output cannot bring up the console frame buffer.  Clear it before setting
 * pm_debug to get PMD output on the console.
 *
 * pm_set_power() increments pm_divertdebug when dip == cfb_dip (to avoid
 * deadlocks) and decrements it again on the way out.
 */
uint_t		pm_divertdebug = 1;
volatile uint_t	pm_debug_to_console = 0;
kmutex_t	pm_debug_lock;		/* protects pm_divertdebug */

void		prdeps(char *);
#endif	/* DEBUG */
421 
422 /* Globals */
423 
424 /*
425  * List of recorded thresholds and dependencies
426  */
427 pm_thresh_rec_t *pm_thresh_head;
428 krwlock_t pm_thresh_rwlock;
429 
430 pm_pdr_t *pm_dep_head;
431 static int pm_unresolved_deps = 0;
432 static int pm_prop_deps = 0;
433 
434 /*
435  * List of devices that exported no-involuntary-power-cycles property
436  */
437 pm_noinvol_t *pm_noinvol_head;
438 
439 /*
440  * Locks used in noinvol processing
441  */
442 krwlock_t pm_noinvol_rwlock;
443 kmutex_t pm_remdrv_lock;
444 
445 int pm_default_idle_threshold = PM_DEFAULT_SYS_IDLENESS;
446 int pm_system_idle_threshold;
447 int pm_cpu_idle_threshold;
448 
449 /*
450  * By default nexus has 0 threshold, and depends on its children to keep it up
451  */
452 int pm_default_nexus_threshold = 0;
453 
454 /*
455  * Data structures shared with common/io/pm.c
456  */
457 kmutex_t	pm_clone_lock;
458 kcondvar_t	pm_clones_cv[PM_MAX_CLONE];
459 uint_t		pm_poll_cnt[PM_MAX_CLONE];	/* count of events for poll */
460 unsigned char	pm_interest[PM_MAX_CLONE];
461 struct pollhead	pm_pollhead;
462 
463 /*
464  * Data structures shared with common/io/srn.c
465  */
466 kmutex_t	srn_clone_lock;		/* protects srn_signal, srn_inuse */
467 void (*srn_signal)(int type, int event);
468 int srn_inuse;				/* stop srn detach */
469 
470 extern int	hz;
471 extern char	*platform_module_list[];
472 
473 /*
474  * Wrappers for use in ddi_walk_devs
475  */
476 
477 static int		pm_set_dev_thr_walk(dev_info_t *, void *);
478 static int		pm_restore_direct_lvl_walk(dev_info_t *, void *);
479 static int		pm_save_direct_lvl_walk(dev_info_t *, void *);
480 static int		pm_discard_dep_walk(dev_info_t *, void *);
481 #ifdef DEBUG
482 static int		pm_desc_pwrchk_walk(dev_info_t *, void *);
483 #endif
484 
485 /*
486  * Routines for managing noinvol devices
487  */
488 int			pm_noinvol_update(int, int, int, char *, dev_info_t *);
489 void			pm_noinvol_update_node(dev_info_t *,
490 			    pm_bp_noinvol_t *req);
491 
492 kmutex_t pm_rsvp_lock;
493 kmutex_t pm_compcnt_lock;
494 krwlock_t pm_pscc_direct_rwlock;
495 krwlock_t pm_pscc_interest_rwlock;
496 
497 #define	PSC_INTEREST	0	/* belongs to interest psc list */
498 #define	PSC_DIRECT	1	/* belongs to direct psc list */
499 
500 pscc_t *pm_pscc_interest;
501 pscc_t *pm_pscc_direct;
502 
/* Driver major number of dip */
#define	PM_MAJOR(dip)	ddi_driver_major(dip)

/*
 * Non-zero when dip's driver is a nexus driver; 0 when dip has no driver
 * major (DDI_MAJOR_T_NONE), so devopsp[] is never indexed with it.
 */
#define	PM_IS_NEXUS(dip)	\
	((PM_MAJOR(dip) == DDI_MAJOR_T_NONE) ? 0 :	\
	NEXUS_DRV(devopsp[PM_MAJOR(dip)]))

/* Transition from level 0 to a non-zero level, and the reverse */
#define	POWERING_ON(from, to)	((from) == 0 && (to) != 0)
#define	POWERING_OFF(from, to)	((from) != 0 && (to) == 0)
508 
/*
 * Count one more component that is not at its lowest power.  Only devices
 * that are not nexus drivers, or that carry PMC_DEV_THRESH or
 * PMC_COMP_THRESH, are counted.  When the count leaves zero,
 * pm_ppm_notify_all_lowest() is called with PM_NOT_ALL_LOWEST before the
 * increment.  The whole update runs under pm_compcnt_lock.
 *
 * The expansion refers to "pmf", so the caller must have used PMD_FUNC().
 */
#define	PM_INCR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags &					\
	    (PMC_DEV_THRESH|PMC_COMP_THRESH))) {			\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip,			\
			    PM_NOT_ALL_LOWEST);				\
		pm_comps_notlowest++;					\
		PMD(PMD_LEVEL,						\
		    ("%s: %s@%s(%s#%d) incr notlowest->%d\n",		\
		    pmf, PM_DEVICE(dip), pm_comps_notlowest))		\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
/*
 * Count one less component that is not at its lowest power.  The same
 * devices are counted as in PM_INCR_NOTLOWEST.  When the count reaches
 * zero, pm_ppm_notify_all_lowest() is called with PM_ALL_LOWEST after the
 * decrement.  The whole update runs under pm_compcnt_lock.
 *
 * The expansion refers to "pmf", so the caller must have used PMD_FUNC().
 */
#define	PM_DECR_NOTLOWEST(dip) {					\
	mutex_enter(&pm_compcnt_lock);					\
	if (!PM_IS_NEXUS(dip) ||					\
	    (DEVI(dip)->devi_pm_flags &					\
	    (PMC_DEV_THRESH|PMC_COMP_THRESH))) {			\
		ASSERT(pm_comps_notlowest);				\
		pm_comps_notlowest--;					\
		PMD(PMD_LEVEL,						\
		    ("%s: %s@%s(%s#%d) decr notlowest to "		\
		    "%d\n", pmf, PM_DEVICE(dip), pm_comps_notlowest))	\
		if (pm_comps_notlowest == 0)				\
			pm_ppm_notify_all_lowest(dip, PM_ALL_LOWEST);	\
	}								\
	mutex_exit(&pm_compcnt_lock);					\
}
534 
535 /*
536  * console frame-buffer power-management is not enabled when
537  * debugging services are present.  to override, set pm_cfb_override
538  * to non-zero.
539  */
540 uint_t pm_cfb_comps_off = 0;	/* PM_LEVEL_UNKNOWN is considered on */
541 kmutex_t pm_cfb_lock;
542 int pm_cfb_enabled = 1;		/* non-zero allows pm of console frame buffer */
543 #ifdef DEBUG
544 int pm_cfb_override = 1;	/* non-zero allows pm of cfb with debuggers */
545 #else
546 int pm_cfb_override = 0;	/* non-zero allows pm of cfb with debuggers */
547 #endif
548 
549 static dev_info_t *cfb_dip = 0;
550 static dev_info_t *cfb_dip_detaching = 0;
551 uint_t cfb_inuse = 0;
552 static ddi_softintr_t pm_soft_id;
553 static boolean_t pm_soft_pending;
554 int	pm_scans_disabled = 0;
555 
556 /*
557  * A structure to record the fact that one thread has borrowed a lock held
558  * by another thread.  The context requires that the lender block on the
559  * completion of the borrower.
560  */
561 typedef struct lock_loan {
562 	struct lock_loan	*pmlk_next;
563 	kthread_t		*pmlk_borrower;
564 	kthread_t		*pmlk_lender;
565 	dev_info_t		*pmlk_dip;
566 } lock_loan_t;
567 static lock_loan_t lock_loan_head;	/* list head is a dummy element */
568 
/*
 * PMD_FUNC(func, name) declares a local string holding the function name
 * for use in PMD() output.  It expands to nothing unless both DEBUG and
 * PMDDEBUG are defined.
 */
#if defined(DEBUG) && defined(PMDDEBUG)
#define	PMD_FUNC(func, name)	char *(func) = (name);
#else	/* !(DEBUG && PMDDEBUG) */
#define	PMD_FUNC(func, name)
#endif	/* DEBUG && PMDDEBUG */
578 
579 
580 /*
581  * Must be called before first device (including pseudo) attach
582  */
583 void
584 pm_init_locks(void)
585 {
586 	mutex_init(&pm_scan_lock, NULL, MUTEX_DRIVER, NULL);
587 	mutex_init(&pm_rsvp_lock, NULL, MUTEX_DRIVER, NULL);
588 	mutex_init(&pm_compcnt_lock, NULL, MUTEX_DRIVER, NULL);
589 	mutex_init(&pm_dep_thread_lock, NULL, MUTEX_DRIVER, NULL);
590 	mutex_init(&pm_remdrv_lock, NULL, MUTEX_DRIVER, NULL);
591 	mutex_init(&pm_loan_lock, NULL, MUTEX_DRIVER, NULL);
592 	rw_init(&pm_thresh_rwlock, NULL, RW_DEFAULT, NULL);
593 	rw_init(&pm_noinvol_rwlock, NULL, RW_DEFAULT, NULL);
594 	cv_init(&pm_dep_thread_cv, NULL, CV_DEFAULT, NULL);
595 }
596 
597 static int pm_reset_timestamps(dev_info_t *, void *);
598 
599 static boolean_t
600 pm_cpr_callb(void *arg, int code)
601 {
602 	_NOTE(ARGUNUSED(arg))
603 	static int auto_save;
604 	static pm_cpupm_t cpupm_save;
605 
606 	switch (code) {
607 	case CB_CODE_CPR_CHKPT:
608 		/*
609 		 * Cancel scan or wait for scan in progress to finish
610 		 * Other threads may be trying to restart the scan, so we
611 		 * have to keep at it unil it sticks
612 		 */
613 		mutex_enter(&pm_scan_lock);
614 		ASSERT(!pm_scans_disabled);
615 		pm_scans_disabled = 1;
616 		auto_save = autopm_enabled;
617 		autopm_enabled = 0;
618 		cpupm_save = cpupm;
619 		cpupm = PM_CPUPM_NOTSET;
620 		mutex_exit(&pm_scan_lock);
621 		ddi_walk_devs(ddi_root_node(), pm_scan_stop_walk, NULL);
622 		break;
623 
624 	case CB_CODE_CPR_RESUME:
625 		ASSERT(!autopm_enabled);
626 		ASSERT(cpupm == PM_CPUPM_NOTSET);
627 		ASSERT(pm_scans_disabled);
628 		pm_scans_disabled = 0;
629 		/*
630 		 * Call pm_reset_timestamps to reset timestamps of each
631 		 * device to the time when the system is resumed so that their
632 		 * idleness can be re-calculated. That's to avoid devices from
633 		 * being powered down right after resume if the system was in
634 		 * suspended mode long enough.
635 		 */
636 		ddi_walk_devs(ddi_root_node(), pm_reset_timestamps, NULL);
637 
638 		autopm_enabled = auto_save;
639 		cpupm = cpupm_save;
640 		/*
641 		 * If there is any auto-pm device, get the scanning
642 		 * going. Otherwise don't bother.
643 		 */
644 		ddi_walk_devs(ddi_root_node(), pm_rescan_walk, NULL);
645 		break;
646 	}
647 	return (B_TRUE);
648 }
649 
650 /*
651  * This callback routine is called when there is a system panic.  This function
652  * exists for prototype matching.
653  */
654 static boolean_t
655 pm_panic_callb(void *arg, int code)
656 {
657 	_NOTE(ARGUNUSED(arg, code))
658 	void pm_cfb_check_and_powerup(void);
659 	PMD(PMD_CFB, ("pm_panic_callb\n"))
660 	pm_cfb_check_and_powerup();
661 	return (B_TRUE);
662 }
663 
664 static boolean_t
665 pm_halt_callb(void *arg, int code)
666 {
667 	_NOTE(ARGUNUSED(arg, code))
668 	return (B_TRUE);
669 }
670 
671 static void pm_dep_thread(void);
672 
673 /*
674  * This needs to be called after the root and platform drivers are loaded
675  * and be single-threaded with respect to driver attach/detach
676  */
677 void
678 pm_init(void)
679 {
680 	PMD_FUNC(pmf, "pm_init")
681 	char **mod;
682 	extern pri_t minclsyspri;
683 
684 	pm_comps_notlowest = 0;
685 	pm_system_idle_threshold = pm_default_idle_threshold;
686 	pm_cpu_idle_threshold = 0;
687 
688 	pm_cpr_cb_id = callb_add(pm_cpr_callb, (void *)NULL,
689 	    CB_CL_CPR_PM, "pm_cpr");
690 	pm_panic_cb_id = callb_add(pm_panic_callb, (void *)NULL,
691 	    CB_CL_PANIC, "pm_panic");
692 	pm_halt_cb_id = callb_add(pm_halt_callb, (void *)NULL,
693 	    CB_CL_HALT, "pm_halt");
694 
695 	/*
696 	 * Create a thread to do dependency processing.
697 	 */
698 	(void) thread_create(NULL, 0, (void (*)())pm_dep_thread, NULL, 0, &p0,
699 	    TS_RUN, minclsyspri);
700 
701 	/*
702 	 * loadrootmodules already loaded these ppm drivers, now get them
703 	 * attached so they can claim the root drivers as they attach
704 	 */
705 	for (mod = platform_module_list; *mod; mod++) {
706 		if (i_ddi_attach_hw_nodes(*mod) != DDI_SUCCESS) {
707 			cmn_err(CE_WARN, "!cannot load platform pm driver %s\n",
708 			    *mod);
709 		} else {
710 			PMD(PMD_DHR, ("%s: %s (%s)\n", pmf, *mod,
711 			    ddi_major_to_name(ddi_name_to_major(*mod))))
712 		}
713 	}
714 }
715 
716 /*
717  * pm_scan_init - create pm scan data structure.  Called (if autopm or cpupm
718  * enabled) when device becomes power managed or after a failed detach and
719  * when autopm is started via PM_START_PM or PM_START_CPUPM ioctls, and after
720  * a CPR resume to get all the devices scanning again.
721  */
722 void
723 pm_scan_init(dev_info_t *dip)
724 {
725 	PMD_FUNC(pmf, "scan_init")
726 	pm_scan_t	*scanp;
727 
728 	ASSERT(!PM_ISBC(dip));
729 
730 	PM_LOCK_DIP(dip);
731 	scanp = PM_GET_PM_SCAN(dip);
732 	if (!scanp) {
733 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): create scan data\n",
734 		    pmf, PM_DEVICE(dip)))
735 		scanp =  kmem_zalloc(sizeof (pm_scan_t), KM_SLEEP);
736 		DEVI(dip)->devi_pm_scan = scanp;
737 	} else if (scanp->ps_scan_flags & PM_SCAN_STOP) {
738 		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): "
739 		    "clear PM_SCAN_STOP flag\n", pmf, PM_DEVICE(dip)))
740 		scanp->ps_scan_flags &= ~PM_SCAN_STOP;
741 	}
742 	PM_UNLOCK_DIP(dip);
743 }
744 
745 /*
746  * pm_scan_fini - remove pm scan data structure when stopping pm on the device
747  */
748 void
749 pm_scan_fini(dev_info_t *dip)
750 {
751 	PMD_FUNC(pmf, "scan_fini")
752 	pm_scan_t	*scanp;
753 
754 	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
755 	ASSERT(!PM_ISBC(dip));
756 	PM_LOCK_DIP(dip);
757 	scanp = PM_GET_PM_SCAN(dip);
758 	if (!scanp) {
759 		PM_UNLOCK_DIP(dip);
760 		return;
761 	}
762 
763 	ASSERT(!scanp->ps_scan_id && !(scanp->ps_scan_flags &
764 	    (PM_SCANNING | PM_SCAN_DISPATCHED | PM_SCAN_AGAIN)));
765 
766 	kmem_free(scanp, sizeof (pm_scan_t));
767 	DEVI(dip)->devi_pm_scan = NULL;
768 	PM_UNLOCK_DIP(dip);
769 }
770 
771 /*
772  * Given a pointer to a component struct, return the current power level
773  * (struct contains index unless it is a continuous level).
774  * Located here in hopes of getting both this and dev_is_needed into the
775  * cache together
776  */
777 static int
778 cur_power(pm_component_t *cp)
779 {
780 	if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN)
781 		return (cp->pmc_cur_pwr);
782 
783 	return (cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr]);
784 }
785 
786 static char *
787 pm_decode_direction(int direction)
788 {
789 	switch (direction) {
790 	case PM_LEVEL_UPONLY:
791 		return ("up");
792 
793 	case PM_LEVEL_EXACT:
794 		return ("exact");
795 
796 	case PM_LEVEL_DOWNONLY:
797 		return ("down");
798 
799 	default:
800 		return ("INVALID DIRECTION");
801 	}
802 }
803 
804 char *
805 pm_decode_op(pm_bus_power_op_t op)
806 {
807 	switch (op) {
808 	case BUS_POWER_CHILD_PWRCHG:
809 		return ("CHILD_PWRCHG");
810 	case BUS_POWER_NEXUS_PWRUP:
811 		return ("NEXUS_PWRUP");
812 	case BUS_POWER_PRE_NOTIFICATION:
813 		return ("PRE_NOTIFICATION");
814 	case BUS_POWER_POST_NOTIFICATION:
815 		return ("POST_NOTIFICATION");
816 	case BUS_POWER_HAS_CHANGED:
817 		return ("HAS_CHANGED");
818 	case BUS_POWER_NOINVOL:
819 		return ("NOINVOL");
820 	default:
821 		return ("UNKNOWN OP");
822 	}
823 }
824 
825 /*
826  * Returns true if level is a possible (valid) power level for component
827  */
828 int
829 e_pm_valid_power(dev_info_t *dip, int cmpt, int level)
830 {
831 	PMD_FUNC(pmf, "e_pm_valid_power")
832 	pm_component_t *cp = PM_CP(dip, cmpt);
833 	int i;
834 	int *ip = cp->pmc_comp.pmc_lvals;
835 	int limit = cp->pmc_comp.pmc_numlevels;
836 
837 	if (level < 0)
838 		return (0);
839 	for (i = 0; i < limit; i++) {
840 		if (level == *ip++)
841 			return (1);
842 	}
843 #ifdef DEBUG
844 	if (pm_debug & PMD_FAIL) {
845 		ip = cp->pmc_comp.pmc_lvals;
846 
847 		for (i = 0; i < limit; i++)
848 			PMD(PMD_FAIL, ("%s: index=%d, level=%d\n",
849 			    pmf, i, *ip++))
850 	}
851 #endif
852 	return (0);
853 }
854 
855 static int pm_start(dev_info_t *dip);
856 /*
857  * Returns true if device is pm'd (after calling pm_start if need be)
858  */
859 int
860 e_pm_valid_info(dev_info_t *dip, pm_info_t **infop)
861 {
862 	pm_info_t *info;
863 
864 	/*
865 	 * Check if the device is power managed if not.
866 	 * To make the common case (device is power managed already)
867 	 * fast, we check without the lock.  If device is not already
868 	 * power managed, then we take the lock and the long route through
869 	 * go get it managed.  Devices never go unmanaged until they
870 	 * detach.
871 	 */
872 	info = PM_GET_PM_INFO(dip);
873 	if (!info) {
874 		if (!DEVI_IS_ATTACHING(dip)) {
875 			return (0);
876 		}
877 		if (pm_start(dip) != DDI_SUCCESS) {
878 			return (0);
879 		}
880 		info = PM_GET_PM_INFO(dip);
881 	}
882 	ASSERT(info);
883 	if (infop != NULL)
884 		*infop = info;
885 	return (1);
886 }
887 
888 int
889 e_pm_valid_comp(dev_info_t *dip, int cmpt, pm_component_t **cpp)
890 {
891 	if (cmpt >= 0 && cmpt < PM_NUMCMPTS(dip)) {
892 		if (cpp != NULL)
893 			*cpp = PM_CP(dip, cmpt);
894 		return (1);
895 	} else {
896 		return (0);
897 	}
898 }
899 
900 /*
901  * Internal guts of ddi_dev_is_needed and pm_raise/lower_power
902  */
903 static int
904 dev_is_needed(dev_info_t *dip, int cmpt, int level, int direction)
905 {
906 	PMD_FUNC(pmf, "din")
907 	pm_component_t *cp;
908 	char *pathbuf;
909 	int result;
910 
911 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY);
912 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp) ||
913 	    !e_pm_valid_power(dip, cmpt, level))
914 		return (DDI_FAILURE);
915 
916 	PMD(PMD_DIN, ("%s: %s@%s(%s#%d) cmpt=%d, dir=%s, new=%d, cur=%d\n",
917 	    pmf, PM_DEVICE(dip), cmpt, pm_decode_direction(direction),
918 	    level, cur_power(cp)))
919 
920 	if (pm_set_power(dip, cmpt, level,  direction,
921 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
922 		if (direction == PM_LEVEL_UPONLY) {
923 			pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
924 			(void) ddi_pathname(dip, pathbuf);
925 			cmn_err(CE_WARN, "Device %s failed to power up.",
926 			    pathbuf);
927 			kmem_free(pathbuf, MAXPATHLEN);
928 		}
929 		PMD(PMD_DIN | PMD_FAIL, ("%s: %s@%s(%s#%d) [%d] %s->%d failed, "
930 		    "errno %d\n", pmf, PM_DEVICE(dip), cmpt,
931 		    pm_decode_direction(direction), level, result))
932 		return (DDI_FAILURE);
933 	}
934 
935 	PMD(PMD_RESCAN | PMD_DIN, ("%s: pm_rescan %s@%s(%s#%d)\n", pmf,
936 	    PM_DEVICE(dip)))
937 	pm_rescan(dip);
938 	return (DDI_SUCCESS);
939 }
940 
/*
 * We can get multiple pm_rescan() threads, if one of them discovers
 * that no scan is running at the moment, it kicks it into action.
 * Otherwise, it tells the current scanning thread to scan again when
 * it is done by asserting the PM_SCAN_AGAIN flag. The PM_SCANNING and
 * PM_SCAN_AGAIN flags are used to regulate scan, to make sure only one
 * thread at a time runs the pm_scan_dev() code.
 *
 * arg is the dev_info_t of the device to rescan; the void * signature lets
 * this function be used directly as a timeout() callback (it re-arms itself
 * that way below).
 */
void
pm_rescan(void *arg)
{
	PMD_FUNC(pmf, "rescan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_info_t	*info;
	pm_scan_t	*scanp;
	timeout_id_t	scanid;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	PM_LOCK_DIP(dip);
	info = PM_GET_PM_INFO(dip);
	scanp = PM_GET_PM_SCAN(dip);
	/* nothing to do if scanning is off, not set up, or being stopped */
	if (pm_scans_disabled || !PM_SCANABLE(dip) || !info || !scanp ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		PM_UNLOCK_DIP(dip);
		return;
	}
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* a scan is in progress: just ask it to go around again */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		return;
	} else if (scanp->ps_scan_id) {
		/*
		 * A rescan timeout is pending; cancel it.  The id is cleared
		 * under the lock, but the lock is dropped around untimeout().
		 */
		scanid = scanp->ps_scan_id;
		scanp->ps_scan_id = 0;
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): cancel timeout scanid %lx\n",
		    pmf, PM_DEVICE(dip), (ulong_t)scanid))
		PM_UNLOCK_DIP(dip);
		(void) untimeout(scanid);
		PM_LOCK_DIP(dip);
	}

	/*
	 * Dispatching pm_scan during attach time is risky due to the fact that
	 * attach might soon fail and dip dissolved, and panic may happen while
	 * attempting to stop scan. So schedule a pm_rescan instead.
	 * (Note that if either of the first two terms are true, taskq_dispatch
	 * will not be invoked).
	 *
	 * Multiple pm_scan dispatching is unnecessary and costly to keep track
	 * of. The PM_SCAN_DISPATCHED flag is used between pm_rescan and pm_scan
	 * to regulate the dispatching.
	 *
	 * Scan is stopped before the device is detached (in pm_detaching())
	 * but it may get re-started during the post_detach processing if the
	 * driver fails to detach.
	 */
	if (DEVI_IS_ATTACHING(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_DISPATCHED) ||
	    taskq_dispatch(system_taskq, pm_scan, (void *)dip, TQ_NOSLEEP) ==
	    TASKQID_INVALID) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): attaching, pm_scan already "
		    "dispatched or dispatching failed\n", pmf, PM_DEVICE(dip)))
		/*
		 * The lock was dropped above, so another thread may have
		 * armed a timeout in the meantime; cancel it, and if yet
		 * another one shows up while we do that, let it stand.
		 */
		if (scanp->ps_scan_id) {
			scanid = scanp->ps_scan_id;
			scanp->ps_scan_id = 0;
			PM_UNLOCK_DIP(dip);
			(void) untimeout(scanid);
			PM_LOCK_DIP(dip);
			if (scanp->ps_scan_id) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): a competing "
				    "thread scheduled pm_rescan, scanid %lx\n",
				    pmf, PM_DEVICE(dip),
				    (ulong_t)scanp->ps_scan_id))
				PM_UNLOCK_DIP(dip);
				return;
			}
		}
		/* try again later: idle-down interval or the minimum scan */
		scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
		    (scanp->ps_idle_down ? pm_id_ticks :
		    (PM_MIN_SCAN(dip) * hz)));
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): scheduled next pm_rescan, "
		    "scanid %lx\n", pmf, PM_DEVICE(dip),
		    (ulong_t)scanp->ps_scan_id))
	} else {
		/* pm_scan is queued; it clears this flag when it runs */
		PMD(PMD_SCAN, ("%s: dispatched pm_scan for %s@%s(%s#%d)\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags |= PM_SCAN_DISPATCHED;
	}
	PM_UNLOCK_DIP(dip);
}
1030 
/*
 * Scan one device: run pm_scan_dev() on it (repeating for as long as someone
 * sets PM_SCAN_AGAIN while we are scanning) and then arm a timeout for the
 * next pm_rescan() based on the value pm_scan_dev() returned.
 *
 * arg is the dev_info_t of the device; the void * signature matches the
 * taskq_dispatch() call made by pm_rescan().  The PM_SCANNING flag ensures
 * that only one thread at a time is inside the scan loop for a device, and
 * PM_SCAN_DISPATCHED is cleared on every path so that pm_rescan() may
 * dispatch again.
 */
void
pm_scan(void *arg)
{
	PMD_FUNC(pmf, "scan")
	dev_info_t	*dip = (dev_info_t *)arg;
	pm_scan_t	*scanp;
	time_t		nextscan;

	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	/* scanning switched off or being stopped: drop our request flags */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP)) {
		scanp->ps_scan_flags &= ~(PM_SCAN_AGAIN | PM_SCAN_DISPATCHED);
		PM_UNLOCK_DIP(dip);
		return;
	}

	if (scanp->ps_idle_down) {
		/*
		 * make sure we remember idledown was in effect until
		 * we've completed the scan
		 */
		PMID_SET_SCANS(scanp->ps_idle_down)
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown starts "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* possible having two threads running pm_scan() */
	if (scanp->ps_scan_flags & PM_SCANNING) {
		/* let the thread that is already scanning do one more pass */
		scanp->ps_scan_flags |= PM_SCAN_AGAIN;
		PMD(PMD_SCAN, ("%s: scanning, will scan %s@%s(%s#%d) again\n",
		    pmf, PM_DEVICE(dip)))
		scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
		PM_UNLOCK_DIP(dip);
		return;
	}

	scanp->ps_scan_flags |= PM_SCANNING;
	scanp->ps_scan_flags &= ~PM_SCAN_DISPATCHED;
	/*
	 * The lock is dropped while pm_scan_dev() runs; any PM_SCAN_AGAIN
	 * request that arrives in that window triggers another pass.
	 */
	do {
		scanp->ps_scan_flags &= ~PM_SCAN_AGAIN;
		PM_UNLOCK_DIP(dip);
		nextscan = pm_scan_dev(dip);
		PM_LOCK_DIP(dip);
	} while (scanp->ps_scan_flags & PM_SCAN_AGAIN);

	ASSERT(scanp->ps_scan_flags & PM_SCANNING);
	scanp->ps_scan_flags &= ~PM_SCANNING;

	if (scanp->ps_idle_down) {
		scanp->ps_idle_down &= ~PMID_SCANS;
		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d): idledown ends "
		    "(pmid %x)\n", pmf, PM_DEVICE(dip), scanp->ps_idle_down))
	}

	/* schedule for next idle check */
	if (nextscan != LONG_MAX) {
		/* clamp so that nextscan * hz below cannot overflow */
		if (nextscan > (LONG_MAX / hz))
			nextscan = (LONG_MAX - 1) / hz;
		if (scanp->ps_scan_id) {
			/* someone else already armed a rescan; leave it */
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): while scanning "
			    "another rescan scheduled scanid(%lx)\n", pmf,
			    PM_DEVICE(dip), (ulong_t)scanp->ps_scan_id))
			PM_UNLOCK_DIP(dip);
			return;
		} else if (!(scanp->ps_scan_flags & PM_SCAN_STOP)) {
			scanp->ps_scan_id = timeout(pm_rescan, (void *)dip,
			    (clock_t)(nextscan * hz));
			PMD(PMD_SCAN, ("%s: nextscan for %s@%s(%s#%d) in "
			    "%lx sec, scanid(%lx) \n", pmf, PM_DEVICE(dip),
			    (ulong_t)nextscan, (ulong_t)scanp->ps_scan_id))
		}
	}
	PM_UNLOCK_DIP(dip);
}
1110 
1111 void
1112 pm_get_timestamps(dev_info_t *dip, time_t *valuep)
1113 {
1114 	int components = PM_NUMCMPTS(dip);
1115 	int i;
1116 
1117 	ASSERT(components > 0);
1118 	PM_LOCK_BUSY(dip);	/* so we get a consistent view */
1119 	for (i = 0; i < components; i++) {
1120 		valuep[i] = PM_CP(dip, i)->pmc_timestamp;
1121 	}
1122 	PM_UNLOCK_BUSY(dip);
1123 }
1124 
1125 /*
1126  * Returns true if device needs to be kept up because it exported the
1127  * "no-involuntary-power-cycles" property or we're pretending it did (console
1128  * fb case) or it is an ancestor of such a device and has used up the "one
1129  * free cycle" allowed when all such leaf nodes have voluntarily powered down
1130  * upon detach
1131  */
1132 int
1133 pm_noinvol(dev_info_t *dip)
1134 {
1135 	PMD_FUNC(pmf, "noinvol")
1136 
1137 	/*
1138 	 * This doesn't change over the life of a driver, so no locking needed
1139 	 */
1140 	if (PM_IS_CFB(dip)) {
1141 		PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB %s@%s(%s#%d)\n",
1142 		    pmf, PM_DEVICE(dip)))
1143 		return (1);
1144 	}
1145 	/*
1146 	 * Not an issue if no such kids
1147 	 */
1148 	if (DEVI(dip)->devi_pm_noinvolpm == 0) {
1149 #ifdef DEBUG
1150 		if (DEVI(dip)->devi_pm_volpmd != 0) {
1151 			dev_info_t *pdip = dip;
1152 			do {
1153 				PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d) noinvol %d "
1154 				    "volpmd %d\n", pmf, PM_DEVICE(pdip),
1155 				    DEVI(pdip)->devi_pm_noinvolpm,
1156 				    DEVI(pdip)->devi_pm_volpmd))
1157 				pdip = ddi_get_parent(pdip);
1158 			} while (pdip);
1159 		}
1160 #endif
1161 		ASSERT(DEVI(dip)->devi_pm_volpmd == 0);
1162 		return (0);
1163 	}
1164 
1165 	/*
1166 	 * Since we now maintain the counts correct at every node, we no longer
1167 	 * need to look up the tree.  An ancestor cannot use up the free cycle
1168 	 * without the children getting their counts adjusted.
1169 	 */
1170 
1171 #ifdef	DEBUG
1172 	if (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd)
1173 		PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s@%s(%s#%d)\n", pmf,
1174 		    DEVI(dip)->devi_pm_noinvolpm, DEVI(dip)->devi_pm_volpmd,
1175 		    PM_DEVICE(dip)))
1176 #endif
1177 	return (DEVI(dip)->devi_pm_noinvolpm != DEVI(dip)->devi_pm_volpmd);
1178 }
1179 
1180 static int	cur_threshold(dev_info_t *, int);
1181 static int	pm_next_lower_power(pm_component_t *, int);
1182 
/*
 * This function performs the actual scanning of the device.
 * It attempts to power off the indicated device's components if they have
 * been idle and other restrictions are met.
 * pm_scan_dev calculates and returns when the next scan should happen for
 * this device.
 *
 * Return value, as consumed by pm_scan() (which multiplies it by hz):
 *	LONG_MAX	no further scan needs to be scheduled (scanning is
 *			not applicable, or nothing is left to lower)
 *	1		the parent node could not be entered; retry soon
 *	min_scan	the device is attaching
 *	otherwise	the smallest time-to-next-check computed over the
 *			components examined
 */
time_t
pm_scan_dev(dev_info_t *dip)
{
	PMD_FUNC(pmf, "scan_dev")
	pm_scan_t	*scanp;
	time_t		*timestamp, idletime, now, thresh;
	time_t		timeleft = 0;	/* 0 means "nothing pending yet" */
#ifdef PMDDEBUG
	int		curpwr;
#endif
	int		i, nxtpwr, pwrndx, unused;
	size_t		size;
	pm_component_t	 *cp;
	dev_info_t	*pdip = ddi_get_parent(dip);
	int		circ;
	clock_t		min_scan = pm_default_min_scan;

	/*
	 * skip attaching device
	 */
	if (DEVI_IS_ATTACHING(dip)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) is attaching, timeleft(%lx)\n",
		    pmf, PM_DEVICE(dip), min_scan))
		return (min_scan);
	}

	PM_LOCK_DIP(dip);
	scanp = PM_GET_PM_SCAN(dip);
	min_scan = PM_MIN_SCAN(dip);
	ASSERT(scanp && PM_GET_PM_INFO(dip));

	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
	PMD(PMD_SCAN, ("%s: %s@%s(%s#%d): kuc is %d\n", pmf, PM_DEVICE(dip),
	    PM_KUC(dip)))

	/* no scan under the following conditions */
	if (pm_scans_disabled || !PM_SCANABLE(dip) ||
	    (scanp->ps_scan_flags & PM_SCAN_STOP) ||
	    (PM_KUC(dip) != 0) ||
	    PM_ISDIRECT(dip) || pm_noinvol(dip)) {
		PM_UNLOCK_DIP(dip);
		PMD(PMD_SCAN, ("%s: [END, %s@%s(%s#%d)] no scan, "
		    "scan_disabled(%d), apm_enabled(%d), cpupm(%d), "
		    "kuc(%d), %s directpm, %s pm_noinvol\n",
		    pmf, PM_DEVICE(dip), pm_scans_disabled, autopm_enabled,
		    cpupm, PM_KUC(dip),
		    PM_ISDIRECT(dip) ? "is" : "is not",
		    pm_noinvol(dip) ? "is" : "is not"))
		return (LONG_MAX);
	}
	PM_UNLOCK_DIP(dip);

	/* don't block on the parent; ask to be called again shortly instead */
	if (!ndi_devi_tryenter(pdip, &circ)) {
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) can't hold pdip",
		    pmf, PM_DEVICE(pdip)))
		return ((time_t)1);
	}
	/* snapshot the current time and every component's timestamp */
	now = gethrestime_sec();
	size = PM_NUMCMPTS(dip) * sizeof (time_t);
	timestamp = kmem_alloc(size, KM_SLEEP);
	pm_get_timestamps(dip, timestamp);

	/*
	 * Since we removed support for backwards compatible devices,
	 * (see big comment at top of file)
	 * it is no longer required to deal with component 0 last.
	 */
	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
		/*
		 * If already off (an optimization, perhaps)
		 */
		cp = PM_CP(dip, i);
		pwrndx = cp->pmc_cur_pwr;
#ifdef PMDDEBUG
		curpwr = (pwrndx == PM_LEVEL_UNKNOWN) ?
		    PM_LEVEL_UNKNOWN :
		    cp->pmc_comp.pmc_lvals[pwrndx];
#endif

		if (pwrndx == 0) {
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d off or "
			    "lowest\n", pmf, PM_DEVICE(dip), i))
			/* skip device if off or at its lowest */
			continue;
		}

		thresh = cur_threshold(dip, i);		/* comp i threshold */
		if ((timestamp[i] == 0) || (cp->pmc_busycount > 0)) {
			/* were busy or newly became busy by another thread */
			/* check back after max(thresh, min_scan) at the latest */
			if (timeleft == 0)
				timeleft = max(thresh, min_scan);
			else
				timeleft = min(
				    timeleft, max(thresh, min_scan));
			continue;
		}

		idletime = now - timestamp[i];		/* idle time */
		PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d idle time %lx\n",
		    pmf, PM_DEVICE(dip), i, idletime))
		if (idletime >= thresh || PM_IS_PID(dip)) {
			/* idle long enough: step down to the next lower level */
			nxtpwr = pm_next_lower_power(cp, pwrndx);
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, %d->%d\n",
			    pmf, PM_DEVICE(dip), i, curpwr, nxtpwr))
			/*
			 * A failed pm_set_power() still counts as good if the
			 * component ended up at nxtpwr anyway.
			 */
			if (pm_set_power(dip, i, nxtpwr, PM_LEVEL_DOWNONLY,
			    PM_CANBLOCK_FAIL, 1, &unused) != DDI_SUCCESS &&
			    PM_CURPOWER(dip, i) != nxtpwr) {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "%d->%d Failed\n", pmf, PM_DEVICE(dip),
				    i, curpwr, nxtpwr))
				/* note: overrides any timeleft computed so far */
				timeleft = min_scan;
				continue;
			} else {
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "%d->%d, GOOD curpwr %d\n", pmf,
				    PM_DEVICE(dip), i, curpwr, nxtpwr,
				    cur_power(cp)))

				if (nxtpwr == 0)	/* component went off */
					continue;

				/*
				 * scan to next lower level
				 */
				if (timeleft == 0)
					timeleft = max(
					    1, cur_threshold(dip, i));
				else
					timeleft = min(timeleft,
					    max(1, cur_threshold(dip, i)));
				PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, "
				    "timeleft(%lx)\n", pmf, PM_DEVICE(dip),
				    i, timeleft))
			}
		} else {	/* comp not idle long enough */
			/* come back when the remaining idle time has elapsed */
			if (timeleft == 0)
				timeleft = thresh - idletime;
			else
				timeleft = min(timeleft, (thresh - idletime));
			PMD(PMD_SCAN, ("%s: %s@%s(%s#%d) comp %d, timeleft="
			    "%lx\n", pmf, PM_DEVICE(dip), i, timeleft))
		}
	}
	ndi_devi_exit(pdip, circ);
	kmem_free(timestamp, size);
	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] timeleft(%lx)\n", pmf,
	    PM_DEVICE(dip), timeleft))

	/*
	 * if components are already at lowest level, timeleft is left 0
	 */
	return ((timeleft == 0) ? LONG_MAX : timeleft);
}
1343 
1344 /*
1345  * pm_scan_stop - cancel scheduled pm_rescan,
1346  *                wait for termination of dispatched pm_scan thread
1347  *                     and active pm_scan_dev thread.
1348  */
1349 void
1350 pm_scan_stop(dev_info_t *dip)
1351 {
1352 	PMD_FUNC(pmf, "scan_stop")
1353 	pm_scan_t	*scanp;
1354 	timeout_id_t	scanid;
1355 
1356 	PMD(PMD_SCAN, ("%s: [BEGIN %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1357 	PM_LOCK_DIP(dip);
1358 	scanp = PM_GET_PM_SCAN(dip);
1359 	if (!scanp) {
1360 		PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)] scan not initialized\n",
1361 		    pmf, PM_DEVICE(dip)))
1362 		PM_UNLOCK_DIP(dip);
1363 		return;
1364 	}
1365 	scanp->ps_scan_flags |= PM_SCAN_STOP;
1366 
1367 	/* cancel scheduled scan taskq */
1368 	while (scanp->ps_scan_id) {
1369 		scanid = scanp->ps_scan_id;
1370 		scanp->ps_scan_id = 0;
1371 		PM_UNLOCK_DIP(dip);
1372 		(void) untimeout(scanid);
1373 		PM_LOCK_DIP(dip);
1374 	}
1375 
1376 	while (scanp->ps_scan_flags & (PM_SCANNING | PM_SCAN_DISPATCHED)) {
1377 		PM_UNLOCK_DIP(dip);
1378 		delay(1);
1379 		PM_LOCK_DIP(dip);
1380 	}
1381 	PM_UNLOCK_DIP(dip);
1382 	PMD(PMD_SCAN, ("%s: [END %s@%s(%s#%d)]\n", pmf, PM_DEVICE(dip)))
1383 }
1384 
1385 int
1386 pm_scan_stop_walk(dev_info_t *dip, void *arg)
1387 {
1388 	_NOTE(ARGUNUSED(arg))
1389 
1390 	if (!PM_GET_PM_SCAN(dip))
1391 		return (DDI_WALK_CONTINUE);
1392 	ASSERT(!PM_ISBC(dip));
1393 	pm_scan_stop(dip);
1394 	return (DDI_WALK_CONTINUE);
1395 }
1396 
1397 /*
1398  * Converts a power level value to its index
1399  */
1400 static int
1401 power_val_to_index(pm_component_t *cp, int val)
1402 {
1403 	int limit, i, *ip;
1404 
1405 	ASSERT(val != PM_LEVEL_UPONLY && val != PM_LEVEL_DOWNONLY &&
1406 	    val != PM_LEVEL_EXACT);
1407 	/*  convert power value into index (i) */
1408 	limit = cp->pmc_comp.pmc_numlevels;
1409 	ip = cp->pmc_comp.pmc_lvals;
1410 	for (i = 0; i < limit; i++)
1411 		if (val == *ip++)
1412 			return (i);
1413 	return (-1);
1414 }
1415 
1416 /*
1417  * Converts a numeric power level to a printable string
1418  */
1419 static char *
1420 power_val_to_string(pm_component_t *cp, int val)
1421 {
1422 	int index;
1423 
1424 	if (val == PM_LEVEL_UPONLY)
1425 		return ("<UPONLY>");
1426 
1427 	if (val == PM_LEVEL_UNKNOWN ||
1428 	    (index = power_val_to_index(cp, val)) == -1)
1429 		return ("<LEVEL_UNKNOWN>");
1430 
1431 	return (cp->pmc_comp.pmc_lnames[index]);
1432 }
1433 
1434 /*
1435  * Return true if this node has been claimed by a ppm.
1436  */
1437 static int
1438 pm_ppm_claimed(dev_info_t *dip)
1439 {
1440 	return (PPM(dip) != NULL);
1441 }
1442 
1443 /*
1444  * A node which was voluntarily power managed has just used up its "free cycle"
1445  * and need is volpmd field cleared, and the same done to all its descendents
1446  */
1447 static void
1448 pm_clear_volpm_dip(dev_info_t *dip)
1449 {
1450 	PMD_FUNC(pmf, "clear_volpm_dip")
1451 
1452 	if (dip == NULL)
1453 		return;
1454 	PMD(PMD_NOINVOL, ("%s: clear volpm from %s@%s(%s#%d)\n", pmf,
1455 	    PM_DEVICE(dip)))
1456 	DEVI(dip)->devi_pm_volpmd = 0;
1457 	for (dip = ddi_get_child(dip); dip; dip = ddi_get_next_sibling(dip)) {
1458 		pm_clear_volpm_dip(dip);
1459 	}
1460 }
1461 
1462 /*
1463  * A node which was voluntarily power managed has used up the "free cycles"
1464  * for the subtree that it is the root of.  Scan through the list of detached
1465  * nodes and adjust the counts of any that are descendents of the node.
1466  */
1467 static void
1468 pm_clear_volpm_list(dev_info_t *dip)
1469 {
1470 	PMD_FUNC(pmf, "clear_volpm_list")
1471 	char	*pathbuf;
1472 	size_t	len;
1473 	pm_noinvol_t *ip;
1474 
1475 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1476 	(void) ddi_pathname(dip, pathbuf);
1477 	len = strlen(pathbuf);
1478 	PMD(PMD_NOINVOL, ("%s: clear volpm list %s\n", pmf, pathbuf))
1479 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
1480 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
1481 		PMD(PMD_NOINVOL, ("%s: clear volpm: ni_path %s\n", pmf,
1482 		    ip->ni_path))
1483 		if (strncmp(pathbuf, ip->ni_path, len) == 0 &&
1484 		    ip->ni_path[len] == '/') {
1485 			PMD(PMD_NOINVOL, ("%s: clear volpm: %s\n", pmf,
1486 			    ip->ni_path))
1487 			ip->ni_volpmd = 0;
1488 			ip->ni_wasvolpmd = 0;
1489 		}
1490 	}
1491 	kmem_free(pathbuf, MAXPATHLEN);
1492 	rw_exit(&pm_noinvol_rwlock);
1493 }
1494 
1495 /*
1496  * Powers a device, suspending or resuming the driver if it is a backward
1497  * compatible device, calling into ppm to change power level.
1498  * Called with the component's power lock held.
1499  */
1500 static int
1501 power_dev(dev_info_t *dip, int comp, int level, int old_level,
1502     pm_canblock_t canblock, pm_ppm_devlist_t **devlist)
1503 {
1504 	PMD_FUNC(pmf, "power_dev")
1505 	power_req_t power_req;
1506 	int		power_op_ret;	/* DDI_SUCCESS or DDI_FAILURE */
1507 	int		resume_needed = 0;
1508 	int		suspended = 0;
1509 	int		result;
1510 #ifdef PMDDEBUG
1511 	struct pm_component *cp = PM_CP(dip, comp);
1512 #endif
1513 	int		bc = PM_ISBC(dip);
1514 	int pm_all_components_off(dev_info_t *);
1515 	int		clearvolpmd = 0;
1516 	char		pathbuf[MAXNAMELEN];
1517 #ifdef PMDDEBUG
1518 	char *ppmname, *ppmaddr;
1519 #endif
1520 	/*
1521 	 * If this is comp 0 of a backwards compat device and we are
1522 	 * going to take the power away, we need to detach it with
1523 	 * DDI_PM_SUSPEND command.
1524 	 */
1525 	if (bc && comp == 0 && POWERING_OFF(old_level, level)) {
1526 		if (devi_detach(dip, DDI_PM_SUSPEND) != DDI_SUCCESS) {
1527 			/* We could not suspend before turning cmpt zero off */
1528 			PMD(PMD_ERROR, ("%s: could not suspend %s@%s(%s#%d)\n",
1529 			    pmf, PM_DEVICE(dip)))
1530 			return (DDI_FAILURE);
1531 		} else {
1532 			DEVI(dip)->devi_pm_flags |= PMC_SUSPENDED;
1533 			suspended++;
1534 		}
1535 	}
1536 	power_req.request_type = PMR_PPM_SET_POWER;
1537 	power_req.req.ppm_set_power_req.who = dip;
1538 	power_req.req.ppm_set_power_req.cmpt = comp;
1539 	power_req.req.ppm_set_power_req.old_level = old_level;
1540 	power_req.req.ppm_set_power_req.new_level = level;
1541 	power_req.req.ppm_set_power_req.canblock = canblock;
1542 	power_req.req.ppm_set_power_req.cookie = NULL;
1543 #ifdef PMDDEBUG
1544 	if (pm_ppm_claimed(dip)) {
1545 		ppmname = PM_NAME(PPM(dip));
1546 		ppmaddr = PM_ADDR(PPM(dip));
1547 
1548 	} else {
1549 		ppmname = "noppm";
1550 		ppmaddr = "0";
1551 	}
1552 	PMD(PMD_PPM, ("%s: %s@%s(%s#%d):%s[%d] %s (%d) -> %s (%d) via %s@%s\n",
1553 	    pmf, PM_DEVICE(dip), cp->pmc_comp.pmc_name, comp,
1554 	    power_val_to_string(cp, old_level), old_level,
1555 	    power_val_to_string(cp, level), level, ppmname, ppmaddr))
1556 #endif
1557 	/*
1558 	 * If non-bc noinvolpm device is turning first comp on, or noinvolpm
1559 	 * bc device comp 0 is powering on, then we count it as a power cycle
1560 	 * against its voluntary count.
1561 	 */
1562 	if (DEVI(dip)->devi_pm_volpmd &&
1563 	    (!bc && pm_all_components_off(dip) && level != 0) ||
1564 	    (bc && comp == 0 && POWERING_ON(old_level, level)))
1565 		clearvolpmd = 1;
1566 	if ((power_op_ret = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
1567 	    &power_req, &result)) == DDI_SUCCESS) {
1568 		/*
1569 		 * Now do involuntary pm accounting;  If we've just cycled power
1570 		 * on a voluntarily pm'd node, and by inference on its entire
1571 		 * subtree, we need to set the subtree (including those nodes
1572 		 * already detached) volpmd counts to 0, and subtract out the
1573 		 * value of the current node's volpmd count from the ancestors
1574 		 */
1575 		if (clearvolpmd) {
1576 			int volpmd = DEVI(dip)->devi_pm_volpmd;
1577 			pm_clear_volpm_dip(dip);
1578 			pm_clear_volpm_list(dip);
1579 			if (volpmd) {
1580 				(void) ddi_pathname(dip, pathbuf);
1581 				(void) pm_noinvol_update(PM_BP_NOINVOL_POWER,
1582 				    volpmd, 0, pathbuf, dip);
1583 			}
1584 		}
1585 	} else {
1586 		PMD(PMD_FAIL, ("%s: can't set comp %d (%s) of %s@%s(%s#%d) "
1587 		    "to level %d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name,
1588 		    PM_DEVICE(dip), level, power_val_to_string(cp, level)))
1589 	}
1590 	/*
1591 	 * If some other devices were also powered up (e.g. other cpus in
1592 	 * the same domain) return a pointer to that list
1593 	 */
1594 	if (devlist) {
1595 		*devlist = (pm_ppm_devlist_t *)
1596 		    power_req.req.ppm_set_power_req.cookie;
1597 	}
1598 	/*
1599 	 * We will have to resume the device if the device is backwards compat
1600 	 * device and either of the following is true:
1601 	 * -This is comp 0 and we have successfully powered it up
1602 	 * -This is comp 0 and we have failed to power it down. Resume is
1603 	 *  needed because we have suspended it above
1604 	 */
1605 
1606 	if (bc && comp == 0) {
1607 		ASSERT(PM_ISDIRECT(dip) || DEVI_IS_DETACHING(dip));
1608 		if (power_op_ret == DDI_SUCCESS) {
1609 			if (POWERING_ON(old_level, level)) {
1610 				/*
1611 				 * It must be either suspended or resumed
1612 				 * via pm_power_has_changed path
1613 				 */
1614 				ASSERT((DEVI(dip)->devi_pm_flags &
1615 				    PMC_SUSPENDED) ||
1616 				    (PM_CP(dip, comp)->pmc_flags &
1617 				    PM_PHC_WHILE_SET_POWER));
1618 
1619 					resume_needed = suspended;
1620 			}
1621 		} else {
1622 			if (POWERING_OFF(old_level, level)) {
1623 				/*
1624 				 * It must be either suspended or resumed
1625 				 * via pm_power_has_changed path
1626 				 */
1627 				ASSERT((DEVI(dip)->devi_pm_flags &
1628 				    PMC_SUSPENDED) ||
1629 				    (PM_CP(dip, comp)->pmc_flags &
1630 				    PM_PHC_WHILE_SET_POWER));
1631 
1632 					resume_needed = suspended;
1633 			}
1634 		}
1635 	}
1636 	if (resume_needed) {
1637 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
1638 		/* ppm is not interested in DDI_PM_RESUME */
1639 		if ((power_op_ret = devi_attach(dip, DDI_PM_RESUME)) ==
1640 		    DDI_SUCCESS) {
1641 			DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
1642 		} else
1643 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s(%s#%d)",
1644 			    PM_DEVICE(dip));
1645 	}
1646 	return (power_op_ret);
1647 }
1648 
1649 /*
1650  * Return true if we are the owner or a borrower of the devi lock.  See
1651  * pm_lock_power_single() about borrowing the lock.
1652  */
1653 static int
1654 pm_devi_lock_held(dev_info_t *dip)
1655 {
1656 	lock_loan_t *cur;
1657 
1658 	if (DEVI_BUSY_OWNED(dip))
1659 		return (1);
1660 
1661 	/* return false if no locks borrowed */
1662 	if (lock_loan_head.pmlk_next == NULL)
1663 		return (0);
1664 
1665 	mutex_enter(&pm_loan_lock);
1666 	/* see if our thread is registered as a lock borrower. */
1667 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
1668 		if (cur->pmlk_borrower == curthread)
1669 			break;
1670 	mutex_exit(&pm_loan_lock);
1671 
1672 	return (cur != NULL && cur->pmlk_lender == DEVI(dip)->devi_busy_thread);
1673 }
1674 
1675 /*
1676  * pm_set_power: adjusts power level of device.	 Assumes device is power
1677  * manageable & component exists.
1678  *
1679  * Cases which require us to bring up devices we keep up ("wekeepups") for
1680  * backwards compatible devices:
1681  *	component 0 is off and we're bringing it up from 0
1682  *		bring up wekeepup first
1683  *	and recursively when component 0 is off and we bring some other
1684  *	component up from 0
1685  * For devices which are not backward compatible, our dependency notion is much
1686  * simpler.  Unless all components are off, then wekeeps must be on.
1687  * We don't treat component 0 differently.
1688  * Canblock tells how to deal with a direct pm'd device.
1689  * Scan arg tells us if we were called from scan, in which case we don't need
1690  * to go back to the root node and walk down to change power.
1691  */
1692 int
1693 pm_set_power(dev_info_t *dip, int comp, int level, int direction,
1694     pm_canblock_t canblock, int scan, int *retp)
1695 {
1696 	PMD_FUNC(pmf, "set_power")
1697 	char		*pathbuf;
1698 	pm_bp_child_pwrchg_t bpc;
1699 	pm_sp_misc_t	pspm;
1700 	int		ret = DDI_SUCCESS;
1701 	int		unused = DDI_SUCCESS;
1702 	dev_info_t	*pdip = ddi_get_parent(dip);
1703 
1704 #ifdef DEBUG
1705 	int		diverted = 0;
1706 
1707 	/*
1708 	 * This prevents operations on the console from calling prom_printf and
1709 	 * either deadlocking or bringing up the console because of debug
1710 	 * output
1711 	 */
1712 	if (dip == cfb_dip) {
1713 		diverted++;
1714 		mutex_enter(&pm_debug_lock);
1715 		pm_divertdebug++;
1716 		mutex_exit(&pm_debug_lock);
1717 	}
1718 #endif
1719 	ASSERT(direction == PM_LEVEL_UPONLY || direction == PM_LEVEL_DOWNONLY ||
1720 	    direction == PM_LEVEL_EXACT);
1721 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d, dir=%s, new=%d\n",
1722 	    pmf, PM_DEVICE(dip), comp, pm_decode_direction(direction), level))
1723 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1724 	(void) ddi_pathname(dip, pathbuf);
1725 	bpc.bpc_dip = dip;
1726 	bpc.bpc_path = pathbuf;
1727 	bpc.bpc_comp = comp;
1728 	bpc.bpc_olevel = PM_CURPOWER(dip, comp);
1729 	bpc.bpc_nlevel = level;
1730 	pspm.pspm_direction = direction;
1731 	pspm.pspm_errnop = retp;
1732 	pspm.pspm_canblock = canblock;
1733 	pspm.pspm_scan = scan;
1734 	bpc.bpc_private = &pspm;
1735 
1736 	/*
1737 	 * If a config operation is being done (we've locked the parent) or
1738 	 * we already hold the power lock (we've locked the node)
1739 	 * then we can operate directly on the node because we have already
1740 	 * brought up all the ancestors, otherwise, we have to go back to the
1741 	 * top of the tree.
1742 	 */
1743 	if (pm_devi_lock_held(pdip) || pm_devi_lock_held(dip))
1744 		ret = pm_busop_set_power(dip, NULL, BUS_POWER_CHILD_PWRCHG,
1745 		    (void *)&bpc, (void *)&unused);
1746 	else
1747 		ret = pm_busop_bus_power(ddi_root_node(), NULL,
1748 		    BUS_POWER_CHILD_PWRCHG, (void *)&bpc, (void *)&unused);
1749 #ifdef DEBUG
1750 	if (ret != DDI_SUCCESS || *retp != DDI_SUCCESS) {
1751 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) can't change power, ret=%d, "
1752 		    "errno=%d\n", pmf, PM_DEVICE(dip), ret, *retp))
1753 	}
1754 	if (diverted) {
1755 		mutex_enter(&pm_debug_lock);
1756 		pm_divertdebug--;
1757 		mutex_exit(&pm_debug_lock);
1758 	}
1759 #endif
1760 	kmem_free(pathbuf, MAXPATHLEN);
1761 	return (ret);
1762 }
1763 
1764 /*
1765  * If holddip is set, then if a dip is found we return with the node held.
1766  *
1767  * This code uses the same locking scheme as e_ddi_hold_devi_by_path
1768  * (resolve_pathname), but it does not drive attach.
1769  */
1770 dev_info_t *
1771 pm_name_to_dip(char *pathname, int holddip)
1772 {
1773 	struct pathname pn;
1774 	char		*component;
1775 	dev_info_t	*parent, *child;
1776 	int		circ;
1777 
1778 	if ((pathname == NULL) || (*pathname != '/'))
1779 		return (NULL);
1780 
1781 	/* setup pathname and allocate component */
1782 	if (pn_get(pathname, UIO_SYSSPACE, &pn))
1783 		return (NULL);
1784 	component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1785 
1786 	/* start at top, process '/' component */
1787 	parent = child = ddi_root_node();
1788 	ndi_hold_devi(parent);
1789 	pn_skipslash(&pn);
1790 	ASSERT(i_ddi_devi_attached(parent));
1791 
1792 	/* process components of pathname */
1793 	while (pn_pathleft(&pn)) {
1794 		(void) pn_getcomponent(&pn, component);
1795 
1796 		/* enter parent and search for component child */
1797 		ndi_devi_enter(parent, &circ);
1798 		child = ndi_devi_findchild(parent, component);
1799 		if ((child == NULL) || !i_ddi_devi_attached(child)) {
1800 			child = NULL;
1801 			ndi_devi_exit(parent, circ);
1802 			ndi_rele_devi(parent);
1803 			goto out;
1804 		}
1805 
1806 		/* attached child found, hold child and release parent */
1807 		ndi_hold_devi(child);
1808 		ndi_devi_exit(parent, circ);
1809 		ndi_rele_devi(parent);
1810 
1811 		/* child becomes parent, and process next component */
1812 		parent = child;
1813 		pn_skipslash(&pn);
1814 
1815 		/* loop with active ndi_devi_hold of child->parent */
1816 	}
1817 
1818 out:
1819 	pn_free(&pn);
1820 	kmem_free(component, MAXNAMELEN);
1821 
1822 	/* if we are not asked to return with hold, drop current hold */
1823 	if (child && !holddip)
1824 		ndi_rele_devi(child);
1825 	return (child);
1826 }
1827 
1828 /*
1829  * Search for a dependency and mark it unsatisfied
1830  */
1831 static void
1832 pm_unsatisfy(char *keeper, char *kept)
1833 {
1834 	PMD_FUNC(pmf, "unsatisfy")
1835 	pm_pdr_t *dp;
1836 
1837 	PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf, keeper, kept))
1838 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1839 		if (!dp->pdr_isprop) {
1840 			if (strcmp(dp->pdr_keeper, keeper) == 0 &&
1841 			    (dp->pdr_kept_count > 0) &&
1842 			    strcmp(dp->pdr_kept_paths[0], kept) == 0) {
1843 				if (dp->pdr_satisfied) {
1844 					dp->pdr_satisfied = 0;
1845 					pm_unresolved_deps++;
1846 					PMD(PMD_KEEPS, ("%s: clear satisfied, "
1847 					    "pm_unresolved_deps now %d\n", pmf,
1848 					    pm_unresolved_deps))
1849 				}
1850 			}
1851 		}
1852 	}
1853 }
1854 
1855 /*
1856  * Device dip is being un power managed, it keeps up count other devices.
1857  * We need to release any hold we have on the kept devices, and also
1858  * mark the dependency no longer satisfied.
1859  */
1860 static void
1861 pm_unkeeps(int count, char *keeper, char **keptpaths, int pwr)
1862 {
1863 	PMD_FUNC(pmf, "unkeeps")
1864 	int i, j;
1865 	dev_info_t *kept;
1866 	dev_info_t *dip;
1867 	struct pm_component *cp;
1868 	int keeper_on = 0, circ;
1869 
1870 	PMD(PMD_KEEPS, ("%s: count=%d, keeper=%s, keptpaths=%p\n", pmf, count,
1871 	    keeper, (void *)keptpaths))
1872 	/*
1873 	 * Try to grab keeper. Keeper may have gone away by now,
1874 	 * in this case, used the passed in value pwr
1875 	 */
1876 	dip = pm_name_to_dip(keeper, 1);
1877 	for (i = 0; i < count; i++) {
1878 		/* Release power hold */
1879 		kept = pm_name_to_dip(keptpaths[i], 1);
1880 		if (kept) {
1881 			PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
1882 			    PM_DEVICE(kept), i))
1883 			/*
1884 			 * We need to check if we skipped a bringup here
1885 			 * because we could have failed the bringup
1886 			 * (ie DIRECT PM device) and have
1887 			 * not increment the count.
1888 			 */
1889 			if ((dip != NULL) && (PM_GET_PM_INFO(dip) != NULL)) {
1890 				keeper_on = 0;
1891 				PM_LOCK_POWER(dip, &circ);
1892 				for (j = 0; j < PM_NUMCMPTS(dip); j++) {
1893 					cp = &DEVI(dip)->devi_pm_components[j];
1894 					if (cur_power(cp)) {
1895 						keeper_on++;
1896 						break;
1897 					}
1898 				}
1899 				if (keeper_on && (PM_SKBU(kept) == 0)) {
1900 					pm_rele_power(kept);
1901 					DEVI(kept)->devi_pm_flags
1902 					    &= ~PMC_SKIP_BRINGUP;
1903 				}
1904 				PM_UNLOCK_POWER(dip, circ);
1905 			} else if (pwr) {
1906 				if (PM_SKBU(kept) == 0) {
1907 					pm_rele_power(kept);
1908 					DEVI(kept)->devi_pm_flags
1909 					    &= ~PMC_SKIP_BRINGUP;
1910 				}
1911 			}
1912 			ddi_release_devi(kept);
1913 		}
1914 		/*
1915 		 * mark this dependency not satisfied
1916 		 */
1917 		pm_unsatisfy(keeper, keptpaths[i]);
1918 	}
1919 	if (dip)
1920 		ddi_release_devi(dip);
1921 }
1922 
/*
 * Device kept is being un power managed, it is kept up by keeper.
 * We need to mark the dependency no longer satisfied.
 *
 * This is a thin wrapper around pm_unsatisfy(); note that the argument
 * order here is (kept, keeper) while pm_unsatisfy() takes (keeper, kept).
 */
static void
pm_unkepts(char *kept, char *keeper)
{
	PMD_FUNC(pmf, "unkepts")
	PMD(PMD_KEEPS, ("%s: kept=%s, keeper=%s\n", pmf, kept, keeper))
	ASSERT(keeper != NULL);
	/*
	 * mark this dependency not satisfied
	 */
	pm_unsatisfy(keeper, kept);
}
1938 
1939 /*
1940  * Removes dependency information and hold on the kepts, if the path is a
1941  * path of a keeper.
1942  */
1943 static void
1944 pm_free_keeper(char *path, int pwr)
1945 {
1946 	pm_pdr_t *dp;
1947 	int i;
1948 	size_t length;
1949 
1950 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1951 		if (strcmp(dp->pdr_keeper, path) != 0)
1952 			continue;
1953 		/*
1954 		 * Remove all our kept holds and the dependency records,
1955 		 * then free up the kept lists.
1956 		 */
1957 		pm_unkeeps(dp->pdr_kept_count, path, dp->pdr_kept_paths, pwr);
1958 		if (dp->pdr_kept_count)  {
1959 			for (i = 0; i < dp->pdr_kept_count; i++) {
1960 				length = strlen(dp->pdr_kept_paths[i]);
1961 				kmem_free(dp->pdr_kept_paths[i], length + 1);
1962 			}
1963 			kmem_free(dp->pdr_kept_paths,
1964 			    dp->pdr_kept_count * sizeof (char **));
1965 			dp->pdr_kept_paths = NULL;
1966 			dp->pdr_kept_count = 0;
1967 		}
1968 	}
1969 }
1970 
1971 /*
1972  * Removes the device represented by path from the list of kepts, if the
1973  * path is a path of a kept
1974  */
1975 static void
1976 pm_free_kept(char *path)
1977 {
1978 	pm_pdr_t *dp;
1979 	int i;
1980 	int j, count;
1981 	size_t length;
1982 	char **paths;
1983 
1984 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
1985 		if (dp->pdr_kept_count == 0)
1986 			continue;
1987 		count = dp->pdr_kept_count;
1988 		/* Remove this device from the kept path lists */
1989 		for (i = 0; i < count; i++) {
1990 			if (strcmp(dp->pdr_kept_paths[i], path) == 0) {
1991 				pm_unkepts(path, dp->pdr_keeper);
1992 				length = strlen(dp->pdr_kept_paths[i]) + 1;
1993 				kmem_free(dp->pdr_kept_paths[i], length);
1994 				dp->pdr_kept_paths[i] = NULL;
1995 				dp->pdr_kept_count--;
1996 			}
1997 		}
1998 		/* Compact the kept paths array */
1999 		if (dp->pdr_kept_count) {
2000 			length = dp->pdr_kept_count * sizeof (char **);
2001 			paths = kmem_zalloc(length, KM_SLEEP);
2002 			j = 0;
2003 			for (i = 0; i < count; i++) {
2004 				if (dp->pdr_kept_paths[i] != NULL) {
2005 					paths[j] = dp->pdr_kept_paths[i];
2006 					j++;
2007 				}
2008 			}
2009 			ASSERT(j == dp->pdr_kept_count);
2010 		}
2011 		/* Now free the old array and point to the new one */
2012 		kmem_free(dp->pdr_kept_paths, count * sizeof (char **));
2013 		if (dp->pdr_kept_count)
2014 			dp->pdr_kept_paths = paths;
2015 		else
2016 			dp->pdr_kept_paths = NULL;
2017 	}
2018 }
2019 
2020 /*
2021  * Free the dependency information for a device.
2022  */
2023 void
2024 pm_free_keeps(char *path, int pwr)
2025 {
2026 	PMD_FUNC(pmf, "free_keeps")
2027 
2028 #ifdef DEBUG
2029 	int doprdeps = 0;
2030 	void prdeps(char *);
2031 
2032 	PMD(PMD_KEEPS, ("%s: %s\n", pmf, path))
2033 	if (pm_debug & PMD_KEEPS) {
2034 		doprdeps = 1;
2035 		prdeps("pm_free_keeps before");
2036 	}
2037 #endif
2038 	/*
2039 	 * First assume we are a keeper and remove all our kepts.
2040 	 */
2041 	pm_free_keeper(path, pwr);
2042 	/*
2043 	 * Now assume we a kept device, and remove all our records.
2044 	 */
2045 	pm_free_kept(path);
2046 #ifdef	DEBUG
2047 	if (doprdeps) {
2048 		prdeps("pm_free_keeps after");
2049 	}
2050 #endif
2051 }
2052 
2053 static int
2054 pm_is_kept(char *path)
2055 {
2056 	pm_pdr_t *dp;
2057 	int i;
2058 
2059 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
2060 		if (dp->pdr_kept_count == 0)
2061 			continue;
2062 		for (i = 0; i < dp->pdr_kept_count; i++) {
2063 			if (strcmp(dp->pdr_kept_paths[i], path) == 0)
2064 				return (1);
2065 		}
2066 	}
2067 	return (0);
2068 }
2069 
2070 static void
2071 e_pm_hold_rele_power(dev_info_t *dip, int cnt)
2072 {
2073 	PMD_FUNC(pmf, "hold_rele_power")
2074 	int circ;
2075 
2076 	if ((dip == NULL) ||
2077 	    (PM_GET_PM_INFO(dip) == NULL) || PM_ISBC(dip))
2078 		return;
2079 
2080 	PM_LOCK_POWER(dip, &circ);
2081 	ASSERT(cnt >= 0 && PM_KUC(dip) >= 0 || cnt < 0 && PM_KUC(dip) > 0);
2082 	PMD(PMD_KIDSUP, ("%s: kidsupcnt for %s@%s(%s#%d) %d->%d\n", pmf,
2083 	    PM_DEVICE(dip), PM_KUC(dip), (PM_KUC(dip) + cnt)))
2084 
2085 	PM_KUC(dip) += cnt;
2086 
2087 	ASSERT(PM_KUC(dip) >= 0);
2088 	PM_UNLOCK_POWER(dip, circ);
2089 
2090 	if (cnt < 0 && PM_KUC(dip) == 0)
2091 		pm_rescan(dip);
2092 }
2093 
/* Number of ppm handler slots; the table below has one extra entry. */
#define	MAX_PPM_HANDLERS	4

kmutex_t ppm_lock;	/* in case we ever do multi-threaded startup */

/*
 * Table of registered ppm callbacks.  pm_ppm_notify_all_lowest() walks this
 * table under ppm_lock until it reaches an entry whose ppmc_func is NULL, so
 * the extra (MAX_PPM_HANDLERS + 1) slot presumably serves as the terminator
 * -- TODO confirm against the registration code.
 */
struct	ppm_callbacks {
	int (*ppmc_func)(dev_info_t *);	/* handler; NULL ends the table walk */
	dev_info_t	*ppmc_dip;	/* dip passed to pm_ctlops() as the ppm */
} ppm_callbacks[MAX_PPM_HANDLERS + 1];
2102 
2103 
2104 /*
2105  * This routine calls into all the registered ppms to notify them
2106  * that either all components of power-managed devices are at their
2107  * lowest levels or no longer all are at their lowest levels.
2108  */
2109 static void
2110 pm_ppm_notify_all_lowest(dev_info_t *dip, int mode)
2111 {
2112 	struct ppm_callbacks *ppmcp;
2113 	power_req_t power_req;
2114 	int result = 0;
2115 
2116 	power_req.request_type = PMR_PPM_ALL_LOWEST;
2117 	power_req.req.ppm_all_lowest_req.mode = mode;
2118 	mutex_enter(&ppm_lock);
2119 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++)
2120 		(void) pm_ctlops((dev_info_t *)ppmcp->ppmc_dip, dip,
2121 		    DDI_CTLOPS_POWER, &power_req, &result);
2122 	mutex_exit(&ppm_lock);
2123 	if (mode == PM_ALL_LOWEST) {
2124 		if (autoS3_enabled) {
2125 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest triggering "
2126 			    "autos3\n"))
2127 			mutex_enter(&srn_clone_lock);
2128 			if (srn_signal) {
2129 				srn_inuse++;
2130 				PMD(PMD_SX, ("(*srn_signal)(AUTOSX, 3)\n"))
2131 				(*srn_signal)(SRN_TYPE_AUTOSX, 3);
2132 				srn_inuse--;
2133 			} else {
2134 				PMD(PMD_SX, ("srn_signal NULL\n"))
2135 			}
2136 			mutex_exit(&srn_clone_lock);
2137 		} else {
2138 			PMD(PMD_SX, ("pm_ppm_notify_all_lowest autos3 "
2139 			    "disabled\n"));
2140 		}
2141 	}
2142 }
2143 
/*
 * Store "value" as the pm info pointer of dip.  No locking is done here.
 */
static void
pm_set_pm_info(dev_info_t *dip, void *value)
{
	DEVI(dip)->devi_pm_info = value;
}
2149 
2150 pm_rsvp_t *pm_blocked_list;
2151 
2152 /*
2153  * Look up an entry in the blocked list by dip and component
2154  */
2155 static pm_rsvp_t *
2156 pm_rsvp_lookup(dev_info_t *dip, int comp)
2157 {
2158 	pm_rsvp_t *p;
2159 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2160 	for (p = pm_blocked_list; p; p = p->pr_next)
2161 		if (p->pr_dip == dip && p->pr_comp == comp) {
2162 			return (p);
2163 		}
2164 	return (NULL);
2165 }
2166 
2167 /*
2168  * Called when a device which is direct power managed (or the parent or
2169  * dependent of such a device) changes power, or when a pm clone is closed
2170  * that was direct power managing a device.  This call results in pm_blocked()
2171  * (below) returning.
2172  */
2173 void
2174 pm_proceed(dev_info_t *dip, int cmd, int comp, int newlevel)
2175 {
2176 	PMD_FUNC(pmf, "proceed")
2177 	pm_rsvp_t *found = NULL;
2178 	pm_rsvp_t *p;
2179 
2180 	mutex_enter(&pm_rsvp_lock);
2181 	switch (cmd) {
2182 	/*
2183 	 * we're giving up control, let any pending op continue
2184 	 */
2185 	case PMP_RELEASE:
2186 		for (p = pm_blocked_list; p; p = p->pr_next) {
2187 			if (dip == p->pr_dip) {
2188 				p->pr_retval = PMP_RELEASE;
2189 				PMD(PMD_DPM, ("%s: RELEASE %s@%s(%s#%d)\n",
2190 				    pmf, PM_DEVICE(dip)))
2191 				cv_signal(&p->pr_cv);
2192 			}
2193 		}
2194 		break;
2195 
2196 	/*
2197 	 * process has done PM_SET_CURRENT_POWER; let a matching request
2198 	 * succeed and a non-matching request for the same device fail
2199 	 */
2200 	case PMP_SETPOWER:
2201 		found = pm_rsvp_lookup(dip, comp);
2202 		if (!found)	/* if driver not waiting */
2203 			break;
2204 		/*
2205 		 * This cannot be pm_lower_power, since that can only happen
2206 		 * during detach or probe
2207 		 */
2208 		if (found->pr_newlevel <= newlevel) {
2209 			found->pr_retval = PMP_SUCCEED;
2210 			PMD(PMD_DPM, ("%s: SUCCEED %s@%s(%s#%d)\n", pmf,
2211 			    PM_DEVICE(dip)))
2212 		} else {
2213 			found->pr_retval = PMP_FAIL;
2214 			PMD(PMD_DPM, ("%s: FAIL %s@%s(%s#%d)\n", pmf,
2215 			    PM_DEVICE(dip)))
2216 		}
2217 		cv_signal(&found->pr_cv);
2218 		break;
2219 
2220 	default:
2221 		panic("pm_proceed unknown cmd %d", cmd);
2222 	}
2223 	mutex_exit(&pm_rsvp_lock);
2224 }
2225 
2226 /*
2227  * This routine dispatches new work to the dependency thread. Caller must
2228  * be prepared to block for memory if necessary.
2229  */
2230 void
2231 pm_dispatch_to_dep_thread(int cmd, char *keeper, char *kept, int wait,
2232     int *res, int cached_pwr)
2233 {
2234 	pm_dep_wk_t	*new_work;
2235 
2236 	new_work = kmem_zalloc(sizeof (pm_dep_wk_t), KM_SLEEP);
2237 	new_work->pdw_type = cmd;
2238 	new_work->pdw_wait = wait;
2239 	new_work->pdw_done = 0;
2240 	new_work->pdw_ret = 0;
2241 	new_work->pdw_pwr = cached_pwr;
2242 	cv_init(&new_work->pdw_cv, NULL, CV_DEFAULT, NULL);
2243 	if (keeper != NULL) {
2244 		new_work->pdw_keeper = kmem_zalloc(strlen(keeper) + 1,
2245 		    KM_SLEEP);
2246 		(void) strcpy(new_work->pdw_keeper, keeper);
2247 	}
2248 	if (kept != NULL) {
2249 		new_work->pdw_kept = kmem_zalloc(strlen(kept) + 1, KM_SLEEP);
2250 		(void) strcpy(new_work->pdw_kept, kept);
2251 	}
2252 	mutex_enter(&pm_dep_thread_lock);
2253 	if (pm_dep_thread_workq == NULL) {
2254 		pm_dep_thread_workq = new_work;
2255 		pm_dep_thread_tail = new_work;
2256 		new_work->pdw_next = NULL;
2257 	} else {
2258 		pm_dep_thread_tail->pdw_next = new_work;
2259 		pm_dep_thread_tail = new_work;
2260 		new_work->pdw_next = NULL;
2261 	}
2262 	cv_signal(&pm_dep_thread_cv);
2263 	/* If caller asked for it, wait till it is done. */
2264 	if (wait)  {
2265 		while (!new_work->pdw_done)
2266 			cv_wait(&new_work->pdw_cv, &pm_dep_thread_lock);
2267 		/*
2268 		 * Pass return status, if any, back.
2269 		 */
2270 		if (res != NULL)
2271 			*res = new_work->pdw_ret;
2272 		/*
2273 		 * If we asked to wait, it is our job to free the request
2274 		 * structure.
2275 		 */
2276 		if (new_work->pdw_keeper)
2277 			kmem_free(new_work->pdw_keeper,
2278 			    strlen(new_work->pdw_keeper) + 1);
2279 		if (new_work->pdw_kept)
2280 			kmem_free(new_work->pdw_kept,
2281 			    strlen(new_work->pdw_kept) + 1);
2282 		kmem_free(new_work, sizeof (pm_dep_wk_t));
2283 	}
2284 	mutex_exit(&pm_dep_thread_lock);
2285 }
2286 
2287 /*
2288  * Release the pm resource for this device.
2289  */
2290 void
2291 pm_rem_info(dev_info_t *dip)
2292 {
2293 	PMD_FUNC(pmf, "rem_info")
2294 	int		i, count = 0;
2295 	pm_info_t	*info = PM_GET_PM_INFO(dip);
2296 	dev_info_t	*pdip = ddi_get_parent(dip);
2297 	char		*pathbuf;
2298 	int		work_type = PM_DEP_WK_DETACH;
2299 
2300 	ASSERT(info);
2301 
2302 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
2303 	if (PM_ISDIRECT(dip)) {
2304 		info->pmi_dev_pm_state &= ~PM_DIRECT;
2305 		ASSERT(info->pmi_clone);
2306 		info->pmi_clone = 0;
2307 		pm_proceed(dip, PMP_RELEASE, -1, -1);
2308 	}
2309 	ASSERT(!PM_GET_PM_SCAN(dip));
2310 
2311 	/*
2312 	 * Now adjust parent's kidsupcnt.  BC nodes we check only comp 0,
2313 	 * Others we check all components.  BC node that has already
2314 	 * called pm_destroy_components() has zero component count.
2315 	 * Parents that get notification are not adjusted because their
2316 	 * kidsupcnt is always 0 (or 1 during configuration).
2317 	 */
2318 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d) has %d components\n", pmf,
2319 	    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
2320 
2321 	/* node is detached, so we can examine power without locking */
2322 	if (PM_ISBC(dip)) {
2323 		count = (PM_CURPOWER(dip, 0) != 0);
2324 	} else {
2325 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
2326 			count += (PM_CURPOWER(dip, i) != 0);
2327 	}
2328 
2329 	if (PM_NUMCMPTS(dip) && pdip && !PM_WANTS_NOTIFICATION(pdip))
2330 		e_pm_hold_rele_power(pdip, -count);
2331 
2332 	/* Schedule a request to clean up dependency records */
2333 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2334 	(void) ddi_pathname(dip, pathbuf);
2335 	pm_dispatch_to_dep_thread(work_type, pathbuf, pathbuf,
2336 	    PM_DEP_NOWAIT, NULL, (count > 0));
2337 	kmem_free(pathbuf, MAXPATHLEN);
2338 
2339 	/*
2340 	 * Adjust the pm_comps_notlowest count since this device is
2341 	 * not being power-managed anymore.
2342 	 */
2343 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
2344 		pm_component_t *cp = PM_CP(dip, i);
2345 		if (cp->pmc_cur_pwr != 0)
2346 			PM_DECR_NOTLOWEST(dip)
2347 	}
2348 	/*
2349 	 * Once we clear the info pointer, it looks like it is not power
2350 	 * managed to everybody else.
2351 	 */
2352 	pm_set_pm_info(dip, NULL);
2353 	kmem_free(info, sizeof (pm_info_t));
2354 }
2355 
2356 int
2357 pm_get_norm_pwrs(dev_info_t *dip, int **valuep, size_t *length)
2358 {
2359 	int components = PM_NUMCMPTS(dip);
2360 	int *bufp;
2361 	size_t size;
2362 	int i;
2363 
2364 	if (components <= 0) {
2365 		cmn_err(CE_NOTE, "!pm: %s@%s(%s#%d) has no components, "
2366 		    "can't get normal power values\n", PM_DEVICE(dip));
2367 		return (DDI_FAILURE);
2368 	} else {
2369 		size = components * sizeof (int);
2370 		bufp = kmem_alloc(size, KM_SLEEP);
2371 		for (i = 0; i < components; i++) {
2372 			bufp[i] = pm_get_normal_power(dip, i);
2373 		}
2374 	}
2375 	*length = size;
2376 	*valuep = bufp;
2377 	return (DDI_SUCCESS);
2378 }
2379 
2380 static int
2381 pm_reset_timestamps(dev_info_t *dip, void *arg)
2382 {
2383 	_NOTE(ARGUNUSED(arg))
2384 
2385 	int components;
2386 	int	i;
2387 
2388 	if (!PM_GET_PM_INFO(dip))
2389 		return (DDI_WALK_CONTINUE);
2390 	components = PM_NUMCMPTS(dip);
2391 	ASSERT(components > 0);
2392 	PM_LOCK_BUSY(dip);
2393 	for (i = 0; i < components; i++) {
2394 		struct pm_component *cp;
2395 		/*
2396 		 * If the component was not marked as busy,
2397 		 * reset its timestamp to now.
2398 		 */
2399 		cp = PM_CP(dip, i);
2400 		if (cp->pmc_timestamp)
2401 			cp->pmc_timestamp = gethrestime_sec();
2402 	}
2403 	PM_UNLOCK_BUSY(dip);
2404 	return (DDI_WALK_CONTINUE);
2405 }
2406 
2407 /*
2408  * Convert a power level to an index into the levels array (or
2409  * just PM_LEVEL_UNKNOWN in that special case).
2410  */
2411 static int
2412 pm_level_to_index(dev_info_t *dip, pm_component_t *cp, int level)
2413 {
2414 	PMD_FUNC(pmf, "level_to_index")
2415 	int i;
2416 	int limit = cp->pmc_comp.pmc_numlevels;
2417 	int *ip = cp->pmc_comp.pmc_lvals;
2418 
2419 	if (level == PM_LEVEL_UNKNOWN)
2420 		return (level);
2421 
2422 	for (i = 0; i < limit; i++) {
2423 		if (level == *ip++) {
2424 			PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d)[%d] to %x\n",
2425 			    pmf, PM_DEVICE(dip),
2426 			    (int)(cp - DEVI(dip)->devi_pm_components), level))
2427 			return (i);
2428 		}
2429 	}
2430 	panic("pm_level_to_index: level %d not found for device "
2431 	    "%s@%s(%s#%d)", level, PM_DEVICE(dip));
2432 	/*NOTREACHED*/
2433 }
2434 
2435 /*
2436  * Internal function to set current power level
2437  */
2438 static void
2439 e_pm_set_cur_pwr(dev_info_t *dip, pm_component_t *cp, int level)
2440 {
2441 	PMD_FUNC(pmf, "set_cur_pwr")
2442 	int curpwr = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
2443 	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
2444 
2445 	/*
2446 	 * Nothing to adjust if current & new levels are the same.
2447 	 */
2448 	if (curpwr != PM_LEVEL_UNKNOWN &&
2449 	    level == cp->pmc_comp.pmc_lvals[curpwr])
2450 		return;
2451 
2452 	/*
2453 	 * Keep the count for comps doing transition to/from lowest
2454 	 * level.
2455 	 */
2456 	if (curpwr == 0) {
2457 		PM_INCR_NOTLOWEST(dip);
2458 	} else if (level == cp->pmc_comp.pmc_lvals[0]) {
2459 		PM_DECR_NOTLOWEST(dip);
2460 	}
2461 	cp->pmc_phc_pwr = PM_LEVEL_UNKNOWN;
2462 	cp->pmc_cur_pwr = pm_level_to_index(dip, cp, level);
2463 }
2464 
2465 static int pm_phc_impl(dev_info_t *, int, int, int);
2466 
2467 /*
2468  * This is the default method of setting the power of a device if no ppm
2469  * driver has claimed it.
2470  */
2471 int
2472 pm_power(dev_info_t *dip, int comp, int level)
2473 {
2474 	PMD_FUNC(pmf, "power")
2475 	struct dev_ops	*ops;
2476 	int		(*fn)(dev_info_t *, int, int);
2477 	struct pm_component *cp = PM_CP(dip, comp);
2478 	int retval;
2479 	pm_info_t *info = PM_GET_PM_INFO(dip);
2480 
2481 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
2482 	    PM_DEVICE(dip), comp, level))
2483 	if (!(ops = ddi_get_driver(dip))) {
2484 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) has no ops\n", pmf,
2485 		    PM_DEVICE(dip)))
2486 		return (DDI_FAILURE);
2487 	}
2488 	if ((ops->devo_rev < 2) || !(fn = ops->devo_power)) {
2489 		PMD(PMD_FAIL, ("%s: %s%s\n", pmf,
2490 		    (ops->devo_rev < 2 ? " wrong devo_rev" : ""),
2491 		    (!fn ? " devo_power NULL" : "")))
2492 		return (DDI_FAILURE);
2493 	}
2494 	cp->pmc_flags |= PM_POWER_OP;
2495 	retval = (*fn)(dip, comp, level);
2496 	cp->pmc_flags &= ~PM_POWER_OP;
2497 	if (retval == DDI_SUCCESS) {
2498 		e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
2499 		return (DDI_SUCCESS);
2500 	}
2501 
2502 	/*
2503 	 * If pm_power_has_changed() detected a deadlock with pm_power() it
2504 	 * updated only the power level of the component.  If our attempt to
2505 	 * set the device new to a power level above has failed we sync the
2506 	 * total power state via phc code now.
2507 	 */
2508 	if (cp->pmc_flags & PM_PHC_WHILE_SET_POWER) {
2509 		int phc_lvl =
2510 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr];
2511 
2512 		ASSERT(info);
2513 		(void) pm_phc_impl(dip, comp, phc_lvl, 0);
2514 		PMD(PMD_PHC, ("%s: phc %s@%s(%s#%d) comp=%d level=%d\n",
2515 		    pmf, PM_DEVICE(dip), comp, phc_lvl))
2516 	}
2517 
2518 	PMD(PMD_FAIL, ("%s: can't set comp=%d (%s) of %s@%s(%s#%d) to "
2519 	    "level=%d (%s)\n", pmf, comp, cp->pmc_comp.pmc_name, PM_DEVICE(dip),
2520 	    level, power_val_to_string(cp, level)));
2521 	return (DDI_FAILURE);
2522 }
2523 
/*
 * Stop power managing dip: send a PMR_PPM_UNMANAGE request to the device's
 * ppm and then release the device's pm info via pm_rem_info().
 *
 * Returns the result of the ppm request, or 0 when no request was made.
 */
int
pm_unmanage(dev_info_t *dip)
{
	PMD_FUNC(pmf, "unmanage")
	power_req_t power_req;
	int result, retval = 0;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
	    PM_DEVICE(dip)))
	power_req.request_type = PMR_PPM_UNMANAGE;
	power_req.req.ppm_config_req.who = dip;
	/*
	 * In non-DEBUG kernels the request is only sent when a ppm has
	 * claimed the device; DEBUG kernels send it in either case.
	 */
	if (pm_ppm_claimed(dip))
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#ifdef DEBUG
	else
		retval = pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
		    &power_req, &result);
#endif
	ASSERT(retval == DDI_SUCCESS);
	pm_rem_info(dip);
	return (retval);
}
2548 
2549 int
2550 pm_raise_power(dev_info_t *dip, int comp, int level)
2551 {
2552 	if (level < 0)
2553 		return (DDI_FAILURE);
2554 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2555 	    !e_pm_valid_power(dip, comp, level))
2556 		return (DDI_FAILURE);
2557 
2558 	return (dev_is_needed(dip, comp, level, PM_LEVEL_UPONLY));
2559 }
2560 
2561 int
2562 pm_lower_power(dev_info_t *dip, int comp, int level)
2563 {
2564 	PMD_FUNC(pmf, "pm_lower_power")
2565 
2566 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
2567 	    !e_pm_valid_power(dip, comp, level)) {
2568 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
2569 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
2570 		return (DDI_FAILURE);
2571 	}
2572 
2573 	if (!DEVI_IS_DETACHING(dip)) {
2574 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) not detaching\n",
2575 		    pmf, PM_DEVICE(dip)))
2576 		return (DDI_FAILURE);
2577 	}
2578 
2579 	/*
2580 	 * If we don't care about saving power, or we're treating this node
2581 	 * specially, then this is a no-op
2582 	 */
2583 	if (!PM_SCANABLE(dip) || pm_noinvol(dip)) {
2584 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) %s%s%s%s\n",
2585 		    pmf, PM_DEVICE(dip),
2586 		    !autopm_enabled ? "!autopm_enabled " : "",
2587 		    !PM_POLLING_CPUPM ? "!cpupm_polling " : "",
2588 		    PM_CPUPM_DISABLED ? "cpupm_disabled " : "",
2589 		    pm_noinvol(dip) ? "pm_noinvol()" : ""))
2590 		return (DDI_SUCCESS);
2591 	}
2592 
2593 	if (dev_is_needed(dip, comp, level, PM_LEVEL_DOWNONLY) != DDI_SUCCESS) {
2594 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) dev_is_needed failed\n", pmf,
2595 		    PM_DEVICE(dip)))
2596 		return (DDI_FAILURE);
2597 	}
2598 	return (DDI_SUCCESS);
2599 }
2600 
2601 /*
2602  * Find the entries struct for a given dip in the blocked list, return it locked
2603  */
2604 static psce_t *
2605 pm_psc_dip_to_direct(dev_info_t *dip, pscc_t **psccp)
2606 {
2607 	pscc_t *p;
2608 	psce_t *psce;
2609 
2610 	rw_enter(&pm_pscc_direct_rwlock, RW_READER);
2611 	for (p = pm_pscc_direct; p; p = p->pscc_next) {
2612 		if (p->pscc_dip == dip) {
2613 			*psccp = p;
2614 			psce = p->pscc_entries;
2615 			mutex_enter(&psce->psce_lock);
2616 			ASSERT(psce);
2617 			rw_exit(&pm_pscc_direct_rwlock);
2618 			return (psce);
2619 		}
2620 	}
2621 	rw_exit(&pm_pscc_direct_rwlock);
2622 	panic("sunpm: no entry for dip %p in direct list", (void *)dip);
2623 	/*NOTREACHED*/
2624 }
2625 
/*
 * Write an entry indicating a power level change (to be passed to a process
 * later) in the given psce.
 * If we were called in the path that brings up the console fb in the
 * case of entering the prom, we don't want to sleep.  If the alloc fails, then
 * we create a record that has a size of -1, a physpath of NULL, and that
 * has the overflow flag set.
 *
 * The caller must hold psce->psce_lock; the lock is dropped before this
 * function returns.  The return value is nonzero if the slot being written
 * still held an unconsumed entry (which is overwritten), and 0 otherwise.
 */
static int
psc_entry(ushort_t event, psce_t *psce, dev_info_t *dip, int comp, int new,
    int old, int which, pm_canblock_t canblock)
{
	/*
	 * NOTE(review): buf is MAXNAMELEN bytes, while other ddi_pathname()
	 * callers in this file use MAXPATHLEN-sized buffers -- confirm that
	 * a device path can never exceed MAXNAMELEN here.
	 */
	char	buf[MAXNAMELEN];
	pm_state_change_t *p;
	size_t	size;
	caddr_t physpath = NULL;
	int	overrun = 0;

	ASSERT(MUTEX_HELD(&psce->psce_lock));
	(void) ddi_pathname(dip, buf);
	size = strlen(buf) + 1;
	/* p is the slot that will receive the new entry */
	p = psce->psce_in;
	if (canblock == PM_CANBLOCK_BYPASS) {
		/* must not sleep in this case, so the alloc may fail */
		physpath = kmem_alloc(size, KM_NOSLEEP);
		if (physpath == NULL) {
			/*
			 * mark current entry as overrun
			 */
			p->flags |= PSC_EVENT_LOST;
			size = (size_t)-1;
		}
	} else
		physpath = kmem_alloc(size, KM_SLEEP);
	if (p->size) {	/* overflow; mark the next entry */
		/* a size of -1 means the old entry never had a path buffer */
		if (p->size != (size_t)-1)
			kmem_free(p->physpath, p->size);
		ASSERT(psce->psce_out == p);
		/* advance the out pointer past the slot being overwritten */
		if (p == psce->psce_last) {
			psce->psce_first->flags |= PSC_EVENT_LOST;
			psce->psce_out = psce->psce_first;
		} else {
			(p + 1)->flags |= PSC_EVENT_LOST;
			psce->psce_out = (p + 1);
		}
		overrun++;
	} else if (physpath == NULL) {	/* alloc failed, mark this entry */
		p->flags |= PSC_EVENT_LOST;
		p->size = 0;
		p->physpath = NULL;
	}
	if (which == PSC_INTEREST) {
		/* report whether every component is at its lowest level */
		mutex_enter(&pm_compcnt_lock);
		if (pm_comps_notlowest == 0)
			p->flags |= PSC_ALL_LOWEST;
		else
			p->flags &= ~PSC_ALL_LOWEST;
		mutex_exit(&pm_compcnt_lock);
	}
	p->event = event;
	p->timestamp = gethrestime_sec();
	p->component = comp;
	p->old_level = old;
	p->new_level = new;
	p->physpath = physpath;
	p->size = size;
	if (physpath != NULL)
		(void) strcpy(p->physpath, buf);
	/* advance the in pointer, wrapping from the last slot to the first */
	if (p == psce->psce_last)
		psce->psce_in = psce->psce_first;
	else
		psce->psce_in = ++p;
	mutex_exit(&psce->psce_lock);
	return (overrun);
}
2700 
2701 /*
2702  * Find the next entry on the interest list.  We keep a pointer to the item we
2703  * last returned in the user's cooke.  Returns a locked entries struct.
2704  */
2705 static psce_t *
2706 psc_interest(void **cookie, pscc_t **psccp)
2707 {
2708 	pscc_t *pscc;
2709 	pscc_t **cookiep = (pscc_t **)cookie;
2710 
2711 	if (*cookiep == NULL)
2712 		pscc = pm_pscc_interest;
2713 	else
2714 		pscc = (*cookiep)->pscc_next;
2715 	if (pscc) {
2716 		*cookiep = pscc;
2717 		*psccp = pscc;
2718 		mutex_enter(&pscc->pscc_entries->psce_lock);
2719 		return (pscc->pscc_entries);
2720 	} else {
2721 		return (NULL);
2722 	}
2723 }
2724 
2725 /*
2726  * Create an entry for a process to pick up indicating a power level change.
2727  */
2728 static void
2729 pm_enqueue_notify(ushort_t cmd, dev_info_t *dip, int comp,
2730     int newlevel, int oldlevel, pm_canblock_t canblock)
2731 {
2732 	PMD_FUNC(pmf, "enqueue_notify")
2733 	pscc_t	*pscc;
2734 	psce_t	*psce;
2735 	void		*cookie = NULL;
2736 	int	overrun;
2737 
2738 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
2739 	switch (cmd) {
2740 	case PSC_PENDING_CHANGE:	/* only for controlling process */
2741 		PMD(PMD_DPM, ("%s: PENDING %s@%s(%s#%d), comp %d, %d -> %d\n",
2742 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2743 		psce = pm_psc_dip_to_direct(dip, &pscc);
2744 		ASSERT(psce);
2745 		PMD(PMD_IOCTL, ("%s: PENDING: %s@%s(%s#%d) pm_poll_cnt[%d] "
2746 		    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2747 		    pm_poll_cnt[pscc->pscc_clone]))
2748 		overrun = psc_entry(cmd, psce, dip, comp, newlevel, oldlevel,
2749 		    PSC_DIRECT, canblock);
2750 		PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2751 		mutex_enter(&pm_clone_lock);
2752 		if (!overrun)
2753 			pm_poll_cnt[pscc->pscc_clone]++;
2754 		cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2755 		pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2756 		mutex_exit(&pm_clone_lock);
2757 		break;
2758 	case PSC_HAS_CHANGED:
2759 		PMD(PMD_DPM, ("%s: HAS %s@%s(%s#%d), comp %d, %d -> %d\n",
2760 		    pmf, PM_DEVICE(dip), comp, oldlevel, newlevel))
2761 		if (PM_ISDIRECT(dip) && canblock != PM_CANBLOCK_BYPASS) {
2762 			psce = pm_psc_dip_to_direct(dip, &pscc);
2763 			PMD(PMD_IOCTL, ("%s: HAS: %s@%s(%s#%d) pm_poll_cnt[%d] "
2764 			    "%d\n", pmf, PM_DEVICE(dip), pscc->pscc_clone,
2765 			    pm_poll_cnt[pscc->pscc_clone]))
2766 			overrun = psc_entry(cmd, psce, dip, comp, newlevel,
2767 			    oldlevel, PSC_DIRECT, canblock);
2768 			PMD(PMD_DPM, ("%s: sig %d\n", pmf, pscc->pscc_clone))
2769 			mutex_enter(&pm_clone_lock);
2770 			if (!overrun)
2771 				pm_poll_cnt[pscc->pscc_clone]++;
2772 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2773 			pollwakeup(&pm_pollhead, (POLLRDNORM | POLLIN));
2774 			mutex_exit(&pm_clone_lock);
2775 		}
2776 		mutex_enter(&pm_clone_lock);
2777 		rw_enter(&pm_pscc_interest_rwlock, RW_READER);
2778 		while ((psce = psc_interest(&cookie, &pscc)) != NULL) {
2779 			(void) psc_entry(cmd, psce, dip, comp, newlevel,
2780 			    oldlevel, PSC_INTEREST, canblock);
2781 			cv_signal(&pm_clones_cv[pscc->pscc_clone]);
2782 		}
2783 		rw_exit(&pm_pscc_interest_rwlock);
2784 		mutex_exit(&pm_clone_lock);
2785 		break;
2786 #ifdef DEBUG
2787 	default:
2788 		ASSERT(0);
2789 #endif
2790 	}
2791 }
2792 
2793 static void
2794 pm_enqueue_notify_others(pm_ppm_devlist_t **listp, pm_canblock_t canblock)
2795 {
2796 	if (listp) {
2797 		pm_ppm_devlist_t *p, *next = NULL;
2798 
2799 		for (p = *listp; p; p = next) {
2800 			next = p->ppd_next;
2801 			pm_enqueue_notify(PSC_HAS_CHANGED, p->ppd_who,
2802 			    p->ppd_cmpt, p->ppd_new_level, p->ppd_old_level,
2803 			    canblock);
2804 			kmem_free(p, sizeof (pm_ppm_devlist_t));
2805 		}
2806 		*listp = NULL;
2807 	}
2808 }
2809 
2810 /*
2811  * Try to get the power locks of the parent node and target (child)
2812  * node.  Return true if successful (with both locks held) or false
2813  * (with no locks held).
2814  */
2815 static int
2816 pm_try_parent_child_locks(dev_info_t *pdip,
2817     dev_info_t *dip, int *pcircp, int *circp)
2818 {
2819 	if (ndi_devi_tryenter(pdip, pcircp))
2820 		if (PM_TRY_LOCK_POWER(dip, circp)) {
2821 			return (1);
2822 		} else {
2823 			ndi_devi_exit(pdip, *pcircp);
2824 		}
2825 	return (0);
2826 }
2827 
2828 /*
2829  * Determine if the power lock owner is blocked by current thread.
2830  * returns :
2831  * 	1 - If the thread owning the effective power lock (the first lock on
2832  *          which a thread blocks when it does PM_LOCK_POWER) is blocked by
2833  *          a mutex held by the current thread.
2834  *
2835  *	0 - otherwise
2836  *
2837  * Note : This function is called by pm_power_has_changed to determine whether
2838  * it is executing in parallel with pm_set_power.
2839  */
2840 static int
2841 pm_blocked_by_us(dev_info_t *dip)
2842 {
2843 	power_req_t power_req;
2844 	kthread_t *owner;
2845 	int result;
2846 	kmutex_t *mp;
2847 	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
2848 
2849 	power_req.request_type = PMR_PPM_POWER_LOCK_OWNER;
2850 	power_req.req.ppm_power_lock_owner_req.who = dip;
2851 	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req, &result) !=
2852 	    DDI_SUCCESS) {
2853 		/*
2854 		 * It is assumed that if the device is claimed by ppm, ppm
2855 		 * will always implement this request type and it'll always
2856 		 * return success. We panic here, if it fails.
2857 		 */
2858 		panic("pm: Can't determine power lock owner of %s@%s(%s#%d)\n",
2859 		    PM_DEVICE(dip));
2860 		/*NOTREACHED*/
2861 	}
2862 
2863 	if ((owner = power_req.req.ppm_power_lock_owner_req.owner) != NULL &&
2864 	    owner->t_state == TS_SLEEP &&
2865 	    owner->t_sobj_ops &&
2866 	    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_MUTEX &&
2867 	    (mp = (kmutex_t *)owner->t_wchan) &&
2868 	    mutex_owner(mp) == curthread)
2869 		return (1);
2870 
2871 	return (0);
2872 }
2873 
2874 /*
2875  * Notify parent which wants to hear about a child's power changes.
2876  */
2877 static void
2878 pm_notify_parent(dev_info_t *dip,
2879     dev_info_t *pdip, int comp, int old_level, int level)
2880 {
2881 	pm_bp_has_changed_t bphc;
2882 	pm_sp_misc_t pspm;
2883 	char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2884 	int result = DDI_SUCCESS;
2885 
2886 	bphc.bphc_dip = dip;
2887 	bphc.bphc_path = ddi_pathname(dip, pathbuf);
2888 	bphc.bphc_comp = comp;
2889 	bphc.bphc_olevel = old_level;
2890 	bphc.bphc_nlevel = level;
2891 	pspm.pspm_canblock = PM_CANBLOCK_BLOCK;
2892 	pspm.pspm_scan = 0;
2893 	bphc.bphc_private = &pspm;
2894 	(void) (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
2895 	    BUS_POWER_HAS_CHANGED, (void *)&bphc, (void *)&result);
2896 	kmem_free(pathbuf, MAXPATHLEN);
2897 }
2898 
2899 /*
2900  * Check if we need to resume a BC device, and make the attach call as required.
2901  */
2902 static int
2903 pm_check_and_resume(dev_info_t *dip, int comp, int old_level, int level)
2904 {
2905 	int ret = DDI_SUCCESS;
2906 
2907 	if (PM_ISBC(dip) && comp == 0 && old_level == 0 && level != 0) {
2908 		ASSERT(DEVI(dip)->devi_pm_flags & PMC_SUSPENDED);
2909 		/* ppm is not interested in DDI_PM_RESUME */
2910 		if ((ret = devi_attach(dip, DDI_PM_RESUME)) != DDI_SUCCESS)
2911 			/* XXX Should we mark it resumed, */
2912 			/* even though it failed? */
2913 			cmn_err(CE_WARN, "!pm: Can't resume %s@%s",
2914 			    PM_NAME(dip), PM_ADDR(dip));
2915 		DEVI(dip)->devi_pm_flags &= ~PMC_SUSPENDED;
2916 	}
2917 
2918 	return (ret);
2919 }
2920 
2921 /*
2922  * Tests outside the lock to see if we should bother to enqueue an entry
2923  * for any watching process.  If yes, then caller will take the lock and
2924  * do the full protocol
2925  */
2926 static int
2927 pm_watchers()
2928 {
2929 	if (pm_processes_stopped)
2930 		return (0);
2931 	return (pm_pscc_direct || pm_pscc_interest);
2932 }
2933 
/* Defined below; does the actual state update for pm_power_has_changed. */
static int pm_phc_impl(dev_info_t *, int, int, int);

/*
 * A driver is reporting that the power of one of its device's components
 * has changed.  Update the power state accordingly.
 *
 *	dip	device whose component changed
 *	comp	component number
 *	level	new power level of the component (must be >= 0)
 *
 * Returns DDI_FAILURE if level is negative or if dip/comp/level fail the
 * e_pm_valid_*() checks.  Returns DDI_SUCCESS when the change was only
 * recorded because of the set_power deadlock described below.  Otherwise
 * returns whatever pm_phc_impl() returns.
 */
int
pm_power_has_changed(dev_info_t *dip, int comp, int level)
{
	PMD_FUNC(pmf, "pm_power_has_changed")
	int ret;
	dev_info_t *pdip = ddi_get_parent(dip);
	struct pm_component *cp;
	int blocked, circ, pcirc, old_level;

	if (level < 0) {
		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d): bad level=%d\n", pmf,
		    PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	PMD(PMD_KIDSUP | PMD_DEP, ("%s: %s@%s(%s#%d), comp=%d, level=%d\n", pmf,
	    PM_DEVICE(dip), comp, level))

	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, &cp) ||
	    !e_pm_valid_power(dip, comp, level))
		return (DDI_FAILURE);

	/*
	 * A driver thread calling pm_power_has_changed and another thread
	 * calling pm_set_power can deadlock.  The problem is not resolvable
	 * by changing lock order, so we use pm_blocked_by_us() to detect
	 * this specific deadlock.  If we can't get the lock immediately
	 * and we are deadlocked, just update the component's level, do
	 * notifications, and return.  We intend to update the total power
	 * state later (if the other thread fails to set power to the
	 * desired level).  If we were called because of a power change on a
	 * component that isn't involved in a set_power op, update all state
	 * immediately.
	 */
	cp = PM_CP(dip, comp);
	/* Retry once per tick until both parent and child locks are held. */
	while (!pm_try_parent_child_locks(pdip, dip, &pcirc, &circ)) {
		/*
		 * Case 1: the power lock owner is blocked by us and this
		 * very component has a power operation in progress.
		 */
		if (((blocked = pm_blocked_by_us(dip)) != 0) &&
		    (cp->pmc_flags & PM_POWER_OP)) {
			if (pm_watchers()) {
				mutex_enter(&pm_rsvp_lock);
				pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp,
				    level, cur_power(cp), PM_CANBLOCK_BLOCK);
				mutex_exit(&pm_rsvp_lock);
			}
			if (pdip && PM_WANTS_NOTIFICATION(pdip))
				pm_notify_parent(dip,
				    pdip, comp, cur_power(cp), level);
			(void) pm_check_and_resume(dip,
			    comp, cur_power(cp), level);

			/*
			 * Stash the old power index, update curpwr, and flag
			 * that the total power state needs to be synched.
			 */
			cp->pmc_flags |= PM_PHC_WHILE_SET_POWER;
			/*
			 * Several pm_power_has_changed calls could arrive
			 * while the set power path remains blocked.  Keep the
			 * oldest old power and the newest new power of any
			 * sequence of phc calls which arrive during deadlock.
			 */
			if (cp->pmc_phc_pwr == PM_LEVEL_UNKNOWN)
				cp->pmc_phc_pwr = cp->pmc_cur_pwr;
			cp->pmc_cur_pwr =
			    pm_level_to_index(dip, cp, level);
			PMD(PMD_PHC, ("%s: deadlock for %s@%s(%s#%d), comp=%d, "
			    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
			return (DDI_SUCCESS);
		} else
			/*
			 * Case 2: we block the lock owner, but the pending
			 * power operation is not on this component.  Do the
			 * full update holding (at most) the parent lock.
			 */
			if (blocked) {	/* blocked, but different cmpt? */
				if (!ndi_devi_tryenter(pdip, &pcirc)) {
					/*
					 * No parent lock either: update the
					 * child only and skip the parent
					 * hold/release accounting.
					 */
					cmn_err(CE_NOTE,
					    "!pm: parent kuc not updated due "
					    "to possible deadlock.\n");
					return (pm_phc_impl(dip,
					    comp, level, 1));
				}
				old_level = cur_power(cp);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
				    (!PM_ISBC(dip) || comp == 0) &&
				    POWERING_ON(old_level, level))
					pm_hold_power(pdip);
				ret = pm_phc_impl(dip, comp, level, 1);
				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
					if ((!PM_ISBC(dip) ||
					    comp == 0) && level == 0 &&
					    old_level != PM_LEVEL_UNKNOWN)
						pm_rele_power(pdip);
				}
				ndi_devi_exit(pdip, pcirc);
				/* child lock not held: deadlock */
				return (ret);
			}
		/* Not blocked by us: wait a tick and try the locks again. */
		delay(1);
		PMD(PMD_PHC, ("%s: try lock again\n", pmf))
	}

	/* non-deadlock case */
	old_level = cur_power(cp);
	/*
	 * For a parent that does not want notification, take a hold on it
	 * before the child powers on and drop one after the child reaches
	 * level 0.  For BC devices only component 0 participates.
	 */
	if (pdip && !PM_WANTS_NOTIFICATION(pdip) &&
	    (!PM_ISBC(dip) || comp == 0) && POWERING_ON(old_level, level))
		pm_hold_power(pdip);
	ret = pm_phc_impl(dip, comp, level, 1);
	if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
		if ((!PM_ISBC(dip) || comp == 0) && level == 0 &&
		    old_level != PM_LEVEL_UNKNOWN)
			pm_rele_power(pdip);
	}
	PM_UNLOCK_POWER(dip, circ);
	ndi_devi_exit(pdip, pcirc);
	return (ret);
}
3052 
3053 /*
3054  * Account for power changes to a component of the the console frame buffer.
3055  * If lowering power from full (or "unkown", which is treatd as full)
3056  * we will increment the "components off" count of the fb device.
3057  * Subsequent lowering of the same component doesn't affect the count.  If
3058  * raising a component back to full power, we will decrement the count.
3059  *
3060  * Return: the increment value for pm_cfb_comps_off (-1, 0, or 1)
3061  */
3062 static int
3063 calc_cfb_comps_incr(dev_info_t *dip, int cmpt, int old, int new)
3064 {
3065 	struct pm_component *cp = PM_CP(dip, cmpt);
3066 	int on = (old == PM_LEVEL_UNKNOWN || old == cp->pmc_norm_pwr);
3067 	int want_normal = (new == cp->pmc_norm_pwr);
3068 	int incr = 0;
3069 
3070 	if (on && !want_normal)
3071 		incr = 1;
3072 	else if (!on && want_normal)
3073 		incr = -1;
3074 	return (incr);
3075 }
3076 
3077 /*
3078  * Adjust the count of console frame buffer components < full power.
3079  */
3080 static void
3081 update_comps_off(int incr, dev_info_t *dip)
3082 {
3083 		mutex_enter(&pm_cfb_lock);
3084 		pm_cfb_comps_off += incr;
3085 		ASSERT(pm_cfb_comps_off <= PM_NUMCMPTS(dip));
3086 		mutex_exit(&pm_cfb_lock);
3087 }
3088 
/*
 * Update the power state in the framework (via the ppm).  The 'notify'
 * argument tells whether to notify watchers.  Power lock is already held.
 * (NOTE(review): pm_power_has_changed() also calls this from its
 * deadlock-avoidance path, where its own comment says the child lock is
 * not held.)
 *
 *	dip	device whose component changed
 *	comp	component number
 *	level	new power level of the component
 *	notify	non-zero to notify the parent, resume a BC device if needed,
 *		and enqueue a PSC_HAS_CHANGED event for watchers
 *
 * Returns DDI_SUCCESS if the component was already at 'level' or after the
 * state has been updated; DDI_FAILURE if the ppm notification fails.
 */
static int
pm_phc_impl(dev_info_t *dip, int comp, int level, int notify)
{
	PMD_FUNC(pmf, "phc_impl")
	power_req_t power_req;
	int i, dodeps = 0;
	dev_info_t *pdip = ddi_get_parent(dip);
	int result;
	int old_level;
	struct pm_component *cp;
	int incr = 0;
	dev_info_t *ppm = (dev_info_t *)DEVI(dip)->devi_pm_ppm;
	int work_type = 0;
	char *pathbuf;

	/* Must use "official" power level for this test. */
	cp = PM_CP(dip, comp);
	/*
	 * If a pm_power_has_changed arrived while set_power was blocked,
	 * the stashed pmc_phc_pwr index is the official old power.
	 */
	old_level = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
	    cp->pmc_phc_pwr : cp->pmc_cur_pwr);
	/* Convert the index into a level unless it is unknown. */
	if (old_level != PM_LEVEL_UNKNOWN)
		old_level = cp->pmc_comp.pmc_lvals[old_level];

	if (level == old_level) {
		PMD(PMD_SET, ("%s: %s@%s(%s#%d), comp=%d is already at "
		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
		return (DDI_SUCCESS);
	}

	/*
	 * Tell ppm about this.
	 */
	power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
	power_req.req.ppm_notify_level_req.who = dip;
	power_req.req.ppm_notify_level_req.cmpt = comp;
	power_req.req.ppm_notify_level_req.new_level = level;
	power_req.req.ppm_notify_level_req.old_level = old_level;
	if (pm_ctlops(ppm, dip, DDI_CTLOPS_POWER, &power_req,
	    &result) == DDI_FAILURE) {
		PMD(PMD_FAIL, ("%s: pm_ctlops %s@%s(%s#%d) to %d failed\n",
		    pmf, PM_DEVICE(dip), level))
		return (DDI_FAILURE);
	}

	/* Keep the console frame buffer "components off" count in step. */
	if (PM_IS_CFB(dip)) {
		incr = calc_cfb_comps_incr(dip, comp, old_level, level);

		if (incr) {
			update_comps_off(incr, dip);
			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) comp=%d %d->%d "
			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
			    comp, old_level, level, pm_cfb_comps_off))
		}
	}
	e_pm_set_cur_pwr(dip, PM_CP(dip, comp), level);
	result = DDI_SUCCESS;

	if (notify) {
		if (pdip && PM_WANTS_NOTIFICATION(pdip))
			pm_notify_parent(dip, pdip, comp, old_level, level);
		(void) pm_check_and_resume(dip, comp, old_level, level);
	}

	/*
	 * Decrement the dependency kidsup count if we turn a device
	 * off.
	 */
	if (POWERING_OFF(old_level, level)) {
		dodeps = 1;
		/* Only counts as "off" if no component has power left. */
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (cur_power(cp)) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_OFF;
	}

	/*
	 * Increment if we turn it on. Check to see
	 * if other comps are already on, if so,
	 * dont increment.
	 */
	if (POWERING_ON(old_level, level)) {
		dodeps = 1;
		for (i = 0; i < PM_NUMCMPTS(dip); i++) {
			cp = PM_CP(dip, i);
			if (comp == i)
				continue;
			/* -1 also treated as 0 in this case */
			if (cur_power(cp) > 0) {
				dodeps = 0;
				break;
			}
		}
		if (dodeps)
			work_type = PM_DEP_WK_POWER_ON;
	}

	/* Hand the on/off transition to the dependency thread. */
	if (dodeps) {
		pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, pathbuf);
		pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
		    PM_DEP_NOWAIT, NULL, 0);
		kmem_free(pathbuf, MAXPATHLEN);
	}

	/*
	 * level != old_level always holds here: the equal case returned
	 * early and neither variable has been modified since.
	 */
	if (notify && (level != old_level) && pm_watchers()) {
		mutex_enter(&pm_rsvp_lock);
		pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, level, old_level,
		    PM_CANBLOCK_BLOCK);
		mutex_exit(&pm_rsvp_lock);
	}

	PMD(PMD_RESCAN, ("%s: %s@%s(%s#%d): pm_rescan\n", pmf, PM_DEVICE(dip)))
	pm_rescan(dip);
	return (DDI_SUCCESS);
}
3212 
3213 /*
3214  * This function is called at startup time to notify pm of the existence
3215  * of any platform power managers for this platform.  As a result of
3216  * this registration, each function provided will be called each time
3217  * a device node is attached, until one returns true, and it must claim the
3218  * device node (by returning non-zero) if it wants to be involved in the
3219  * node's power management.  If it does claim the node, then it will
3220  * subsequently be notified of attach and detach events.
3221  *
3222  */
3223 
3224 int
3225 pm_register_ppm(int (*func)(dev_info_t *), dev_info_t *dip)
3226 {
3227 	PMD_FUNC(pmf, "register_ppm")
3228 	struct ppm_callbacks *ppmcp;
3229 	pm_component_t *cp;
3230 	int i, pwr, result, circ;
3231 	power_req_t power_req;
3232 	struct ppm_notify_level_req *p = &power_req.req.ppm_notify_level_req;
3233 	void pm_ppm_claim(dev_info_t *);
3234 
3235 	mutex_enter(&ppm_lock);
3236 	ppmcp = ppm_callbacks;
3237 	for (i = 0; i < MAX_PPM_HANDLERS; i++, ppmcp++) {
3238 		if (ppmcp->ppmc_func == NULL) {
3239 			ppmcp->ppmc_func = func;
3240 			ppmcp->ppmc_dip = dip;
3241 			break;
3242 		}
3243 	}
3244 	mutex_exit(&ppm_lock);
3245 
3246 	if (i >= MAX_PPM_HANDLERS)
3247 		return (DDI_FAILURE);
3248 	while ((dip = ddi_get_parent(dip)) != NULL) {
3249 		if (dip != ddi_root_node() && PM_GET_PM_INFO(dip) == NULL)
3250 			continue;
3251 		pm_ppm_claim(dip);
3252 		/* don't bother with the not power-manageable nodes */
3253 		if (pm_ppm_claimed(dip) && PM_GET_PM_INFO(dip)) {
3254 			/*
3255 			 * Tell ppm about this.
3256 			 */
3257 			power_req.request_type = PMR_PPM_POWER_CHANGE_NOTIFY;
3258 			p->old_level = PM_LEVEL_UNKNOWN;
3259 			p->who = dip;
3260 			PM_LOCK_POWER(dip, &circ);
3261 			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3262 				cp = PM_CP(dip, i);
3263 				pwr = cp->pmc_cur_pwr;
3264 				if (pwr != PM_LEVEL_UNKNOWN) {
3265 					p->cmpt = i;
3266 					p->new_level = cur_power(cp);
3267 					p->old_level = PM_LEVEL_UNKNOWN;
3268 					if (pm_ctlops(PPM(dip), dip,
3269 					    DDI_CTLOPS_POWER, &power_req,
3270 					    &result) == DDI_FAILURE) {
3271 						PMD(PMD_FAIL, ("%s: pc "
3272 						    "%s@%s(%s#%d) to %d "
3273 						    "fails\n", pmf,
3274 						    PM_DEVICE(dip), pwr))
3275 					}
3276 				}
3277 			}
3278 			PM_UNLOCK_POWER(dip, circ);
3279 		}
3280 	}
3281 	return (DDI_SUCCESS);
3282 }
3283 
3284 /*
3285  * Call the ppm's that have registered and adjust the devinfo struct as
3286  * appropriate.  First one to claim it gets it.  The sets of devices claimed
3287  * by each ppm are assumed to be disjoint.
3288  */
3289 void
3290 pm_ppm_claim(dev_info_t *dip)
3291 {
3292 	struct ppm_callbacks *ppmcp;
3293 
3294 	if (PPM(dip)) {
3295 		return;
3296 	}
3297 	mutex_enter(&ppm_lock);
3298 	for (ppmcp = ppm_callbacks; ppmcp->ppmc_func; ppmcp++) {
3299 		if ((*ppmcp->ppmc_func)(dip)) {
3300 			DEVI(dip)->devi_pm_ppm =
3301 			    (struct dev_info *)ppmcp->ppmc_dip;
3302 			mutex_exit(&ppm_lock);
3303 			return;
3304 		}
3305 	}
3306 	mutex_exit(&ppm_lock);
3307 }
3308 
3309 /*
3310  * Node is being detached so stop autopm until we see if it succeeds, in which
3311  * case pm_stop will be called.  For backwards compatible devices we bring the
3312  * device up to full power on the assumption the detach will succeed.
3313  */
3314 void
3315 pm_detaching(dev_info_t *dip)
3316 {
3317 	PMD_FUNC(pmf, "detaching")
3318 	pm_info_t *info = PM_GET_PM_INFO(dip);
3319 	int iscons;
3320 
3321 	PMD(PMD_REMDEV, ("%s: %s@%s(%s#%d), %d comps\n", pmf, PM_DEVICE(dip),
3322 	    PM_NUMCMPTS(dip)))
3323 	if (info == NULL)
3324 		return;
3325 	ASSERT(DEVI_IS_DETACHING(dip));
3326 	PM_LOCK_DIP(dip);
3327 	info->pmi_dev_pm_state |= PM_DETACHING;
3328 	PM_UNLOCK_DIP(dip);
3329 	if (!PM_ISBC(dip))
3330 		pm_scan_stop(dip);
3331 
3332 	/*
3333 	 * console and old-style devices get brought up when detaching.
3334 	 */
3335 	iscons = PM_IS_CFB(dip);
3336 	if (iscons || PM_ISBC(dip)) {
3337 		(void) pm_all_to_normal(dip, PM_CANBLOCK_BYPASS);
3338 		if (iscons) {
3339 			mutex_enter(&pm_cfb_lock);
3340 			while (cfb_inuse) {
3341 				mutex_exit(&pm_cfb_lock);
3342 				PMD(PMD_CFB, ("%s: delay; cfb_inuse\n", pmf))
3343 				delay(1);
3344 				mutex_enter(&pm_cfb_lock);
3345 			}
3346 			ASSERT(cfb_dip_detaching == NULL);
3347 			ASSERT(cfb_dip);
3348 			cfb_dip_detaching = cfb_dip;	/* case detach fails */
3349 			cfb_dip = NULL;
3350 			mutex_exit(&pm_cfb_lock);
3351 		}
3352 	}
3353 }
3354 
3355 /*
3356  * Node failed to detach.  If it used to be autopm'd, make it so again.
3357  */
3358 void
3359 pm_detach_failed(dev_info_t *dip)
3360 {
3361 	PMD_FUNC(pmf, "detach_failed")
3362 	pm_info_t *info = PM_GET_PM_INFO(dip);
3363 	int pm_all_at_normal(dev_info_t *);
3364 
3365 	if (info == NULL)
3366 		return;
3367 	ASSERT(DEVI_IS_DETACHING(dip));
3368 	if (info->pmi_dev_pm_state & PM_DETACHING) {
3369 		info->pmi_dev_pm_state &= ~PM_DETACHING;
3370 		if (info->pmi_dev_pm_state & PM_ALLNORM_DEFERRED) {
3371 			/* Make sure the operation is still needed */
3372 			if (!pm_all_at_normal(dip)) {
3373 				if (pm_all_to_normal(dip,
3374 				    PM_CANBLOCK_FAIL) != DDI_SUCCESS) {
3375 					PMD(PMD_ERROR, ("%s: could not bring "
3376 					    "%s@%s(%s#%d) to normal\n", pmf,
3377 					    PM_DEVICE(dip)))
3378 				}
3379 			}
3380 			info->pmi_dev_pm_state &= ~PM_ALLNORM_DEFERRED;
3381 		}
3382 	}
3383 	if (!PM_ISBC(dip)) {
3384 		mutex_enter(&pm_scan_lock);
3385 		if (PM_SCANABLE(dip))
3386 			pm_scan_init(dip);
3387 		mutex_exit(&pm_scan_lock);
3388 		pm_rescan(dip);
3389 	}
3390 }
3391 
/*
 * generic Backwards Compatible component
 *
 * bc_comp is the template that e_pm_default_components() struct-assigns
 * into every component of a BC device before allocating that component's
 * own level and threshold arrays.  bc_names supplies the two level names.
 * NOTE(review): the initializer is positional; confirm the field order
 * against the pm_comp_t declaration.
 */
static char *bc_names[] = {"off", "on"};

static pm_comp_t bc_comp = {"unknown", 2, NULL, NULL, &bc_names[0]};
3396 
3397 static void
3398 e_pm_default_levels(dev_info_t *dip, pm_component_t *cp, int norm)
3399 {
3400 	pm_comp_t *pmc;
3401 	pmc = &cp->pmc_comp;
3402 	pmc->pmc_numlevels = 2;
3403 	pmc->pmc_lvals[0] = 0;
3404 	pmc->pmc_lvals[1] = norm;
3405 	e_pm_set_cur_pwr(dip, cp, norm);
3406 }
3407 
3408 static void
3409 e_pm_default_components(dev_info_t *dip, int cmpts)
3410 {
3411 	int i;
3412 	pm_component_t *p = DEVI(dip)->devi_pm_components;
3413 
3414 	p = DEVI(dip)->devi_pm_components;
3415 	for (i = 0; i < cmpts; i++, p++) {
3416 		p->pmc_comp = bc_comp;	/* struct assignment */
3417 		p->pmc_comp.pmc_lvals = kmem_zalloc(2 * sizeof (int),
3418 		    KM_SLEEP);
3419 		p->pmc_comp.pmc_thresh = kmem_alloc(2 * sizeof (int),
3420 		    KM_SLEEP);
3421 		p->pmc_comp.pmc_numlevels = 2;
3422 		p->pmc_comp.pmc_thresh[0] = INT_MAX;
3423 		p->pmc_comp.pmc_thresh[1] = INT_MAX;
3424 	}
3425 }
3426 
/*
 * Called from functions that require components to exist already to allow
 * for their creation by parsing the pm-components property.
 * Device will not be power managed as a result of this call
 * No locking needed because we're single threaded by the ndi_devi_enter
 * done while attaching, and the device isn't visible until after it has
 * attached
 *
 *	dip	device to set up
 *	style	PM_STYLE_NEW forbids the backwards compatible fallback
 *
 * Returns DDI_SUCCESS if components exist (or already did), DDI_FAILURE
 * otherwise.  A malformed property, or a missing property under
 * PM_STYLE_NEW, sets PMC_COMPONENTS_FAILED so that later calls fail
 * immediately.
 */
int
pm_premanage(dev_info_t *dip, int style)
{
	PMD_FUNC(pmf, "premanage")
	pm_comp_t	*pcp, *compp;
	int		cmpts, i, norm, error;
	pm_component_t *p = DEVI(dip)->devi_pm_components;
	pm_comp_t *pm_autoconfig(dev_info_t *, int *);

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	/*
	 * If this dip has already been processed, don't mess with it
	 */
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE)
		return (DDI_SUCCESS);
	if (DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_FAILED) {
		return (DDI_FAILURE);
	}
	/*
	 * Look up pm-components property and create components accordingly
	 * If that fails, fall back to backwards compatibility
	 */
	if ((compp = pm_autoconfig(dip, &error)) == NULL) {
		/*
		 * If error is set, the property existed but was not well formed
		 */
		if (error || (style == PM_STYLE_NEW)) {
			DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_FAILED;
			return (DDI_FAILURE);
		}
		/*
		 * If they don't have the pm-components property, then we
		 * want the old "no pm until PM_SET_DEVICE_THRESHOLDS ioctl"
		 * behavior driver must have called pm_create_components, and
		 * we need to flesh out dummy components
		 */
		if ((cmpts = PM_NUMCMPTS(dip)) == 0) {
			/*
			 * Not really failure, but we don't want the
			 * caller to treat it as success
			 */
			return (DDI_FAILURE);
		}
		DEVI(dip)->devi_pm_flags |= PMC_BC;
		e_pm_default_components(dip, cmpts);
		for (i = 0; i < cmpts; i++) {
			/*
			 * if normal power not set yet, we don't really know
			 * what *ANY* of the power values are.  If normal
			 * power is set, then we assume for this backwards
			 * compatible case that the values are 0, normal power.
			 */
			norm = pm_get_normal_power(dip, i);
			/*
			 * NOTE(review): norm is an int compared against
			 * (uint_t)-1; presumably -1 marks "normal power not
			 * set" -- confirm before changing the comparison.
			 */
			if (norm == (uint_t)-1) {
				PMD(PMD_ERROR, ("%s: %s@%s(%s#%d)[%d]\n", pmf,
				    PM_DEVICE(dip), i))
				return (DDI_FAILURE);
			}
			/*
			 * Components of BC devices start at their normal power,
			 * so count them to be not at their lowest power.
			 */
			PM_INCR_NOTLOWEST(dip);
			e_pm_default_levels(dip, PM_CP(dip, i), norm);
		}
	} else {
		/*
		 * e_pm_create_components was called from pm_autoconfig(), it
		 * creates components with no descriptions (or known levels)
		 */
		cmpts = PM_NUMCMPTS(dip);
		ASSERT(cmpts != 0);
		pcp = compp;
		p = DEVI(dip)->devi_pm_components;
		/* Copy each parsed description in; power starts out unknown. */
		for (i = 0; i < cmpts; i++, p++) {
			p->pmc_comp = *pcp++;   /* struct assignment */
			ASSERT(PM_CP(dip, i)->pmc_cur_pwr == 0);
			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
		}
		if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
			pm_set_device_threshold(dip, pm_cpu_idle_threshold,
			    PMC_CPU_THRESH);
		else
			pm_set_device_threshold(dip, pm_system_idle_threshold,
			    PMC_DEF_THRESH);
		/* The array itself was only a carrier for the struct copies. */
		kmem_free(compp, cmpts * sizeof (pm_comp_t));
	}
	return (DDI_SUCCESS);
}
3524 
3525 /*
3526  * Called from during or after the device's attach to let us know it is ready
3527  * to play autopm.   Look up the pm model and manage the device accordingly.
3528  * Returns system call errno value.
3529  * If DDI_ATTACH and DDI_DETACH were in same namespace, this would be
3530  * a little cleaner
3531  *
3532  * Called with dip lock held, return with dip lock unheld.
3533  */
3534 
3535 int
3536 e_pm_manage(dev_info_t *dip, int style)
3537 {
3538 	PMD_FUNC(pmf, "e_manage")
3539 	pm_info_t	*info;
3540 	dev_info_t	*pdip = ddi_get_parent(dip);
3541 	int	pm_thresh_specd(dev_info_t *);
3542 	int	count;
3543 	char	*pathbuf;
3544 
3545 	if (pm_premanage(dip, style) != DDI_SUCCESS) {
3546 		return (DDI_FAILURE);
3547 	}
3548 	PMD(PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3549 	ASSERT(PM_GET_PM_INFO(dip) == NULL);
3550 	info = kmem_zalloc(sizeof (pm_info_t), KM_SLEEP);
3551 
3552 	/*
3553 	 * Now set up parent's kidsupcnt.  BC nodes are assumed to start
3554 	 * out at their normal power, so they are "up", others start out
3555 	 * unknown, which is effectively "up".  Parent which want notification
3556 	 * get kidsupcnt of 0 always.
3557 	 */
3558 	count = (PM_ISBC(dip)) ? 1 : PM_NUMCMPTS(dip);
3559 	if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
3560 		e_pm_hold_rele_power(pdip, count);
3561 
3562 	pm_set_pm_info(dip, info);
3563 	/*
3564 	 * Apply any recorded thresholds
3565 	 */
3566 	(void) pm_thresh_specd(dip);
3567 
3568 	/*
3569 	 * Do dependency processing.
3570 	 */
3571 	pathbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3572 	(void) ddi_pathname(dip, pathbuf);
3573 	pm_dispatch_to_dep_thread(PM_DEP_WK_ATTACH, pathbuf, pathbuf,
3574 	    PM_DEP_NOWAIT, NULL, 0);
3575 	kmem_free(pathbuf, MAXPATHLEN);
3576 
3577 	if (!PM_ISBC(dip)) {
3578 		mutex_enter(&pm_scan_lock);
3579 		if (PM_SCANABLE(dip)) {
3580 			pm_scan_init(dip);
3581 			mutex_exit(&pm_scan_lock);
3582 			pm_rescan(dip);
3583 		} else {
3584 			mutex_exit(&pm_scan_lock);
3585 		}
3586 	}
3587 	return (0);
3588 }
3589 
3590 /*
3591  * This is the obsolete exported interface for a driver to find out its
3592  * "normal" (max) power.
3593  * We only get components destroyed while no power management is
3594  * going on (and the device is detached), so we don't need a mutex here
3595  */
3596 int
3597 pm_get_normal_power(dev_info_t *dip, int comp)
3598 {
3599 
3600 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3601 		return (PM_CP(dip, comp)->pmc_norm_pwr);
3602 	}
3603 	return (DDI_FAILURE);
3604 }
3605 
3606 /*
3607  * Fetches the current power level.  Return DDI_SUCCESS or DDI_FAILURE.
3608  */
3609 int
3610 pm_get_current_power(dev_info_t *dip, int comp, int *levelp)
3611 {
3612 	if (comp >= 0 && comp < PM_NUMCMPTS(dip)) {
3613 		*levelp = PM_CURPOWER(dip, comp);
3614 		return (DDI_SUCCESS);
3615 	}
3616 	return (DDI_FAILURE);
3617 }
3618 
3619 /*
3620  * Returns current threshold of indicated component
3621  */
3622 static int
3623 cur_threshold(dev_info_t *dip, int comp)
3624 {
3625 	pm_component_t *cp = PM_CP(dip, comp);
3626 	int pwr;
3627 
3628 	if (PM_ISBC(dip)) {
3629 		/*
3630 		 * backwards compatible nodes only have one threshold
3631 		 */
3632 		return (cp->pmc_comp.pmc_thresh[1]);
3633 	}
3634 	pwr = cp->pmc_cur_pwr;
3635 	if (pwr == PM_LEVEL_UNKNOWN) {
3636 		int thresh;
3637 		if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH)
3638 			thresh = pm_default_nexus_threshold;
3639 		else if (DEVI(dip)->devi_pm_flags & PMC_CPU_THRESH)
3640 			thresh = pm_cpu_idle_threshold;
3641 		else
3642 			thresh = pm_system_idle_threshold;
3643 		return (thresh);
3644 	}
3645 	ASSERT(cp->pmc_comp.pmc_thresh);
3646 	return (cp->pmc_comp.pmc_thresh[pwr]);
3647 }
3648 
3649 /*
3650  * Compute next lower component power level given power index.
3651  */
3652 static int
3653 pm_next_lower_power(pm_component_t *cp, int pwrndx)
3654 {
3655 	int nxt_pwr;
3656 
3657 	if (pwrndx == PM_LEVEL_UNKNOWN) {
3658 		nxt_pwr = cp->pmc_comp.pmc_lvals[0];
3659 	} else {
3660 		pwrndx--;
3661 		ASSERT(pwrndx >= 0);
3662 		nxt_pwr = cp->pmc_comp.pmc_lvals[pwrndx];
3663 	}
3664 	return (nxt_pwr);
3665 }
3666 
3667 /*
3668  * Update the maxpower (normal) power of a component. Note that the
3669  * component's power level is only changed if it's current power level
3670  * is higher than the new max power.
3671  */
3672 int
3673 pm_update_maxpower(dev_info_t *dip, int comp, int level)
3674 {
3675 	PMD_FUNC(pmf, "update_maxpower")
3676 	int old;
3677 	int result;
3678 
3679 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, comp, NULL) ||
3680 	    !e_pm_valid_power(dip, comp, level)) {
3681 		PMD(PMD_FAIL, ("%s: validation checks failed for %s@%s(%s#%d) "
3682 		    "comp=%d level=%d\n", pmf, PM_DEVICE(dip), comp, level))
3683 		return (DDI_FAILURE);
3684 	}
3685 	old = e_pm_get_max_power(dip, comp);
3686 	e_pm_set_max_power(dip, comp, level);
3687 
3688 	if (pm_set_power(dip, comp, level, PM_LEVEL_DOWNONLY,
3689 	    PM_CANBLOCK_BLOCK, 0, &result) != DDI_SUCCESS) {
3690 		e_pm_set_max_power(dip, comp, old);
3691 		PMD(PMD_FAIL, ("%s: %s@%s(%s#%d) pm_set_power failed\n", pmf,
3692 		    PM_DEVICE(dip)))
3693 		return (DDI_FAILURE);
3694 	}
3695 	return (DDI_SUCCESS);
3696 }
3697 
3698 /*
3699  * Bring all components of device to normal power
3700  */
3701 int
3702 pm_all_to_normal(dev_info_t *dip, pm_canblock_t canblock)
3703 {
3704 	PMD_FUNC(pmf, "all_to_normal")
3705 	int		*normal;
3706 	int		i, ncomps, result;
3707 	size_t		size;
3708 	int		changefailed = 0;
3709 
3710 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3711 	ASSERT(PM_GET_PM_INFO(dip));
3712 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3713 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs for "
3714 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3715 		return (DDI_FAILURE);
3716 	}
3717 	ncomps = PM_NUMCMPTS(dip);
3718 	for (i = 0; i < ncomps; i++) {
3719 		if (pm_set_power(dip, i, normal[i],
3720 		    PM_LEVEL_UPONLY, canblock, 0, &result) != DDI_SUCCESS) {
3721 			changefailed++;
3722 			PMD(PMD_ALLNORM | PMD_FAIL, ("%s: failed to set "
3723 			    "%s@%s(%s#%d)[%d] to %d, errno %d\n", pmf,
3724 			    PM_DEVICE(dip), i, normal[i], result))
3725 		}
3726 	}
3727 	kmem_free(normal, size);
3728 	if (changefailed) {
3729 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
3730 		    "to full power\n", pmf, changefailed, PM_DEVICE(dip)))
3731 		return (DDI_FAILURE);
3732 	}
3733 	return (DDI_SUCCESS);
3734 }
3735 
3736 /*
3737  * Returns true if all components of device are at normal power
3738  */
3739 int
3740 pm_all_at_normal(dev_info_t *dip)
3741 {
3742 	PMD_FUNC(pmf, "all_at_normal")
3743 	int		*normal;
3744 	int		i;
3745 	size_t		size;
3746 
3747 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
3748 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
3749 		PMD(PMD_ALLNORM, ("%s: can't get normal power\n", pmf))
3750 		return (DDI_FAILURE);
3751 	}
3752 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
3753 		int current = PM_CURPOWER(dip, i);
3754 		if (normal[i] > current) {
3755 			PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d) comp=%d, "
3756 			    "norm=%d, cur=%d\n", pmf, PM_DEVICE(dip), i,
3757 			    normal[i], current))
3758 			break;
3759 		}
3760 	}
3761 	kmem_free(normal, size);
3762 	if (i != PM_NUMCMPTS(dip)) {
3763 		return (0);
3764 	}
3765 	return (1);
3766 }
3767 
3768 static void bring_pmdep_up(dev_info_t *, int);
3769 
3770 static void
3771 bring_wekeeps_up(char *keeper)
3772 {
3773 	PMD_FUNC(pmf, "bring_wekeeps_up")
3774 	int i;
3775 	pm_pdr_t *dp;
3776 	pm_info_t *wku_info;
3777 	char *kept_path;
3778 	dev_info_t *kept;
3779 
3780 	if (panicstr) {
3781 		return;
3782 	}
3783 	/*
3784 	 * We process the request even if the keeper detaches because
3785 	 * detach processing expects this to increment kidsupcnt of kept.
3786 	 */
3787 	PMD(PMD_BRING, ("%s: keeper= %s\n", pmf, keeper))
3788 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
3789 		if (strcmp(dp->pdr_keeper, keeper) != 0)
3790 			continue;
3791 		for (i = 0; i < dp->pdr_kept_count; i++) {
3792 			kept_path = dp->pdr_kept_paths[i];
3793 			if (kept_path == NULL)
3794 				continue;
3795 			ASSERT(kept_path[0] != '\0');
3796 			if ((kept = pm_name_to_dip(kept_path, 1)) == NULL)
3797 				continue;
3798 			wku_info = PM_GET_PM_INFO(kept);
3799 			if (wku_info == NULL) {
3800 				if (kept)
3801 					ddi_release_devi(kept);
3802 				continue;
3803 			}
3804 			/*
3805 			 * Don't mess with it if it is being detached, it isn't
3806 			 * safe to call its power entry point
3807 			 */
3808 			if (wku_info->pmi_dev_pm_state & PM_DETACHING) {
3809 				if (kept)
3810 					ddi_release_devi(kept);
3811 				continue;
3812 			}
3813 			bring_pmdep_up(kept, 1);
3814 			ddi_release_devi(kept);
3815 		}
3816 	}
3817 }
3818 
3819 /*
3820  * Bring up the 'kept' device passed as argument
3821  */
3822 static void
3823 bring_pmdep_up(dev_info_t *kept_dip, int hold)
3824 {
3825 	PMD_FUNC(pmf, "bring_pmdep_up")
3826 	int is_all_at_normal = 0;
3827 
3828 	/*
3829 	 * If the kept device has been unmanaged, do nothing.
3830 	 */
3831 	if (!PM_GET_PM_INFO(kept_dip))
3832 		return;
3833 
3834 	/* Just ignore DIRECT PM device till they are released. */
3835 	if (!pm_processes_stopped && PM_ISDIRECT(kept_dip) &&
3836 	    !(is_all_at_normal = pm_all_at_normal(kept_dip))) {
3837 		PMD(PMD_BRING, ("%s: can't bring up PM_DIRECT %s@%s(%s#%d) "
3838 		    "controlling process did something else\n", pmf,
3839 		    PM_DEVICE(kept_dip)))
3840 		DEVI(kept_dip)->devi_pm_flags |= PMC_SKIP_BRINGUP;
3841 		return;
3842 	}
3843 	/* if we got here the keeper had a transition from OFF->ON */
3844 	if (hold)
3845 		pm_hold_power(kept_dip);
3846 
3847 	if (!is_all_at_normal)
3848 		(void) pm_all_to_normal(kept_dip, PM_CANBLOCK_FAIL);
3849 }
3850 
/*
 * Definitions private to the pm-components parsing code below
 * (pm_parsenum(), the FINISH_COMP macro and pm_autoconfig()).
 */

/* Prefix that introduces a component name entry, and its length */
static const char namestr[] = "NAME=";
static const int nameln = sizeof (namestr) - 1;
/* Name of the property parsed by pm_autoconfig() */
static const char pmcompstr[] = "pm-components";

/*
 * Singly linked list node used by pm_autoconfig() to collect components
 * while the property is being parsed.
 */
struct pm_comp_pkg {
	pm_comp_t		*comp;
	struct pm_comp_pkg	*next;
};

/* True if ch is an ASCII decimal digit; ch is evaluated more than once */
#define	isdigit(ch)	((ch) >= '0' && (ch) <= '9')

/* True if ch is an ASCII hex digit; ch is evaluated more than once */
#define	isxdigit(ch)	(isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
			((ch) >= 'A' && (ch) <= 'F'))
3868 
/*
 * Rather than duplicate this code ...
 * (pm_autoconfig() below expands this macro twice: once whenever a new
 * NAME= entry ends the previous component and once after the last entry)
 *
 * FINISH_COMP completes the component currently being parsed.  It relies
 * on the locals of the expanding function: compp (the component), level
 * (number of levels collected), size (bytes to allocate for the level name
 * buffer), lvals[], lnames[] and lszs[] (per-level value, name pointer and
 * name size including the terminating NUL), plus the scratch variables tp
 * and j.  It records size and level in the component, allocates the name
 * buffer and the per-level name/value/threshold arrays, copies every level
 * name into the buffer and stores the level values; every threshold is
 * preset to INT_MAX.
 */
#define	FINISH_COMP { \
	ASSERT(compp); \
	compp->pmc_lnames_sz = size; \
	tp = compp->pmc_lname_buf = kmem_alloc(size, KM_SLEEP); \
	compp->pmc_numlevels = level; \
	compp->pmc_lnames = kmem_alloc(level * sizeof (char *), KM_SLEEP); \
	compp->pmc_lvals = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	compp->pmc_thresh = kmem_alloc(level * sizeof (int), KM_SLEEP); \
	/* copy string out of prop array into buffer */ \
	for (j = 0; j < level; j++) { \
		compp->pmc_thresh[j] = INT_MAX;		/* only [0] sticks */ \
		compp->pmc_lvals[j] = lvals[j]; \
		(void) strcpy(tp, lnames[j]); \
		compp->pmc_lnames[j] = tp; \
		tp += lszs[j]; \
	} \
	ASSERT(tp > compp->pmc_lname_buf && tp <= \
	    compp->pmc_lname_buf + compp->pmc_lnames_sz); \
	}
3892 
3893 /*
3894  * Create (empty) component data structures.
3895  */
3896 static void
3897 e_pm_create_components(dev_info_t *dip, int num_components)
3898 {
3899 	struct pm_component *compp, *ocompp;
3900 	int i, size = 0;
3901 
3902 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
3903 	ASSERT(!DEVI(dip)->devi_pm_components);
3904 	ASSERT(!(DEVI(dip)->devi_pm_flags & PMC_COMPONENTS_DONE));
3905 	size = sizeof (struct pm_component) * num_components;
3906 
3907 	compp = kmem_zalloc(size, KM_SLEEP);
3908 	ocompp = compp;
3909 	DEVI(dip)->devi_pm_comp_size = size;
3910 	DEVI(dip)->devi_pm_num_components = num_components;
3911 	PM_LOCK_BUSY(dip);
3912 	for (i = 0; i < num_components;  i++) {
3913 		compp->pmc_timestamp = gethrestime_sec();
3914 		compp->pmc_norm_pwr = (uint_t)-1;
3915 		compp++;
3916 	}
3917 	PM_UNLOCK_BUSY(dip);
3918 	DEVI(dip)->devi_pm_components = ocompp;
3919 	DEVI(dip)->devi_pm_flags |= PMC_COMPONENTS_DONE;
3920 }
3921 
/*
 * Parse hex or decimal value from char string.
 *
 * cp must point at a decimal digit, otherwise NULL is returned.  A leading
 * "0x" or "0X" selects hexadecimal (an empty hex digit run yields 0); any
 * other digit string is decimal, leading zeroes included (there is no octal
 * form).  On success the value is stored in *valp and a pointer to the
 * first character following the number is returned.
 *
 * The digits are accumulated directly from the input, so the length of the
 * digit string is not limited by any intermediate buffer.  A value that does
 * not fit in an int is rejected by returning NULL; *valp is left untouched
 * whenever NULL is returned.
 */
static char *
pm_parsenum(char *cp, int *valp)
{
	int value = 0;
	int digit;
	int ch = (unsigned char)*cp;

	if (!isdigit(ch))
		return (NULL);

	if (ch == '0' && (cp[1] == 'x' || cp[1] == 'X')) {
		for (cp += 2; ; cp++) {
			ch = (unsigned char)*cp;
			if (isdigit(ch))
				digit = ch - '0';
			else if (ch >= 'a' && ch <= 'f')
				digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'F')
				digit = ch - 'A' + 10;
			else
				break;
			/* refuse to overflow rather than wrap */
			if (value > (INT_MAX - digit) / 16)
				return (NULL);
			value = value * 16 + digit;
		}
	} else {
		for (; ; cp++) {
			ch = (unsigned char)*cp;
			if (!isdigit(ch))
				break;
			digit = ch - '0';
			/* refuse to overflow rather than wrap */
			if (value > (INT_MAX - digit) / 10)
				return (NULL);
			value = value * 10 + digit;
		}
	}

	*valp = value;
	return (cp);
}
3984 
3985 /*
3986  * Set max (previously documented as "normal") power.
3987  */
3988 static void
3989 e_pm_set_max_power(dev_info_t *dip, int component_number, int level)
3990 {
3991 	PM_CP(dip, component_number)->pmc_norm_pwr = level;
3992 }
3993 
3994 /*
3995  * Get max (previously documented as "normal") power.
3996  */
3997 static int
3998 e_pm_get_max_power(dev_info_t *dip, int component_number)
3999 {
4000 	return (PM_CP(dip, component_number)->pmc_norm_pwr);
4001 }
4002 
4003 /*
4004  * Internal routine for destroying components
4005  * It is called even when there might not be any, so it must be forgiving.
4006  */
4007 static void
4008 e_pm_destroy_components(dev_info_t *dip)
4009 {
4010 	int i;
4011 	struct pm_component *cp;
4012 
4013 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4014 	if (PM_NUMCMPTS(dip) == 0)
4015 		return;
4016 	cp = DEVI(dip)->devi_pm_components;
4017 	ASSERT(cp);
4018 	for (i = 0; i < PM_NUMCMPTS(dip); i++, cp++) {
4019 		int nlevels = cp->pmc_comp.pmc_numlevels;
4020 		kmem_free(cp->pmc_comp.pmc_lvals, nlevels * sizeof (int));
4021 		kmem_free(cp->pmc_comp.pmc_thresh, nlevels * sizeof (int));
4022 		/*
4023 		 * For BC nodes, the rest is static in bc_comp, so skip it
4024 		 */
4025 		if (PM_ISBC(dip))
4026 			continue;
4027 		kmem_free(cp->pmc_comp.pmc_name, cp->pmc_comp.pmc_name_sz);
4028 		kmem_free(cp->pmc_comp.pmc_lnames, nlevels * sizeof (char *));
4029 		kmem_free(cp->pmc_comp.pmc_lname_buf,
4030 		    cp->pmc_comp.pmc_lnames_sz);
4031 	}
4032 	kmem_free(DEVI(dip)->devi_pm_components, DEVI(dip)->devi_pm_comp_size);
4033 	DEVI(dip)->devi_pm_components = NULL;
4034 	DEVI(dip)->devi_pm_num_components = 0;
4035 	DEVI(dip)->devi_pm_flags &=
4036 	    ~(PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4037 }
4038 
4039 /*
4040  * Read the pm-components property (if there is one) and use it to set up
4041  * components.  Returns a pointer to an array of component structures if
4042  * pm-components found and successfully parsed, else returns NULL.
4043  * Sets error return *errp to true to indicate a failure (as opposed to no
4044  * property being present).
4045  */
4046 pm_comp_t *
4047 pm_autoconfig(dev_info_t *dip, int *errp)
4048 {
4049 	PMD_FUNC(pmf, "autoconfig")
4050 	uint_t nelems;
4051 	char **pp;
4052 	pm_comp_t *compp = NULL;
4053 	int i, j, level, components = 0;
4054 	size_t size = 0;
4055 	struct pm_comp_pkg *p, *ptail;
4056 	struct pm_comp_pkg *phead = NULL;
4057 	int *lvals = NULL;
4058 	int *lszs = NULL;
4059 	int *np = NULL;
4060 	int npi = 0;
4061 	char **lnames = NULL;
4062 	char *cp, *tp;
4063 	pm_comp_t *ret = NULL;
4064 
4065 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4066 	*errp = 0;	/* assume success */
4067 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4068 	    (char *)pmcompstr, &pp, &nelems) != DDI_PROP_SUCCESS) {
4069 		return (NULL);
4070 	}
4071 
4072 	if (nelems < 3) {	/* need at least one name and two levels */
4073 		goto errout;
4074 	}
4075 
4076 	/*
4077 	 * pm_create_components is no longer allowed
4078 	 */
4079 	if (PM_NUMCMPTS(dip) != 0) {
4080 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) has %d comps\n",
4081 		    pmf, PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4082 		goto errout;
4083 	}
4084 
4085 	lvals = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4086 	lszs = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4087 	lnames = kmem_alloc(nelems * sizeof (char *), KM_SLEEP);
4088 	np = kmem_alloc(nelems * sizeof (int), KM_SLEEP);
4089 
4090 	level = 0;
4091 	phead = NULL;
4092 	for (i = 0; i < nelems; i++) {
4093 		cp = pp[i];
4094 		if (!isdigit(*cp)) {	/*  must be name */
4095 			if (strncmp(cp, namestr, nameln) != 0) {
4096 				goto errout;
4097 			}
4098 			if (i != 0) {
4099 				if (level == 0) {	/* no level spec'd */
4100 					PMD(PMD_ERROR, ("%s: no level spec'd\n",
4101 					    pmf))
4102 					goto errout;
4103 				}
4104 				np[npi++] = lvals[level - 1];
4105 				/* finish up previous component levels */
4106 				FINISH_COMP;
4107 			}
4108 			cp += nameln;
4109 			if (!*cp) {
4110 				PMD(PMD_ERROR, ("%s: nsa\n", pmf))
4111 				goto errout;
4112 			}
4113 			p = kmem_zalloc(sizeof (*phead), KM_SLEEP);
4114 			if (phead == NULL) {
4115 				phead = ptail = p;
4116 			} else {
4117 				ptail->next = p;
4118 				ptail = p;
4119 			}
4120 			compp = p->comp = kmem_zalloc(sizeof (pm_comp_t),
4121 			    KM_SLEEP);
4122 			compp->pmc_name_sz = strlen(cp) + 1;
4123 			compp->pmc_name = kmem_zalloc(compp->pmc_name_sz,
4124 			    KM_SLEEP);
4125 			(void) strncpy(compp->pmc_name, cp, compp->pmc_name_sz);
4126 			components++;
4127 			level = 0;
4128 		} else {	/* better be power level <num>=<name> */
4129 #ifdef DEBUG
4130 			tp = cp;
4131 #endif
4132 			if (i == 0 ||
4133 			    (cp = pm_parsenum(cp, &lvals[level])) == NULL) {
4134 				PMD(PMD_ERROR, ("%s: parsenum(%s)\n", pmf, tp))
4135 				goto errout;
4136 			}
4137 #ifdef DEBUG
4138 			tp = cp;
4139 #endif
4140 			if (*cp++ != '=' || !*cp) {
4141 				PMD(PMD_ERROR, ("%s: ex =, got %s\n", pmf, tp))
4142 				goto errout;
4143 			}
4144 
4145 			lszs[level] = strlen(cp) + 1;
4146 			size += lszs[level];
4147 			lnames[level] = cp;	/* points into prop string */
4148 			level++;
4149 		}
4150 	}
4151 	np[npi++] = lvals[level - 1];
4152 	if (level == 0) {	/* ended with a name */
4153 		PMD(PMD_ERROR, ("%s: ewn\n", pmf))
4154 		goto errout;
4155 	}
4156 	FINISH_COMP;
4157 
4158 
4159 	/*
4160 	 * Now we have a list of components--we have to return instead an
4161 	 * array of them, but we can just copy the top level and leave
4162 	 * the rest as is
4163 	 */
4164 	(void) e_pm_create_components(dip, components);
4165 	for (i = 0; i < components; i++)
4166 		e_pm_set_max_power(dip, i, np[i]);
4167 
4168 	ret = kmem_zalloc(components * sizeof (pm_comp_t), KM_SLEEP);
4169 	for (i = 0, p = phead; i < components; i++) {
4170 		ASSERT(p);
4171 		/*
4172 		 * Now sanity-check values:  levels must be monotonically
4173 		 * increasing
4174 		 */
4175 		if (p->comp->pmc_numlevels < 2) {
4176 			PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) only %d "
4177 			    "levels\n", pmf,
4178 			    p->comp->pmc_name, PM_DEVICE(dip),
4179 			    p->comp->pmc_numlevels))
4180 			goto errout;
4181 		}
4182 		for (j = 0; j < p->comp->pmc_numlevels; j++) {
4183 			if ((p->comp->pmc_lvals[j] < 0) || ((j > 0) &&
4184 			    (p->comp->pmc_lvals[j] <=
4185 			    p->comp->pmc_lvals[j - 1]))) {
4186 				PMD(PMD_ERROR, ("%s: comp %s of %s@%s(%s#%d) "
4187 				    "not mono. incr, %d follows %d\n", pmf,
4188 				    p->comp->pmc_name, PM_DEVICE(dip),
4189 				    p->comp->pmc_lvals[j],
4190 				    p->comp->pmc_lvals[j - 1]))
4191 				goto errout;
4192 			}
4193 		}
4194 		ret[i] = *p->comp;	/* struct assignment */
4195 		for (j = 0; j < i; j++) {
4196 			/*
4197 			 * Test for unique component names
4198 			 */
4199 			if (strcmp(ret[j].pmc_name, ret[i].pmc_name) == 0) {
4200 				PMD(PMD_ERROR, ("%s: %s of %s@%s(%s#%d) not "
4201 				    "unique\n", pmf, ret[j].pmc_name,
4202 				    PM_DEVICE(dip)))
4203 				goto errout;
4204 			}
4205 		}
4206 		ptail = p;
4207 		p = p->next;
4208 		phead = p;	/* errout depends on phead making sense */
4209 		kmem_free(ptail->comp, sizeof (*ptail->comp));
4210 		kmem_free(ptail, sizeof (*ptail));
4211 	}
4212 out:
4213 	ddi_prop_free(pp);
4214 	if (lvals)
4215 		kmem_free(lvals, nelems * sizeof (int));
4216 	if (lszs)
4217 		kmem_free(lszs, nelems * sizeof (int));
4218 	if (lnames)
4219 		kmem_free(lnames, nelems * sizeof (char *));
4220 	if (np)
4221 		kmem_free(np, nelems * sizeof (int));
4222 	return (ret);
4223 
4224 errout:
4225 	e_pm_destroy_components(dip);
4226 	*errp = 1;	/* signal failure */
4227 	cmn_err(CE_CONT, "!pm: %s property ", pmcompstr);
4228 	for (i = 0; i < nelems - 1; i++)
4229 		cmn_err(CE_CONT, "!'%s', ", pp[i]);
4230 	if (nelems != 0)
4231 		cmn_err(CE_CONT, "!'%s'", pp[nelems - 1]);
4232 	cmn_err(CE_CONT, "! for %s@%s(%s#%d) is ill-formed.\n", PM_DEVICE(dip));
4233 	for (p = phead; p; ) {
4234 		pm_comp_t *pp;
4235 		int n;
4236 
4237 		ptail = p;
4238 		/*
4239 		 * Free component data structures
4240 		 */
4241 		pp = p->comp;
4242 		n = pp->pmc_numlevels;
4243 		if (pp->pmc_name_sz) {
4244 			kmem_free(pp->pmc_name, pp->pmc_name_sz);
4245 		}
4246 		if (pp->pmc_lnames_sz) {
4247 			kmem_free(pp->pmc_lname_buf, pp->pmc_lnames_sz);
4248 		}
4249 		if (pp->pmc_lnames) {
4250 			kmem_free(pp->pmc_lnames, n * (sizeof (char *)));
4251 		}
4252 		if (pp->pmc_thresh) {
4253 			kmem_free(pp->pmc_thresh, n * (sizeof (int)));
4254 		}
4255 		if (pp->pmc_lvals) {
4256 			kmem_free(pp->pmc_lvals, n * (sizeof (int)));
4257 		}
4258 		p = ptail->next;
4259 		kmem_free(ptail, sizeof (*ptail));
4260 	}
4261 	if (ret != NULL)
4262 		kmem_free(ret, components * sizeof (pm_comp_t));
4263 	ret = NULL;
4264 	goto out;
4265 }
4266 
/*
 * Set threshold values for a devices components by dividing the target
 * threshold (base) by the number of transitions and assign each transition
 * that threshold.  This will get the entire device down in the target time if
 * all components are idle and even if there are dependencies among components.
 *
 * Devices may well get powered all the way down before the target time, but
 * at least the EPA will be happy.
 *
 * dip	device whose component thresholds are set
 * base	device threshold; 95% of it is what gets distributed over the
 *	transitions, while base itself is recorded as devi_pm_dev_thresh
 * flag	threshold type flag or'ed into devi_pm_flags after the existing
 *	threshold type bits have been masked with PMC_THRESH_NONE
 *
 * Only thresholds of levels 1 and up are written; entry 0 of each
 * component's threshold array is not touched here.  The dip lock is taken
 * on entry and dropped before returning.
 */
void
pm_set_device_threshold(dev_info_t *dip, int base, int flag)
{
	PMD_FUNC(pmf, "set_device_threshold")
	int target_threshold = (base * 95) / 100;
	int level, comp;		/* loop counters */
	int transitions = 0;
	int ncomp = PM_NUMCMPTS(dip);
	int thresh;
	int remainder;
	pm_comp_t *pmc;
	int i, circ;

	ASSERT(!PM_IAM_LOCKING_DIP(dip));
	PM_LOCK_DIP(dip);
	/*
	 * First we handle the easy one.  If we're setting the default
	 * threshold for a node with children, then we set it to the
	 * default nexus threshold (currently 0) and mark it as default
	 * nexus threshold instead
	 */
	if (PM_IS_NEXUS(dip)) {
		if (flag == PMC_DEF_THRESH) {
			PMD(PMD_THRESH, ("%s: [%s@%s(%s#%d) NEXDEF]\n", pmf,
			    PM_DEVICE(dip)))
			thresh = pm_default_nexus_threshold;
			for (comp = 0; comp < ncomp; comp++) {
				pmc = &PM_CP(dip, comp)->pmc_comp;
				for (level = 1; level < pmc->pmc_numlevels;
				    level++) {
					pmc->pmc_thresh[level] = thresh;
				}
			}
			DEVI(dip)->devi_pm_dev_thresh =
			    pm_default_nexus_threshold;
			/*
			 * If the nexus node is being reconfigured back to
			 * the default threshold, adjust the notlowest count.
			 */
			if (DEVI(dip)->devi_pm_flags &
			    (PMC_DEV_THRESH|PMC_COMP_THRESH)) {
				PM_LOCK_POWER(dip, &circ);
				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
					/* only components not at 0 count */
					if (PM_CURPOWER(dip, i) == 0)
						continue;
					mutex_enter(&pm_compcnt_lock);
					ASSERT(pm_comps_notlowest);
					pm_comps_notlowest--;
					PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) decr "
					    "notlowest to %d\n", pmf,
					    PM_DEVICE(dip), pm_comps_notlowest))
					if (pm_comps_notlowest == 0)
						pm_ppm_notify_all_lowest(dip,
						    PM_ALL_LOWEST);
					mutex_exit(&pm_compcnt_lock);
				}
				PM_UNLOCK_POWER(dip, circ);
			}
			DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
			DEVI(dip)->devi_pm_flags |= PMC_NEXDEF_THRESH;
			PM_UNLOCK_DIP(dip);
			return;
		} else if (DEVI(dip)->devi_pm_flags & PMC_NEXDEF_THRESH) {
			/*
			 * If the nexus node is being configured for a
			 * non-default threshold, include that node in
			 * the notlowest accounting.
			 */
			PM_LOCK_POWER(dip, &circ);
			for (i = 0; i < PM_NUMCMPTS(dip); i++) {
				if (PM_CURPOWER(dip, i) == 0)
					continue;
				mutex_enter(&pm_compcnt_lock);
				/* notify on the 0 -> 1 transition */
				if (pm_comps_notlowest == 0)
					pm_ppm_notify_all_lowest(dip,
					    PM_NOT_ALL_LOWEST);
				pm_comps_notlowest++;
				PMD(PMD_LEVEL, ("%s: %s@%s(%s#%d) incr "
				    "notlowest to %d\n", pmf,
				    PM_DEVICE(dip), pm_comps_notlowest))
				mutex_exit(&pm_compcnt_lock);
			}
			PM_UNLOCK_POWER(dip, circ);
		}
	}
	/*
	 * Compute the total number of transitions for all components
	 * of the device.  Distribute the threshold evenly over them
	 */
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		ASSERT(pmc->pmc_numlevels > 1);
		transitions += pmc->pmc_numlevels - 1;
	}
	/* transitions is the divisor below, so it must not be zero */
	ASSERT(transitions);
	thresh = target_threshold / transitions;

	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			pmc->pmc_thresh[level] = thresh;
		}
	}

#ifdef DEBUG
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			PMD(PMD_THRESH, ("%s: thresh before %s@%s(%s#%d) "
			    "comp=%d, level=%d, %d\n", pmf, PM_DEVICE(dip),
			    comp, level, pmc->pmc_thresh[level]))
		}
	}
#endif
	/*
	 * Distribute any remainder till they are all gone
	 */
	remainder = target_threshold - thresh * transitions;
	level = 1;
#ifdef DEBUG
	PMD(PMD_THRESH, ("%s: remainder=%d target_threshold=%d thresh=%d "
	    "trans=%d\n", pmf, remainder, target_threshold, thresh,
	    transitions))
#endif
	/*
	 * Hand out the remainder one unit at a time: first to level 1 of
	 * each component in turn, then to level 2, and so on, skipping
	 * components that do not have the current level.
	 */
	while (remainder > 0) {
		comp = 0;
		while (remainder && (comp < ncomp)) {
			pmc = &PM_CP(dip, comp)->pmc_comp;
			if (level < pmc->pmc_numlevels) {
				pmc->pmc_thresh[level] += 1;
				remainder--;
			}
			comp++;
		}
		level++;
	}
#ifdef DEBUG
	for (comp = 0; comp < ncomp; comp++) {
		pmc = &PM_CP(dip, comp)->pmc_comp;
		for (level = 1; level < pmc->pmc_numlevels; level++) {
			PMD(PMD_THRESH, ("%s: thresh after %s@%s(%s#%d) "
			    "comp=%d level=%d, %d\n", pmf, PM_DEVICE(dip),
			    comp, level, pmc->pmc_thresh[level]))
		}
	}
#endif
	ASSERT(PM_IAM_LOCKING_DIP(dip));
	DEVI(dip)->devi_pm_dev_thresh = base;
	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
	DEVI(dip)->devi_pm_flags |= flag;
	PM_UNLOCK_DIP(dip);
}
4428 
/*
 * Called when there is no old-style platform power management driver.
 * This is the initial value of the pm_platform_power function pointer;
 * it ignores the request and always returns DDI_FAILURE.
 */
static int
ddi_no_platform_power(power_req_t *req)
{
	_NOTE(ARGUNUSED(req))
	return (DDI_FAILURE);
}
4438 
/*
 * This function pointer is used to call the entry point supplied by the
 * platform-specific pm driver to bring the device component 'pm_cmpt' to
 * power level 'pm_level'.  Until it is changed it points at
 * ddi_no_platform_power(), which fails every request.
 * The use of a global for getting the function name from the
 * platform-specific pm driver is not ideal, but it is simple and efficient.
 * The previous property lookup was being done in the idle loop on swift
 * systems without pmc chips and hurt deskbench performance as well as
 * violating scheduler locking rules
 */
int	(*pm_platform_power)(power_req_t *) = ddi_no_platform_power;
4449 
/*
 * Old obsolete interface for a device to request a power change (but only
 * an increase in power).  It simply forwards to pm_raise_power() and
 * returns that function's result.
 */
int
ddi_dev_is_needed(dev_info_t *dip, int cmpt, int level)
{
	return (pm_raise_power(dip, cmpt, level));
}
4459 
4460 /*
4461  * The old obsolete interface to platform power management.  Only used by
4462  * Gypsy platform and APM on X86.
4463  */
4464 int
4465 ddi_power(dev_info_t *dip, int pm_cmpt, int pm_level)
4466 {
4467 	power_req_t	request;
4468 
4469 	request.request_type = PMR_SET_POWER;
4470 	request.req.set_power_req.who = dip;
4471 	request.req.set_power_req.cmpt = pm_cmpt;
4472 	request.req.set_power_req.level = pm_level;
4473 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4474 }
4475 
/*
 * A driver can invoke this from its detach routine when DDI_SUSPEND is
 * passed.  Returns true if subsequent processing could result in power being
 * removed from the device.  The arg is not currently used because it is
 * implicit in the operation of cpr/DR.
 *
 * The value returned is the current value of the global pm_powering_down.
 */
int
ddi_removing_power(dev_info_t *dip)
{
	_NOTE(ARGUNUSED(dip))
	return (pm_powering_down);
}
4488 
/*
 * Returns true if a device indicates that its parent handles suspend/resume
 * processing for it.
 *
 * The result is the PMC_PARENTAL_SR bit of the node's devi_pm_flags, so it
 * is non-zero rather than exactly 1 when the flag is set.
 */
int
e_ddi_parental_suspend_resume(dev_info_t *dip)
{
	return (DEVI(dip)->devi_pm_flags & PMC_PARENTAL_SR);
}
4498 
4499 /*
4500  * Called for devices which indicate that their parent does suspend/resume
4501  * handling for them
4502  */
4503 int
4504 e_ddi_suspend(dev_info_t *dip, ddi_detach_cmd_t cmd)
4505 {
4506 	power_req_t	request;
4507 	request.request_type = PMR_SUSPEND;
4508 	request.req.suspend_req.who = dip;
4509 	request.req.suspend_req.cmd = cmd;
4510 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4511 }
4512 
4513 /*
4514  * Called for devices which indicate that their parent does suspend/resume
4515  * handling for them
4516  */
4517 int
4518 e_ddi_resume(dev_info_t *dip, ddi_attach_cmd_t cmd)
4519 {
4520 	power_req_t	request;
4521 	request.request_type = PMR_RESUME;
4522 	request.req.resume_req.who = dip;
4523 	request.req.resume_req.cmd = cmd;
4524 	return (ddi_ctlops(dip, dip, DDI_CTLOPS_POWER, &request, NULL));
4525 }
4526 
4527 /*
4528  * Old obsolete exported interface for drivers to create components.
4529  * This is now handled by exporting the pm-components property.
4530  */
4531 int
4532 pm_create_components(dev_info_t *dip, int num_components)
4533 {
4534 	PMD_FUNC(pmf, "pm_create_components")
4535 
4536 	if (num_components < 1)
4537 		return (DDI_FAILURE);
4538 
4539 	if (!DEVI_IS_ATTACHING(dip)) {
4540 		return (DDI_FAILURE);
4541 	}
4542 
4543 	/* don't need to lock dip because attach is single threaded */
4544 	if (DEVI(dip)->devi_pm_components) {
4545 		PMD(PMD_ERROR, ("%s: %s@%s(%s#%d) already has %d\n", pmf,
4546 		    PM_DEVICE(dip), PM_NUMCMPTS(dip)))
4547 		return (DDI_FAILURE);
4548 	}
4549 	e_pm_create_components(dip, num_components);
4550 	DEVI(dip)->devi_pm_flags |= PMC_BC;
4551 	e_pm_default_components(dip, num_components);
4552 	return (DDI_SUCCESS);
4553 }
4554 
4555 /*
4556  * Obsolete interface previously called by drivers to destroy their components
4557  * at detach time.  This is now done automatically.  However, we need to keep
4558  * this for the old drivers.
4559  */
4560 void
4561 pm_destroy_components(dev_info_t *dip)
4562 {
4563 	PMD_FUNC(pmf, "pm_destroy_components")
4564 	dev_info_t *pdip = ddi_get_parent(dip);
4565 
4566 	PMD(PMD_REMDEV | PMD_KIDSUP, ("%s: %s@%s(%s#%d)\n", pmf,
4567 	    PM_DEVICE(dip)))
4568 	ASSERT(DEVI_IS_DETACHING(dip));
4569 #ifdef DEBUG
4570 	if (!PM_ISBC(dip))
4571 		cmn_err(CE_WARN, "!driver exporting pm-components property "
4572 		    "(%s@%s) calls pm_destroy_components", PM_NAME(dip),
4573 		    PM_ADDR(dip));
4574 #endif
4575 	/*
4576 	 * We ignore this unless this is an old-style driver, except for
4577 	 * printing the message above
4578 	 */
4579 	if (PM_NUMCMPTS(dip) == 0 || !PM_ISBC(dip)) {
4580 		PMD(PMD_REMDEV, ("%s: ignore %s@%s(%s#%d)\n", pmf,
4581 		    PM_DEVICE(dip)))
4582 		return;
4583 	}
4584 	ASSERT(PM_GET_PM_INFO(dip));
4585 
4586 	/*
4587 	 * pm_unmanage will clear info pointer later, after dealing with
4588 	 * dependencies
4589 	 */
4590 	ASSERT(!PM_GET_PM_SCAN(dip));	/* better be gone already */
4591 	/*
4592 	 * Now adjust parent's kidsupcnt.  We check only comp 0.
4593 	 * Parents that get notification are not adjusted because their
4594 	 * kidsupcnt is always 0 (or 1 during probe and attach).
4595 	 */
4596 	if ((PM_CURPOWER(dip, 0) != 0) && pdip && !PM_WANTS_NOTIFICATION(pdip))
4597 		pm_rele_power(pdip);
4598 #ifdef DEBUG
4599 	else {
4600 		PMD(PMD_KIDSUP, ("%s: kuc stays %s@%s(%s#%d) comps gone\n",
4601 		    pmf, PM_DEVICE(dip)))
4602 	}
4603 #endif
4604 	e_pm_destroy_components(dip);
4605 	/*
4606 	 * Forget we ever knew anything about the components of this  device
4607 	 */
4608 	DEVI(dip)->devi_pm_flags &=
4609 	    ~(PMC_BC | PMC_COMPONENTS_DONE | PMC_COMPONENTS_FAILED);
4610 }
4611 
4612 /*
4613  * Exported interface for a driver to set a component busy.
4614  */
4615 int
4616 pm_busy_component(dev_info_t *dip, int cmpt)
4617 {
4618 	struct pm_component *cp;
4619 
4620 	ASSERT(dip != NULL);
4621 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4622 		return (DDI_FAILURE);
4623 	PM_LOCK_BUSY(dip);
4624 	cp->pmc_busycount++;
4625 	cp->pmc_timestamp = 0;
4626 	PM_UNLOCK_BUSY(dip);
4627 	return (DDI_SUCCESS);
4628 }
4629 
4630 /*
4631  * Exported interface for a driver to set a component idle.
4632  */
4633 int
4634 pm_idle_component(dev_info_t *dip, int cmpt)
4635 {
4636 	PMD_FUNC(pmf, "pm_idle_component")
4637 	struct pm_component *cp;
4638 	pm_scan_t	*scanp = PM_GET_PM_SCAN(dip);
4639 
4640 	if (!e_pm_valid_info(dip, NULL) || !e_pm_valid_comp(dip, cmpt, &cp))
4641 		return (DDI_FAILURE);
4642 
4643 	PM_LOCK_BUSY(dip);
4644 	if (cp->pmc_busycount) {
4645 		if (--(cp->pmc_busycount) == 0)
4646 			cp->pmc_timestamp = gethrestime_sec();
4647 	} else {
4648 		cp->pmc_timestamp = gethrestime_sec();
4649 	}
4650 
4651 	PM_UNLOCK_BUSY(dip);
4652 
4653 	/*
4654 	 * if device becomes idle during idle down period, try scan it down
4655 	 */
4656 	if (scanp && PM_IS_PID(dip)) {
4657 		PMD(PMD_IDLEDOWN, ("%s: %s@%s(%s#%d) idle.\n", pmf,
4658 		    PM_DEVICE(dip)))
4659 		pm_rescan(dip);
4660 		return (DDI_SUCCESS);
4661 	}
4662 
4663 	/*
4664 	 * handle scan not running with nexus threshold == 0
4665 	 */
4666 
4667 	if (PM_IS_NEXUS(dip) && (cp->pmc_busycount == 0)) {
4668 		pm_rescan(dip);
4669 	}
4670 
4671 	return (DDI_SUCCESS);
4672 }
4673 
4674 /*
4675  * This is the old  obsolete interface called by drivers to set their normal
4676  * power.  Thus we can't fix its behavior or return a value.
4677  * This functionality is replaced by the pm-component property.
4678  * We'll only get components destroyed while no power management is
4679  * going on (and the device is detached), so we don't need a mutex here
4680  */
4681 void
4682 pm_set_normal_power(dev_info_t *dip, int comp, int level)
4683 {
4684 	PMD_FUNC(pmf, "set_normal_power")
4685 #ifdef DEBUG
4686 	if (!PM_ISBC(dip))
4687 		cmn_err(CE_WARN, "!call to pm_set_normal_power() by %s@%s "
4688 		    "(driver exporting pm-components property) ignored",
4689 		    PM_NAME(dip), PM_ADDR(dip));
4690 #endif
4691 	if (PM_ISBC(dip)) {
4692 		PMD(PMD_NORM, ("%s: %s@%s(%s#%d) set normal power comp=%d, "
4693 		    "level=%d\n", pmf, PM_DEVICE(dip), comp, level))
4694 		e_pm_set_max_power(dip, comp, level);
4695 		e_pm_default_levels(dip, PM_CP(dip, comp), level);
4696 	}
4697 }
4698 
4699 /*
4700  * Called on a successfully detached driver to free pm resources
4701  */
4702 static void
4703 pm_stop(dev_info_t *dip)
4704 {
4705 	PMD_FUNC(pmf, "stop")
4706 	dev_info_t *pdip = ddi_get_parent(dip);
4707 
4708 	ASSERT(!PM_IAM_LOCKING_DIP(dip));
4709 	/* stopping scan, destroy scan data structure */
4710 	if (!PM_ISBC(dip)) {
4711 		pm_scan_stop(dip);
4712 		pm_scan_fini(dip);
4713 	}
4714 
4715 	if (PM_GET_PM_INFO(dip) != NULL) {
4716 		if (pm_unmanage(dip) == DDI_SUCCESS) {
4717 			/*
4718 			 * Old style driver may have called
4719 			 * pm_destroy_components already, but just in case ...
4720 			 */
4721 			e_pm_destroy_components(dip);
4722 		} else {
4723 			PMD(PMD_FAIL, ("%s: can't pm_unmanage %s@%s(%s#%d)\n",
4724 			    pmf, PM_DEVICE(dip)))
4725 		}
4726 	} else {
4727 		if (PM_NUMCMPTS(dip))
4728 			e_pm_destroy_components(dip);
4729 		else {
4730 			if (DEVI(dip)->devi_pm_flags & PMC_NOPMKID) {
4731 				DEVI(dip)->devi_pm_flags &= ~PMC_NOPMKID;
4732 				if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
4733 					pm_rele_power(pdip);
4734 				} else if (pdip &&
4735 				    MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
4736 					(void) mdi_power(pdip,
4737 					    MDI_PM_RELE_POWER,
4738 					    (void *)dip, NULL, 0);
4739 				}
4740 			}
4741 		}
4742 	}
4743 }
4744 
4745 /*
4746  * The node is the subject of a reparse pm props ioctl. Throw away the old
4747  * info and start over.
4748  */
4749 int
4750 e_new_pm_props(dev_info_t *dip)
4751 {
4752 	if (PM_GET_PM_INFO(dip) != NULL) {
4753 		pm_stop(dip);
4754 
4755 		if (e_pm_manage(dip, PM_STYLE_NEW) != DDI_SUCCESS) {
4756 			return (DDI_FAILURE);
4757 		}
4758 	}
4759 	e_pm_props(dip);
4760 	return (DDI_SUCCESS);
4761 }
4762 
4763 /*
4764  * Device has been attached, so process its pm properties
4765  */
4766 void
4767 e_pm_props(dev_info_t *dip)
4768 {
4769 	char *pp;
4770 	int len;
4771 	int flags = 0;
4772 	int propflag = DDI_PROP_DONTPASS|DDI_PROP_CANSLEEP;
4773 
4774 	/*
4775 	 * It doesn't matter if we do this more than once, we should always
4776 	 * get the same answers, and if not, then the last one in is the
4777 	 * best one.
4778 	 */
4779 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-hardware-state",
4780 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4781 		if (strcmp(pp, "needs-suspend-resume") == 0) {
4782 			flags = PMC_NEEDS_SR;
4783 		} else if (strcmp(pp, "no-suspend-resume") == 0) {
4784 			flags = PMC_NO_SR;
4785 		} else if (strcmp(pp, "parental-suspend-resume") == 0) {
4786 			flags = PMC_PARENTAL_SR;
4787 		} else {
4788 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4789 			    "%s property value '%s'", PM_NAME(dip),
4790 			    PM_ADDR(dip), "pm-hardware-state", pp);
4791 		}
4792 		kmem_free(pp, len);
4793 	}
4794 	/*
4795 	 * This next segment (PMC_WANTS_NOTIFY) is in
4796 	 * support of nexus drivers which will want to be involved in
4797 	 * (or at least notified of) their child node's power level transitions.
4798 	 * "pm-want-child-notification?" is defined by the parent.
4799 	 */
4800 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4801 	    "pm-want-child-notification?") && PM_HAS_BUS_POWER(dip))
4802 		flags |= PMC_WANTS_NOTIFY;
4803 	ASSERT(PM_HAS_BUS_POWER(dip) || !ddi_prop_exists(DDI_DEV_T_ANY,
4804 	    dip, propflag, "pm-want-child-notification?"));
4805 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, propflag,
4806 	    "no-involuntary-power-cycles"))
4807 		flags |= PMC_NO_INVOL;
4808 	/*
4809 	 * Is the device a CPU device?
4810 	 */
4811 	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, propflag, "pm-class",
4812 	    (caddr_t)&pp, &len) == DDI_PROP_SUCCESS) {
4813 		if (strcmp(pp, "CPU") == 0) {
4814 			flags |= PMC_CPU_DEVICE;
4815 		} else {
4816 			cmn_err(CE_NOTE, "!device %s@%s has unrecognized "
4817 			    "%s property value '%s'", PM_NAME(dip),
4818 			    PM_ADDR(dip), "pm-class", pp);
4819 		}
4820 		kmem_free(pp, len);
4821 	}
4822 	/* devfs single threads us */
4823 	DEVI(dip)->devi_pm_flags |= flags;
4824 }
4825 
/*
 * This is the DDI_CTLOPS_POWER handler that is used when there is no ppm
 * driver which has claimed a node.
 * Sets old_power in arg struct.
 *
 * dip must be NULL (asserted); rdip is the device the request is about.
 * arg points to a power_req_t describing the request; result is only written
 * for PMR_PPM_TRY_LOCK_POWER, where it receives the try-lock outcome as an
 * int.
 *
 * Returns the value of pm_power() for PMR_PPM_SET_POWER, DDI_SUCCESS for the
 * other request types handled here, and DDI_FAILURE for an unrecognized
 * request type or a ctlop other than DDI_CTLOPS_POWER.
 */
static int
pm_default_ctlops(dev_info_t *dip, dev_info_t *rdip,
    ddi_ctl_enum_t ctlop, void *arg, void *result)
{
	_NOTE(ARGUNUSED(dip))
	PMD_FUNC(pmf, "ctlops")
	power_req_t *reqp = (power_req_t *)arg;
	int retval;
	dev_info_t *target_dip;
	int new_level, old_level, cmpt;
#ifdef PMDDEBUG
	char *format;
#endif

	/*
	 * The interface for doing the actual power level changes is now
	 * through the DDI_CTLOPS_POWER bus_ctl, so that we can plug in
	 * different platform-specific power control drivers.
	 *
	 * This driver implements the "default" version of this interface.
	 * If no ppm driver has been installed then this interface is called
	 * instead.
	 */
	ASSERT(dip == NULL);
	switch (ctlop) {
	case DDI_CTLOPS_POWER:
		switch (reqp->request_type) {
		case PMR_PPM_SET_POWER:
		{
			target_dip = reqp->req.ppm_set_power_req.who;
			ASSERT(target_dip == rdip);
			new_level = reqp->req.ppm_set_power_req.new_level;
			cmpt = reqp->req.ppm_set_power_req.cmpt;
			/* pass back old power for the PM_LEVEL_UNKNOWN case */
			old_level = PM_CURPOWER(target_dip, cmpt);
			reqp->req.ppm_set_power_req.old_level = old_level;
			/* the old level was sampled above, before this call */
			retval = pm_power(target_dip, cmpt, new_level);
			PMD(PMD_PPM, ("%s: PPM_SET_POWER %s@%s(%s#%d)[%d] %d->"
			    "%d %s\n", pmf, PM_DEVICE(target_dip), cmpt,
			    old_level, new_level, (retval == DDI_SUCCESS ?
			    "chd" : "no chg")))
			return (retval);
		}

		/*
		 * The configuration notifications below need no action from
		 * the default handler; they are only logged (PMDDEBUG builds)
		 * and reported as successful.
		 */
		case PMR_PPM_PRE_DETACH:
		case PMR_PPM_POST_DETACH:
		case PMR_PPM_PRE_ATTACH:
		case PMR_PPM_POST_ATTACH:
		case PMR_PPM_PRE_PROBE:
		case PMR_PPM_POST_PROBE:
		case PMR_PPM_PRE_RESUME:
		case PMR_PPM_INIT_CHILD:
		case PMR_PPM_UNINIT_CHILD:
#ifdef PMDDEBUG
			/*
			 * Pick the message for this request type.  Only some
			 * formats consume the trailing "rets %d" argument
			 * that is always passed to PMD() below.
			 */
			switch (reqp->request_type) {
				case PMR_PPM_PRE_DETACH:
					format = "%s: PMR_PPM_PRE_DETACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_DETACH:
					format = "%s: PMR_PPM_POST_DETACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_ATTACH:
					format = "%s: PMR_PPM_PRE_ATTACH "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_ATTACH:
					format = "%s: PMR_PPM_POST_ATTACH "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_PROBE:
					format = "%s: PMR_PPM_PRE_PROBE "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_POST_PROBE:
					format = "%s: PMR_PPM_POST_PROBE "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_PRE_RESUME:
					format = "%s: PMR_PPM_PRE_RESUME "
					    "%s@%s(%s#%d) rets %d\n";
					break;
				case PMR_PPM_INIT_CHILD:
					format = "%s: PMR_PPM_INIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				case PMR_PPM_UNINIT_CHILD:
					format = "%s: PMR_PPM_UNINIT_CHILD "
					    "%s@%s(%s#%d)\n";
					break;
				default:
					/*
					 * Not reachable: the enclosing case
					 * labels list exactly the request
					 * types handled above.
					 */
					break;
			}
			PMD(PMD_PPM, (format, pmf, PM_DEVICE(rdip),
			    reqp->req.ppm_config_req.result))
#endif
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_CHANGE_NOTIFY:
			/*
			 * Nothing for us to do
			 */
			ASSERT(reqp->req.ppm_notify_level_req.who == rdip);
			PMD(PMD_PPM, ("%s: PMR_PPM_POWER_CHANGE_NOTIFY "
			    "%s@%s(%s#%d)[%d] %d->%d\n", pmf,
			    PM_DEVICE(reqp->req.ppm_notify_level_req.who),
			    reqp->req.ppm_notify_level_req.cmpt,
			    PM_CURPOWER(reqp->req.ppm_notify_level_req.who,
			    reqp->req.ppm_notify_level_req.cmpt),
			    reqp->req.ppm_notify_level_req.new_level))
			return (DDI_SUCCESS);

		case PMR_PPM_UNMANAGE:
			/* logged only */
			PMD(PMD_PPM, ("%s: PMR_PPM_UNMANAGE %s@%s(%s#%d)\n",
			    pmf, PM_DEVICE(rdip)))
			return (DDI_SUCCESS);

		/*
		 * The locking requests are handed to the single-device
		 * lock routines.
		 */
		case PMR_PPM_LOCK_POWER:
			pm_lock_power_single(reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_UNLOCK_POWER:
			pm_unlock_power_single(
			    reqp->req.ppm_unlock_power_req.who,
			    reqp->req.ppm_unlock_power_req.circ);
			return (DDI_SUCCESS);

		case PMR_PPM_TRY_LOCK_POWER:
			/* the try-lock outcome goes back through result */
			*(int *)result = pm_try_locking_power_single(
			    reqp->req.ppm_lock_power_req.who,
			    reqp->req.ppm_lock_power_req.circp);
			return (DDI_SUCCESS);

		case PMR_PPM_POWER_LOCK_OWNER:
			/* report rdip's devi_busy_thread as the lock owner */
			target_dip = reqp->req.ppm_power_lock_owner_req.who;
			ASSERT(target_dip == rdip);
			reqp->req.ppm_power_lock_owner_req.owner =
			    DEVI(rdip)->devi_busy_thread;
			return (DDI_SUCCESS);
		default:
			/* request type not handled by the default handler */
			PMD(PMD_ERROR, ("%s: default!\n", pmf))
			return (DDI_FAILURE);
		}

	default:
		/* only DDI_CTLOPS_POWER is handled here */
		PMD(PMD_ERROR, ("%s: unknown\n", pmf))
		return (DDI_FAILURE);
	}
}
4982 
4983 /*
4984  * We overload the bus_ctl ops here--perhaps we ought to have a distinct
4985  * power_ops struct for this functionality instead?
4986  * However, we only ever do this on a ppm driver.
4987  */
4988 int
4989 pm_ctlops(dev_info_t *d, dev_info_t *r, ddi_ctl_enum_t op, void *a, void *v)
4990 {
4991 	int (*fp)();
4992 
4993 	/* if no ppm handler, call the default routine */
4994 	if (d == NULL) {
4995 		return (pm_default_ctlops(d, r, op, a, v));
4996 	}
4997 	if (!d || !r)
4998 		return (DDI_FAILURE);
4999 	ASSERT(DEVI(d)->devi_ops && DEVI(d)->devi_ops->devo_bus_ops &&
5000 	    DEVI(d)->devi_ops->devo_bus_ops->bus_ctl);
5001 
5002 	fp = DEVI(d)->devi_ops->devo_bus_ops->bus_ctl;
5003 	return ((*fp)(d, r, op, a, v));
5004 }
5005 
5006 /*
5007  * Called on a node when attach completes or the driver makes its first pm
5008  * call (whichever comes first).
5009  * In the attach case, device may not be power manageable at all.
5010  * Don't need to lock the dip because we're single threaded by the devfs code
5011  */
5012 static int
5013 pm_start(dev_info_t *dip)
5014 {
5015 	PMD_FUNC(pmf, "start")
5016 	int ret;
5017 	dev_info_t *pdip = ddi_get_parent(dip);
5018 	int e_pm_manage(dev_info_t *, int);
5019 	void pm_noinvol_specd(dev_info_t *dip);
5020 
5021 	e_pm_props(dip);
5022 	pm_noinvol_specd(dip);
5023 	/*
5024 	 * If this dip has already been processed, don't mess with it
5025 	 * (but decrement the speculative count we did above, as whatever
5026 	 * code put it under pm already will have dealt with it)
5027 	 */
5028 	if (PM_GET_PM_INFO(dip)) {
5029 		PMD(PMD_KIDSUP, ("%s: pm already done for %s@%s(%s#%d)\n",
5030 		    pmf, PM_DEVICE(dip)))
5031 		return (0);
5032 	}
5033 	ret = e_pm_manage(dip, PM_STYLE_UNKNOWN);
5034 
5035 	if (PM_GET_PM_INFO(dip) == NULL) {
5036 		/*
5037 		 * keep the kidsupcount increment as is
5038 		 */
5039 		DEVI(dip)->devi_pm_flags |= PMC_NOPMKID;
5040 		if (pdip && !PM_WANTS_NOTIFICATION(pdip)) {
5041 			pm_hold_power(pdip);
5042 		} else if (pdip && MDI_VHCI(pdip) && MDI_CLIENT(dip)) {
5043 			(void) mdi_power(pdip, MDI_PM_HOLD_POWER,
5044 			    (void *)dip, NULL, 0);
5045 		}
5046 
5047 		PMD(PMD_KIDSUP, ("%s: pm of %s@%s(%s#%d) failed, parent "
5048 		    "left up\n", pmf, PM_DEVICE(dip)))
5049 	}
5050 
5051 	return (ret);
5052 }
5053 
5054 /*
5055  * Keep a list of recorded thresholds.  For now we just keep a list and
5056  * search it linearly.  We don't expect too many entries.  Can always hash it
5057  * later if we need to.
5058  */
5059 void
5060 pm_record_thresh(pm_thresh_rec_t *rp)
5061 {
5062 	pm_thresh_rec_t *pptr, *ptr;
5063 
5064 	ASSERT(*rp->ptr_physpath);
5065 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
5066 	for (pptr = NULL, ptr = pm_thresh_head;
5067 	    ptr; pptr = ptr,  ptr = ptr->ptr_next) {
5068 		if (strcmp(rp->ptr_physpath, ptr->ptr_physpath) == 0) {
5069 			/* replace this one */
5070 			rp->ptr_next = ptr->ptr_next;
5071 			if (pptr) {
5072 				pptr->ptr_next = rp;
5073 			} else {
5074 				pm_thresh_head = rp;
5075 			}
5076 			rw_exit(&pm_thresh_rwlock);
5077 			kmem_free(ptr, ptr->ptr_size);
5078 			return;
5079 		}
5080 		continue;
5081 	}
5082 	/*
5083 	 * There was not a match in the list, insert this one in front
5084 	 */
5085 	if (pm_thresh_head) {
5086 		rp->ptr_next = pm_thresh_head;
5087 		pm_thresh_head = rp;
5088 	} else {
5089 		rp->ptr_next = NULL;
5090 		pm_thresh_head = rp;
5091 	}
5092 	rw_exit(&pm_thresh_rwlock);
5093 }
5094 
5095 /*
5096  * Create a new dependency record and hang a new dependency entry off of it
5097  */
5098 pm_pdr_t *
5099 newpdr(char *kept, char *keeps, int isprop)
5100 {
5101 	size_t size = strlen(kept) + strlen(keeps) + 2 + sizeof (pm_pdr_t);
5102 	pm_pdr_t *p = kmem_zalloc(size, KM_SLEEP);
5103 	p->pdr_size = size;
5104 	p->pdr_isprop = isprop;
5105 	p->pdr_kept_paths = NULL;
5106 	p->pdr_kept_count = 0;
5107 	p->pdr_kept = (char *)((intptr_t)p + sizeof (pm_pdr_t));
5108 	(void) strcpy(p->pdr_kept, kept);
5109 	p->pdr_keeper = (char *)((intptr_t)p->pdr_kept + strlen(kept) + 1);
5110 	(void) strcpy(p->pdr_keeper, keeps);
5111 	ASSERT((intptr_t)p->pdr_keeper + strlen(p->pdr_keeper) + 1 <=
5112 	    (intptr_t)p + size);
5113 	ASSERT((intptr_t)p->pdr_kept + strlen(p->pdr_kept) + 1 <=
5114 	    (intptr_t)p + size);
5115 	return (p);
5116 }
5117 
5118 /*
5119  * Keep a list of recorded dependencies.  We only keep the
5120  * keeper -> kept list for simplification. At this point We do not
5121  * care about whether the devices are attached or not yet,
5122  * this would be done in pm_keeper() and pm_kept().
5123  * If a PM_RESET_PM happens, then we tear down and forget the dependencies,
5124  * and it is up to the user to issue the ioctl again if they want it
5125  * (e.g. pmconfig)
5126  * Returns true if dependency already exists in the list.
5127  */
5128 int
5129 pm_record_keeper(char *kept, char *keeper, int isprop)
5130 {
5131 	PMD_FUNC(pmf, "record_keeper")
5132 	pm_pdr_t *npdr, *ppdr, *pdr;
5133 
5134 	PMD(PMD_KEEPS, ("%s: %s, %s\n", pmf, kept, keeper))
5135 	ASSERT(kept && keeper);
5136 #ifdef DEBUG
5137 	if (pm_debug & PMD_KEEPS)
5138 		prdeps("pm_record_keeper entry");
5139 #endif
5140 	for (ppdr = NULL, pdr = pm_dep_head; pdr;
5141 	    ppdr = pdr, pdr = pdr->pdr_next) {
5142 		PMD(PMD_KEEPS, ("%s: check %s, %s\n", pmf, pdr->pdr_kept,
5143 		    pdr->pdr_keeper))
5144 		if (strcmp(kept, pdr->pdr_kept) == 0 &&
5145 		    strcmp(keeper, pdr->pdr_keeper) == 0) {
5146 			PMD(PMD_KEEPS, ("%s: match\n", pmf))
5147 			return (1);
5148 		}
5149 	}
5150 	/*
5151 	 * We did not find any match, so we have to make an entry
5152 	 */
5153 	npdr = newpdr(kept, keeper, isprop);
5154 	if (ppdr) {
5155 		ASSERT(ppdr->pdr_next == NULL);
5156 		ppdr->pdr_next = npdr;
5157 	} else {
5158 		ASSERT(pm_dep_head == NULL);
5159 		pm_dep_head = npdr;
5160 	}
5161 #ifdef DEBUG
5162 	if (pm_debug & PMD_KEEPS)
5163 		prdeps("pm_record_keeper after new record");
5164 #endif
5165 	if (!isprop)
5166 		pm_unresolved_deps++;
5167 	else
5168 		pm_prop_deps++;
5169 	return (0);
5170 }
5171 
5172 /*
5173  * Look up this device in the set of devices we've seen ioctls for
5174  * to see if we are holding a threshold spec for it.  If so, make it so.
5175  * At ioctl time, we were given the physical path of the device.
5176  */
5177 int
5178 pm_thresh_specd(dev_info_t *dip)
5179 {
5180 	void pm_apply_recorded_thresh(dev_info_t *, pm_thresh_rec_t *);
5181 	char *path = 0;
5182 	char pathbuf[MAXNAMELEN];
5183 	pm_thresh_rec_t *rp;
5184 
5185 	path = ddi_pathname(dip, pathbuf);
5186 
5187 	rw_enter(&pm_thresh_rwlock, RW_READER);
5188 	for (rp = pm_thresh_head; rp; rp = rp->ptr_next) {
5189 		if (strcmp(rp->ptr_physpath, path) != 0)
5190 			continue;
5191 		pm_apply_recorded_thresh(dip, rp);
5192 		rw_exit(&pm_thresh_rwlock);
5193 		return (1);
5194 	}
5195 	rw_exit(&pm_thresh_rwlock);
5196 	return (0);
5197 }
5198 
5199 static int
5200 pm_set_keeping(dev_info_t *keeper, dev_info_t *kept)
5201 {
5202 	PMD_FUNC(pmf, "set_keeping")
5203 	int j, up = 0, circ;
5204 	void prdeps(char *);
5205 
5206 	PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), kept=%s@%s(%s#%d)\n", pmf,
5207 	    PM_DEVICE(keeper), PM_DEVICE(kept)))
5208 #ifdef DEBUG
5209 	if (pm_debug & PMD_KEEPS)
5210 		prdeps("Before PAD\n");
5211 #endif
5212 	ASSERT(keeper != kept);
5213 	if (PM_GET_PM_INFO(keeper) == NULL) {
5214 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5215 		    "%s@%s(%s#%d), but the former is not power managed",
5216 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5217 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: keeper %s@%s(%s#%d) is not"
5218 		    "power managed\n", pmf, PM_DEVICE(keeper)))
5219 		return (0);
5220 	}
5221 	if (PM_GET_PM_INFO(kept) == NULL) {
5222 		cmn_err(CE_CONT, "!device %s@%s(%s#%d) keeps up device "
5223 		    "%s@%s(%s#%d), but the latter is not power managed",
5224 		    PM_DEVICE(keeper), PM_DEVICE(kept));
5225 		PMD((PMD_FAIL | PMD_KEEPS), ("%s: kept %s@%s(%s#%d) is not"
5226 		    "power managed\n", pmf, PM_DEVICE(kept)))
5227 		return (0);
5228 	}
5229 
5230 	PM_LOCK_POWER(keeper, &circ);
5231 	for (j = 0; j < PM_NUMCMPTS(keeper); j++) {
5232 		if (PM_CURPOWER(keeper, j)) {
5233 			up++;
5234 			break;
5235 		}
5236 	}
5237 	if (up) {
5238 		/* Bringup and maintain a hold on the kept */
5239 		PMD(PMD_KEEPS, ("%s: place a hold on kept %s@%s(%s#%d)\n", pmf,
5240 		    PM_DEVICE(kept)))
5241 		bring_pmdep_up(kept, 1);
5242 	}
5243 	PM_UNLOCK_POWER(keeper, circ);
5244 #ifdef DEBUG
5245 	if (pm_debug & PMD_KEEPS)
5246 		prdeps("After PAD\n");
5247 #endif
5248 	return (1);
5249 }
5250 
5251 /*
5252  * Should this device keep up another device?
5253  * Look up this device in the set of devices we've seen ioctls for
5254  * to see if we are holding a dependency spec for it.  If so, make it so.
5255  * Because we require the kept device to be attached already in order to
5256  * make the list entry (and hold it), we only need to look for keepers.
5257  * At ioctl time, we were given the physical path of the device.
5258  */
5259 int
5260 pm_keeper(char *keeper)
5261 {
5262 	PMD_FUNC(pmf, "keeper")
5263 	int pm_apply_recorded_dep(dev_info_t *, pm_pdr_t *);
5264 	dev_info_t *dip;
5265 	pm_pdr_t *dp;
5266 	dev_info_t *kept = NULL;
5267 	int ret = 0;
5268 	int i;
5269 
5270 	if (!pm_unresolved_deps && !pm_prop_deps)
5271 		return (0);
5272 	ASSERT(keeper != NULL);
5273 	dip = pm_name_to_dip(keeper, 1);
5274 	if (dip == NULL)
5275 		return (0);
5276 	PMD(PMD_KEEPS, ("%s: keeper=%s\n", pmf, keeper))
5277 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5278 		if (!dp->pdr_isprop) {
5279 			if (!pm_unresolved_deps)
5280 				continue;
5281 			PMD(PMD_KEEPS, ("%s: keeper %s\n", pmf, dp->pdr_keeper))
5282 			if (dp->pdr_satisfied) {
5283 				PMD(PMD_KEEPS, ("%s: satisfied\n", pmf))
5284 				continue;
5285 			}
5286 			if (strcmp(dp->pdr_keeper, keeper) == 0) {
5287 				ret += pm_apply_recorded_dep(dip, dp);
5288 			}
5289 		} else {
5290 			if (strcmp(dp->pdr_keeper, keeper) != 0)
5291 				continue;
5292 			for (i = 0; i < dp->pdr_kept_count; i++) {
5293 				if (dp->pdr_kept_paths[i] == NULL)
5294 					continue;
5295 				kept = pm_name_to_dip(dp->pdr_kept_paths[i], 1);
5296 				if (kept == NULL)
5297 					continue;
5298 				ASSERT(ddi_prop_exists(DDI_DEV_T_ANY, kept,
5299 				    DDI_PROP_DONTPASS, dp->pdr_kept));
5300 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d), "
5301 				    "kept=%s@%s(%s#%d) keptcnt=%d\n",
5302 				    pmf, PM_DEVICE(dip), PM_DEVICE(kept),
5303 				    dp->pdr_kept_count))
5304 				if (kept != dip) {
5305 					ret += pm_set_keeping(dip, kept);
5306 				}
5307 				ddi_release_devi(kept);
5308 			}
5309 
5310 		}
5311 	}
5312 	ddi_release_devi(dip);
5313 	return (ret);
5314 }
5315 
5316 /*
5317  * Should this device be kept up by another device?
5318  * Look up all dependency recorded from PM_ADD_DEPENDENT and
5319  * PM_ADD_DEPENDENT_PROPERTY ioctls. Record down on the keeper's
5320  * kept device lists.
5321  */
5322 static int
5323 pm_kept(char *keptp)
5324 {
5325 	PMD_FUNC(pmf, "kept")
5326 	pm_pdr_t *dp;
5327 	int found = 0;
5328 	int ret = 0;
5329 	dev_info_t *keeper;
5330 	dev_info_t *kept;
5331 	size_t length;
5332 	int i;
5333 	char **paths;
5334 	char *path;
5335 
5336 	ASSERT(keptp != NULL);
5337 	kept = pm_name_to_dip(keptp, 1);
5338 	if (kept == NULL)
5339 		return (0);
5340 	PMD(PMD_KEEPS, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
5341 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
5342 		if (dp->pdr_isprop) {
5343 			PMD(PMD_KEEPS, ("%s: property %s\n", pmf, dp->pdr_kept))
5344 			if (ddi_prop_exists(DDI_DEV_T_ANY, kept,
5345 			    DDI_PROP_DONTPASS, dp->pdr_kept)) {
5346 				/*
5347 				 * Dont allow self dependency.
5348 				 */
5349 				if (strcmp(dp->pdr_keeper, keptp) == 0)
5350 					continue;
5351 				keeper = pm_name_to_dip(dp->pdr_keeper, 1);
5352 				if (keeper == NULL)
5353 					continue;
5354 				PMD(PMD_KEEPS, ("%s: adding to kepts path list "
5355 				    "%p\n", pmf, (void *)kept))
5356 #ifdef DEBUG
5357 				if (pm_debug & PMD_DEP)
5358 					prdeps("Before Adding from pm_kept\n");
5359 #endif
5360 				/*
5361 				 * Add ourselves to the dip list.
5362 				 */
5363 				if (dp->pdr_kept_count == 0) {
5364 					length = strlen(keptp) + 1;
5365 					path =
5366 					    kmem_alloc(length, KM_SLEEP);
5367 					paths = kmem_alloc(sizeof (char **),
5368 					    KM_SLEEP);
5369 					(void) strcpy(path, keptp);
5370 					paths[0] = path;
5371 					dp->pdr_kept_paths = paths;
5372 					dp->pdr_kept_count++;
5373 				} else {
5374 					/* Check to see if already on list */
5375 					for (i = 0; i < dp->pdr_kept_count;
5376 					    i++) {
5377 						if (strcmp(keptp,
5378 						    dp->pdr_kept_paths[i])
5379 						    == 0) {
5380 							found++;
5381 							break;
5382 						}
5383 					}
5384 					if (found) {
5385 						ddi_release_devi(keeper);
5386 						continue;
5387 					}
5388 					length = dp->pdr_kept_count *
5389 					    sizeof (char **);
5390 					paths = kmem_alloc(
5391 					    length + sizeof (char **),
5392 					    KM_SLEEP);
5393 					if (dp->pdr_kept_count) {
5394 						bcopy(dp->pdr_kept_paths,
5395 						    paths, length);
5396 						kmem_free(dp->pdr_kept_paths,
5397 						    length);
5398 					}
5399 					dp->pdr_kept_paths = paths;
5400 					length = strlen(keptp) + 1;
5401 					path =
5402 					    kmem_alloc(length, KM_SLEEP);
5403 					(void) strcpy(path, keptp);
5404 					dp->pdr_kept_paths[i] = path;
5405 					dp->pdr_kept_count++;
5406 				}
5407 #ifdef DEBUG
5408 				if (pm_debug & PMD_DEP)
5409 					prdeps("After from pm_kept\n");
5410 #endif
5411 				if (keeper) {
5412 					ret += pm_set_keeping(keeper, kept);
5413 					ddi_release_devi(keeper);
5414 				}
5415 			}
5416 		} else {
5417 			/*
5418 			 * pm_keeper would be called later to do
5419 			 * the actual pm_set_keeping.
5420 			 */
5421 			PMD(PMD_KEEPS, ("%s: adding to kepts path list %p\n",
5422 			    pmf, (void *)kept))
5423 #ifdef DEBUG
5424 			if (pm_debug & PMD_DEP)
5425 				prdeps("Before Adding from pm_kept\n");
5426 #endif
5427 			if (strcmp(keptp, dp->pdr_kept) == 0) {
5428 				if (dp->pdr_kept_paths == NULL) {
5429 					length = strlen(keptp) + 1;
5430 					path =
5431 					    kmem_alloc(length, KM_SLEEP);
5432 					paths = kmem_alloc(sizeof (char **),
5433 					    KM_SLEEP);
5434 					(void) strcpy(path, keptp);
5435 					paths[0] = path;
5436 					dp->pdr_kept_paths = paths;
5437 					dp->pdr_kept_count++;
5438 				}
5439 			}
5440 #ifdef DEBUG
5441 			if (pm_debug & PMD_DEP)
5442 				prdeps("After from pm_kept\n");
5443 #endif
5444 		}
5445 	}
5446 	ddi_release_devi(kept);
5447 	return (ret);
5448 }
5449 
5450 /*
5451  * Apply a recorded dependency.  dp specifies the dependency, and
5452  * keeper is already known to be the device that keeps up the other (kept) one.
5453  * We have to the whole tree for the "kept" device, then apply
5454  * the dependency (which may already be applied).
5455  */
5456 int
5457 pm_apply_recorded_dep(dev_info_t *keeper, pm_pdr_t *dp)
5458 {
5459 	PMD_FUNC(pmf, "apply_recorded_dep")
5460 	dev_info_t *kept = NULL;
5461 	int ret = 0;
5462 	char *keptp = NULL;
5463 
5464 	/*
5465 	 * Device to Device dependency can only be 1 to 1.
5466 	 */
5467 	if (dp->pdr_kept_paths == NULL)
5468 		return (0);
5469 	keptp = dp->pdr_kept_paths[0];
5470 	if (keptp == NULL)
5471 		return (0);
5472 	ASSERT(*keptp != '\0');
5473 	kept = pm_name_to_dip(keptp, 1);
5474 	if (kept == NULL)
5475 		return (0);
5476 	if (kept) {
5477 		PMD(PMD_KEEPS, ("%s: keeper=%s, kept=%s\n", pmf,
5478 		    dp->pdr_keeper, keptp))
5479 		if (pm_set_keeping(keeper, kept)) {
5480 			ASSERT(dp->pdr_satisfied == 0);
5481 			dp->pdr_satisfied = 1;
5482 			ASSERT(pm_unresolved_deps);
5483 			pm_unresolved_deps--;
5484 			ret++;
5485 		}
5486 	}
5487 	ddi_release_devi(kept);
5488 
5489 	return (ret);
5490 }
5491 
5492 /*
5493  * Called from common/io/pm.c
5494  */
5495 int
5496 pm_cur_power(pm_component_t *cp)
5497 {
5498 	return (cur_power(cp));
5499 }
5500 
5501 /*
5502  * External interface to sanity-check a power level.
5503  */
5504 int
5505 pm_valid_power(dev_info_t *dip, int comp, int level)
5506 {
5507 	PMD_FUNC(pmf, "valid_power")
5508 
5509 	if (comp >= 0 && comp < PM_NUMCMPTS(dip) && level >= 0)
5510 		return (e_pm_valid_power(dip, comp, level));
5511 	else {
5512 		PMD(PMD_FAIL, ("%s: comp=%d, ncomp=%d, level=%d\n",
5513 		    pmf, comp, PM_NUMCMPTS(dip), level))
5514 		return (0);
5515 	}
5516 }
5517 
5518 /*
5519  * Called when a device that is direct power managed needs to change state.
5520  * This routine arranges to block the request until the process managing
5521  * the device makes the change (or some other incompatible change) or
5522  * the process closes /dev/pm.
5523  */
5524 static int
5525 pm_block(dev_info_t *dip, int comp, int newpower, int oldpower)
5526 {
5527 	pm_rsvp_t *new = kmem_zalloc(sizeof (*new), KM_SLEEP);
5528 	int ret = 0;
5529 	void pm_dequeue_blocked(pm_rsvp_t *);
5530 	void pm_enqueue_blocked(pm_rsvp_t *);
5531 
5532 	ASSERT(!pm_processes_stopped);
5533 	ASSERT(PM_IAM_LOCKING_DIP(dip));
5534 	new->pr_dip = dip;
5535 	new->pr_comp = comp;
5536 	new->pr_newlevel = newpower;
5537 	new->pr_oldlevel = oldpower;
5538 	cv_init(&new->pr_cv, NULL, CV_DEFAULT, NULL);
5539 	mutex_enter(&pm_rsvp_lock);
5540 	pm_enqueue_blocked(new);
5541 	pm_enqueue_notify(PSC_PENDING_CHANGE, dip, comp, newpower, oldpower,
5542 	    PM_CANBLOCK_BLOCK);
5543 	PM_UNLOCK_DIP(dip);
5544 	/*
5545 	 * truss may make the cv_wait_sig return prematurely
5546 	 */
5547 	while (ret == 0) {
5548 		/*
5549 		 * Normally there will be no user context involved, but if
5550 		 * there is (e.g. we are here via an ioctl call to a driver)
5551 		 * then we should allow the process to abort the request,
5552 		 * or we get an unkillable process if the same thread does
5553 		 * PM_DIRECT_PM and pm_raise_power
5554 		 */
5555 		if (cv_wait_sig(&new->pr_cv, &pm_rsvp_lock) == 0) {
5556 			ret = PMP_FAIL;
5557 		} else {
5558 			ret = new->pr_retval;
5559 		}
5560 	}
5561 	pm_dequeue_blocked(new);
5562 	mutex_exit(&pm_rsvp_lock);
5563 	cv_destroy(&new->pr_cv);
5564 	kmem_free(new, sizeof (*new));
5565 	return (ret);
5566 }
5567 
5568 /*
5569  * Returns true if the process is interested in power level changes (has issued
5570  * PM_GET_STATE_CHANGE ioctl).
5571  */
5572 int
5573 pm_interest_registered(int clone)
5574 {
5575 	ASSERT(clone >= 0 && clone < PM_MAX_CLONE - 1);
5576 	return (pm_interest[clone]);
5577 }
5578 
5579 static void pm_enqueue_pscc(pscc_t *, pscc_t **);
5580 
5581 /*
5582  * Process with clone has just done PM_DIRECT_PM on dip, or has asked to
5583  * watch all state transitions (dip == NULL).  Set up data
5584  * structs to communicate with process about state changes.
5585  */
5586 void
5587 pm_register_watcher(int clone, dev_info_t *dip)
5588 {
5589 	pscc_t	*p;
5590 	psce_t	*psce;
5591 
5592 	/*
5593 	 * We definitely need a control struct, then we have to search to see
5594 	 * there is already an entries struct (in the dip != NULL case).
5595 	 */
5596 	pscc_t	*pscc = kmem_zalloc(sizeof (*pscc), KM_SLEEP);
5597 	pscc->pscc_clone = clone;
5598 	pscc->pscc_dip = dip;
5599 
5600 	if (dip) {
5601 		int found = 0;
5602 		rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5603 		for (p = pm_pscc_direct; p; p = p->pscc_next) {
5604 			/*
5605 			 * Already an entry for this clone, so just use it
5606 			 * for the new one (for the case where a single
5607 			 * process is watching multiple devices)
5608 			 */
5609 			if (p->pscc_clone == clone) {
5610 				pscc->pscc_entries = p->pscc_entries;
5611 				pscc->pscc_entries->psce_references++;
5612 				found++;
5613 				break;
5614 			}
5615 		}
5616 		if (!found) {		/* create a new one */
5617 			psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5618 			mutex_init(&psce->psce_lock, NULL, MUTEX_DEFAULT, NULL);
5619 			psce->psce_first =
5620 			    kmem_zalloc(sizeof (pm_state_change_t) * PSCCOUNT,
5621 			    KM_SLEEP);
5622 			psce->psce_in = psce->psce_out = psce->psce_first;
5623 			psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5624 			psce->psce_references = 1;
5625 			pscc->pscc_entries = psce;
5626 		}
5627 		pm_enqueue_pscc(pscc, &pm_pscc_direct);
5628 		rw_exit(&pm_pscc_direct_rwlock);
5629 	} else {
5630 		ASSERT(!pm_interest_registered(clone));
5631 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5632 #ifdef DEBUG
5633 		for (p = pm_pscc_interest; p; p = p->pscc_next) {
5634 			/*
5635 			 * Should not be an entry for this clone!
5636 			 */
5637 			ASSERT(p->pscc_clone != clone);
5638 		}
5639 #endif
5640 		psce = kmem_zalloc(sizeof (psce_t), KM_SLEEP);
5641 		psce->psce_first = kmem_zalloc(sizeof (pm_state_change_t) *
5642 		    PSCCOUNT, KM_SLEEP);
5643 		psce->psce_in = psce->psce_out = psce->psce_first;
5644 		psce->psce_last = &psce->psce_first[PSCCOUNT - 1];
5645 		psce->psce_references = 1;
5646 		pscc->pscc_entries = psce;
5647 		pm_enqueue_pscc(pscc, &pm_pscc_interest);
5648 		pm_interest[clone] = 1;
5649 		rw_exit(&pm_pscc_interest_rwlock);
5650 	}
5651 }
5652 
5653 /*
5654  * Remove the given entry from the blocked list
5655  */
5656 void
5657 pm_dequeue_blocked(pm_rsvp_t *p)
5658 {
5659 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5660 	if (pm_blocked_list == p) {
5661 		ASSERT(p->pr_prev == NULL);
5662 		if (p->pr_next != NULL)
5663 			p->pr_next->pr_prev = NULL;
5664 		pm_blocked_list = p->pr_next;
5665 	} else {
5666 		ASSERT(p->pr_prev != NULL);
5667 		p->pr_prev->pr_next = p->pr_next;
5668 		if (p->pr_next != NULL)
5669 			p->pr_next->pr_prev = p->pr_prev;
5670 	}
5671 }
5672 
5673 /*
5674  * Remove the given control struct from the given list
5675  */
5676 static void
5677 pm_dequeue_pscc(pscc_t *p, pscc_t **list)
5678 {
5679 	if (*list == p) {
5680 		ASSERT(p->pscc_prev == NULL);
5681 		if (p->pscc_next != NULL)
5682 			p->pscc_next->pscc_prev = NULL;
5683 		*list = p->pscc_next;
5684 	} else {
5685 		ASSERT(p->pscc_prev != NULL);
5686 		p->pscc_prev->pscc_next = p->pscc_next;
5687 		if (p->pscc_next != NULL)
5688 			p->pscc_next->pscc_prev = p->pscc_prev;
5689 	}
5690 }
5691 
5692 /*
5693  * Stick the control struct specified on the front of the list
5694  */
5695 static void
5696 pm_enqueue_pscc(pscc_t *p, pscc_t **list)
5697 {
5698 	pscc_t *h;	/* entry at head of list */
5699 	if ((h = *list) == NULL) {
5700 		*list = p;
5701 		ASSERT(p->pscc_next == NULL);
5702 		ASSERT(p->pscc_prev == NULL);
5703 	} else {
5704 		p->pscc_next = h;
5705 		ASSERT(h->pscc_prev == NULL);
5706 		h->pscc_prev = p;
5707 		ASSERT(p->pscc_prev == NULL);
5708 		*list = p;
5709 	}
5710 }
5711 
5712 /*
5713  * If dip is NULL, process is closing "clone" clean up all its registrations.
5714  * Otherwise only clean up those for dip because process is just giving up
5715  * control of a direct device.
5716  */
5717 void
5718 pm_deregister_watcher(int clone, dev_info_t *dip)
5719 {
5720 	pscc_t	*p, *pn;
5721 	psce_t	*psce;
5722 	int found = 0;
5723 
5724 	if (dip == NULL) {
5725 		rw_enter(&pm_pscc_interest_rwlock, RW_WRITER);
5726 		for (p = pm_pscc_interest; p; p = pn) {
5727 			pn = p->pscc_next;
5728 			if (p->pscc_clone == clone) {
5729 				pm_dequeue_pscc(p, &pm_pscc_interest);
5730 				psce = p->pscc_entries;
5731 				ASSERT(psce->psce_references == 1);
5732 				mutex_destroy(&psce->psce_lock);
5733 				kmem_free(psce->psce_first,
5734 				    sizeof (pm_state_change_t) * PSCCOUNT);
5735 				kmem_free(psce, sizeof (*psce));
5736 				kmem_free(p, sizeof (*p));
5737 			}
5738 		}
5739 		pm_interest[clone] = 0;
5740 		rw_exit(&pm_pscc_interest_rwlock);
5741 	}
5742 	found = 0;
5743 	rw_enter(&pm_pscc_direct_rwlock, RW_WRITER);
5744 	for (p = pm_pscc_direct; p; p = pn) {
5745 		pn = p->pscc_next;
5746 		if ((dip && p->pscc_dip == dip) ||
5747 		    (dip == NULL && clone == p->pscc_clone)) {
5748 			ASSERT(clone == p->pscc_clone);
5749 			found++;
5750 			/*
5751 			 * Remove from control list
5752 			 */
5753 			pm_dequeue_pscc(p, &pm_pscc_direct);
5754 			/*
5755 			 * If we're the last reference, free the
5756 			 * entries struct.
5757 			 */
5758 			psce = p->pscc_entries;
5759 			ASSERT(psce);
5760 			if (psce->psce_references == 1) {
5761 				kmem_free(psce->psce_first,
5762 				    PSCCOUNT * sizeof (pm_state_change_t));
5763 				kmem_free(psce, sizeof (*psce));
5764 			} else {
5765 				psce->psce_references--;
5766 			}
5767 			kmem_free(p, sizeof (*p));
5768 		}
5769 	}
5770 	ASSERT(dip == NULL || found);
5771 	rw_exit(&pm_pscc_direct_rwlock);
5772 }
5773 
5774 /*
5775  * Search the indicated list for an entry that matches clone, and return a
5776  * pointer to it.  To be interesting, the entry must have something ready to
5777  * be passed up to the controlling process.
5778  * The returned entry will be locked upon return from this call.
5779  */
5780 static psce_t *
5781 pm_psc_find_clone(int clone, pscc_t **list, krwlock_t *lock)
5782 {
5783 	pscc_t	*p;
5784 	psce_t	*psce;
5785 	rw_enter(lock, RW_READER);
5786 	for (p = *list; p; p = p->pscc_next) {
5787 		if (clone == p->pscc_clone) {
5788 			psce = p->pscc_entries;
5789 			mutex_enter(&psce->psce_lock);
5790 			if (psce->psce_out->size) {
5791 				rw_exit(lock);
5792 				return (psce);
5793 			} else {
5794 				mutex_exit(&psce->psce_lock);
5795 			}
5796 		}
5797 	}
5798 	rw_exit(lock);
5799 	return (NULL);
5800 }
5801 
5802 static psce_t *pm_psc_find_clone(int, pscc_t **, krwlock_t *);
5803 /*
5804  * Find an entry for a particular clone in the direct list.
5805  */
5806 psce_t *
5807 pm_psc_clone_to_direct(int clone)
5808 {
5809 	return (pm_psc_find_clone(clone, &pm_pscc_direct,
5810 	    &pm_pscc_direct_rwlock));
5811 }
5812 
5813 /*
5814  * Find an entry for a particular clone in the interest list.
5815  */
5816 psce_t *
5817 pm_psc_clone_to_interest(int clone)
5818 {
5819 	return (pm_psc_find_clone(clone, &pm_pscc_interest,
5820 	    &pm_pscc_interest_rwlock));
5821 }
5822 
5823 /*
5824  * Put the given entry at the head of the blocked list
5825  */
5826 void
5827 pm_enqueue_blocked(pm_rsvp_t *p)
5828 {
5829 	ASSERT(MUTEX_HELD(&pm_rsvp_lock));
5830 	ASSERT(p->pr_next == NULL);
5831 	ASSERT(p->pr_prev == NULL);
5832 	if (pm_blocked_list != NULL) {
5833 		p->pr_next = pm_blocked_list;
5834 		ASSERT(pm_blocked_list->pr_prev == NULL);
5835 		pm_blocked_list->pr_prev = p;
5836 		pm_blocked_list = p;
5837 	} else {
5838 		pm_blocked_list = p;
5839 	}
5840 }
5841 
5842 /*
5843  * Sets every power managed device back to its default threshold
5844  */
5845 void
5846 pm_all_to_default_thresholds(void)
5847 {
5848 	ddi_walk_devs(ddi_root_node(), pm_set_dev_thr_walk,
5849 	    (void *) &pm_system_idle_threshold);
5850 }
5851 
5852 static int
5853 pm_set_dev_thr_walk(dev_info_t *dip, void *arg)
5854 {
5855 	int thr = (int)(*(int *)arg);
5856 
5857 	if (!PM_GET_PM_INFO(dip))
5858 		return (DDI_WALK_CONTINUE);
5859 	pm_set_device_threshold(dip, thr, PMC_DEF_THRESH);
5860 	return (DDI_WALK_CONTINUE);
5861 }
5862 
5863 /*
5864  * Returns the current threshold value (in seconds) for the indicated component
5865  */
5866 int
5867 pm_current_threshold(dev_info_t *dip, int comp, int *threshp)
5868 {
5869 	if (comp < 0 || comp >= PM_NUMCMPTS(dip)) {
5870 		return (DDI_FAILURE);
5871 	} else {
5872 		*threshp = cur_threshold(dip, comp);
5873 		return (DDI_SUCCESS);
5874 	}
5875 }
5876 
5877 /*
5878  * To be called when changing the power level of a component of a device.
5879  * On some platforms, changing power on one device may require that power
5880  * be changed on other, related devices in the same transaction.  Thus, we
5881  * always pass this request to the platform power manager so that all the
5882  * affected devices will be locked.
5883  */
5884 void
5885 pm_lock_power(dev_info_t *dip, int *circp)
5886 {
5887 	power_req_t power_req;
5888 	int result;
5889 
5890 	power_req.request_type = PMR_PPM_LOCK_POWER;
5891 	power_req.req.ppm_lock_power_req.who = dip;
5892 	power_req.req.ppm_lock_power_req.circp = circp;
5893 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5894 }
5895 
5896 /*
5897  * Release the lock (or locks) acquired to change the power of a device.
5898  * See comments for pm_lock_power.
5899  */
5900 void
5901 pm_unlock_power(dev_info_t *dip, int circ)
5902 {
5903 	power_req_t power_req;
5904 	int result;
5905 
5906 	power_req.request_type = PMR_PPM_UNLOCK_POWER;
5907 	power_req.req.ppm_unlock_power_req.who = dip;
5908 	power_req.req.ppm_unlock_power_req.circ = circ;
5909 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5910 }
5911 
5912 
5913 /*
5914  * Attempt (without blocking) to acquire the lock(s) needed to change the
5915  * power of a component of a device.  See comments for pm_lock_power.
5916  *
5917  * Return: 1 if lock(s) acquired, 0 if not.
5918  */
5919 int
5920 pm_try_locking_power(dev_info_t *dip, int *circp)
5921 {
5922 	power_req_t power_req;
5923 	int result;
5924 
5925 	power_req.request_type = PMR_PPM_TRY_LOCK_POWER;
5926 	power_req.req.ppm_lock_power_req.who = dip;
5927 	power_req.req.ppm_lock_power_req.circp = circp;
5928 	(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req, &result);
5929 	return (result);
5930 }
5931 
5932 
5933 /*
5934  * Lock power state of a device.
5935  *
5936  * The implementation handles a special case where another thread may have
5937  * acquired the lock and created/launched this thread to do the work.  If
5938  * the lock cannot be acquired immediately, we check to see if this thread
5939  * is registered as a borrower of the lock.  If so, we may proceed without
5940  * the lock.  This assumes that the lending thread blocks on the completion
5941  * of this thread.
5942  *
5943  * Note 1: for use by ppm only.
5944  *
5945  * Note 2: On failing to get the lock immediately, we search lock_loan list
5946  * for curthread (as borrower of the lock).  On a hit, we check that the
5947  * lending thread already owns the lock we want.  It is safe to compare
5948  * devi_busy_thread and thread id of the lender because in the == case (the
5949  * only one we care about) we know that the owner is blocked.  Similarly,
5950  * If we find that curthread isn't registered as a lock borrower, it is safe
5951  * to use the blocking call (ndi_devi_enter) because we know that if we
5952  * weren't already listed as a borrower (upstream on the call stack) we won't
5953  * become one.
5954  */
5955 void
5956 pm_lock_power_single(dev_info_t *dip, int *circp)
5957 {
5958 	lock_loan_t *cur;
5959 
5960 	/* if the lock is available, we are done. */
5961 	if (ndi_devi_tryenter(dip, circp))
5962 		return;
5963 
5964 	mutex_enter(&pm_loan_lock);
5965 	/* see if our thread is registered as a lock borrower. */
5966 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
5967 		if (cur->pmlk_borrower == curthread)
5968 			break;
5969 	mutex_exit(&pm_loan_lock);
5970 
5971 	/* if this thread not already registered, it is safe to block */
5972 	if (cur == NULL)
5973 		ndi_devi_enter(dip, circp);
5974 	else {
5975 		/* registered: does lender own the lock we want? */
5976 		if (cur->pmlk_lender == DEVI(dip)->devi_busy_thread) {
5977 			ASSERT(cur->pmlk_dip == NULL || cur->pmlk_dip == dip);
5978 			cur->pmlk_dip = dip;
5979 		} else /* no: just block for it */
5980 			ndi_devi_enter(dip, circp);
5981 
5982 	}
5983 }
5984 
5985 /*
5986  * Drop the lock on the device's power state.  See comment for
5987  * pm_lock_power_single() for special implementation considerations.
5988  *
5989  * Note: for use by ppm only.
5990  */
5991 void
5992 pm_unlock_power_single(dev_info_t *dip, int circ)
5993 {
5994 	lock_loan_t *cur;
5995 
5996 	/* optimization: mutex not needed to check empty list */
5997 	if (lock_loan_head.pmlk_next == NULL) {
5998 		ndi_devi_exit(dip, circ);
5999 		return;
6000 	}
6001 
6002 	mutex_enter(&pm_loan_lock);
6003 	/* see if our thread is registered as a lock borrower. */
6004 	for (cur = lock_loan_head.pmlk_next; cur; cur = cur->pmlk_next)
6005 		if (cur->pmlk_borrower == curthread)
6006 			break;
6007 	mutex_exit(&pm_loan_lock);
6008 
6009 	if (cur == NULL || cur->pmlk_dip != dip)
6010 		/* we acquired the lock directly, so return it */
6011 		ndi_devi_exit(dip, circ);
6012 }
6013 
6014 /*
6015  * Try to take the lock for changing the power level of a component.
6016  *
6017  * Note: for use by ppm only.
6018  */
6019 int
6020 pm_try_locking_power_single(dev_info_t *dip, int *circp)
6021 {
6022 	return (ndi_devi_tryenter(dip, circp));
6023 }
6024 
6025 #ifdef	DEBUG
6026 /*
6027  * The following are used only to print out data structures for debugging
6028  */
6029 void
6030 prdeps(char *msg)
6031 {
6032 
6033 	pm_pdr_t *rp;
6034 	int i;
6035 
6036 	pm_log("pm_dep_head %s %p\n", msg, (void *)pm_dep_head);
6037 	for (rp = pm_dep_head; rp; rp = rp->pdr_next) {
6038 		pm_log("%p: %s keeper %s, kept %s, kept count %d, next %p\n",
6039 		    (void *)rp, (rp->pdr_isprop ? "property" : "device"),
6040 		    rp->pdr_keeper, rp->pdr_kept, rp->pdr_kept_count,
6041 		    (void *)rp->pdr_next);
6042 		if (rp->pdr_kept_count != 0) {
6043 			pm_log("kept list = ");
6044 			i = 0;
6045 			while (i < rp->pdr_kept_count) {
6046 				pm_log("%s ", rp->pdr_kept_paths[i]);
6047 				i++;
6048 			}
6049 			pm_log("\n");
6050 		}
6051 	}
6052 }
6053 
6054 void
6055 pr_noinvol(char *hdr)
6056 {
6057 	pm_noinvol_t *ip;
6058 
6059 	pm_log("%s\n", hdr);
6060 	rw_enter(&pm_noinvol_rwlock, RW_READER);
6061 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next)
6062 		pm_log("\tmaj %d, flags %x, noinvolpm %d %s\n",
6063 		    ip->ni_major, ip->ni_flags, ip->ni_noinvolpm, ip->ni_path);
6064 	rw_exit(&pm_noinvol_rwlock);
6065 }
6066 #endif
6067 
6068 /*
6069  * Attempt to apply the thresholds indicated by rp to the node specified by
6070  * dip.
6071  */
6072 void
6073 pm_apply_recorded_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6074 {
6075 	PMD_FUNC(pmf, "apply_recorded_thresh")
6076 	int i, j;
6077 	int comps = PM_NUMCMPTS(dip);
6078 	struct pm_component *cp;
6079 	pm_pte_t *ep;
6080 	int pm_valid_thresh(dev_info_t *, pm_thresh_rec_t *);
6081 
6082 	PMD(PMD_THRESH, ("%s: part: %s@%s(%s#%d), rp %p, %s\n", pmf,
6083 	    PM_DEVICE(dip), (void *)rp, rp->ptr_physpath))
6084 	PM_LOCK_DIP(dip);
6085 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip) || !pm_valid_thresh(dip, rp)) {
6086 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_GET_PM_INFO %p\n",
6087 		    pmf, PM_DEVICE(dip), (void*)PM_GET_PM_INFO(dip)))
6088 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) PM_ISBC %d\n",
6089 		    pmf, PM_DEVICE(dip), PM_ISBC(dip)))
6090 		PMD(PMD_FAIL, ("%s: part: %s@%s(%s#%d) pm_valid_thresh %d\n",
6091 		    pmf, PM_DEVICE(dip), pm_valid_thresh(dip, rp)))
6092 		PM_UNLOCK_DIP(dip);
6093 		return;
6094 	}
6095 
6096 	ep = rp->ptr_entries;
6097 	/*
6098 	 * Here we do the special case of a device threshold
6099 	 */
6100 	if (rp->ptr_numcomps == 0) {	/* PM_SET_DEVICE_THRESHOLD product */
6101 		ASSERT(ep && ep->pte_numthresh == 1);
6102 		PMD(PMD_THRESH, ("%s: set dev thr %s@%s(%s#%d) to 0x%x\n",
6103 		    pmf, PM_DEVICE(dip), ep->pte_thresh[0]))
6104 		PM_UNLOCK_DIP(dip);
6105 		pm_set_device_threshold(dip, ep->pte_thresh[0], PMC_DEV_THRESH);
6106 		if (PM_SCANABLE(dip))
6107 			pm_rescan(dip);
6108 		return;
6109 	}
6110 	for (i = 0; i < comps; i++) {
6111 		cp = PM_CP(dip, i);
6112 		for (j = 0; j < ep->pte_numthresh; j++) {
6113 			PMD(PMD_THRESH, ("%s: set thr %d for %s@%s(%s#%d)[%d] "
6114 			    "to %x\n", pmf, j, PM_DEVICE(dip),
6115 			    i, ep->pte_thresh[j]))
6116 			cp->pmc_comp.pmc_thresh[j + 1] = ep->pte_thresh[j];
6117 		}
6118 		ep++;
6119 	}
6120 	DEVI(dip)->devi_pm_flags &= PMC_THRESH_NONE;
6121 	DEVI(dip)->devi_pm_flags |= PMC_COMP_THRESH;
6122 	PM_UNLOCK_DIP(dip);
6123 
6124 	if (PM_SCANABLE(dip))
6125 		pm_rescan(dip);
6126 }
6127 
6128 /*
6129  * Returns true if the threshold specified by rp could be applied to dip
6130  * (that is, the number of components and transitions are the same)
6131  */
6132 int
6133 pm_valid_thresh(dev_info_t *dip, pm_thresh_rec_t *rp)
6134 {
6135 	PMD_FUNC(pmf, "valid_thresh")
6136 	int comps, i;
6137 	pm_component_t *cp;
6138 	pm_pte_t *ep;
6139 
6140 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip)) {
6141 		PMD(PMD_ERROR, ("%s: %s: no pm_info or BC\n", pmf,
6142 		    rp->ptr_physpath))
6143 		return (0);
6144 	}
6145 	/*
6146 	 * Special case: we represent the PM_SET_DEVICE_THRESHOLD case by
6147 	 * an entry with numcomps == 0, (since we don't know how many
6148 	 * components there are in advance).  This is always a valid
6149 	 * spec.
6150 	 */
6151 	if (rp->ptr_numcomps == 0) {
6152 		ASSERT(rp->ptr_entries && rp->ptr_entries->pte_numthresh == 1);
6153 		return (1);
6154 	}
6155 	if (rp->ptr_numcomps != (comps = PM_NUMCMPTS(dip))) {
6156 		PMD(PMD_ERROR, ("%s: comp # mm (dip %d cmd %d) for %s\n",
6157 		    pmf, PM_NUMCMPTS(dip), rp->ptr_numcomps, rp->ptr_physpath))
6158 		return (0);
6159 	}
6160 	ep = rp->ptr_entries;
6161 	for (i = 0; i < comps; i++) {
6162 		cp = PM_CP(dip, i);
6163 		if ((ep + i)->pte_numthresh !=
6164 		    cp->pmc_comp.pmc_numlevels - 1) {
6165 			PMD(PMD_ERROR, ("%s: %s[%d]: thresh=%d, record=%d\n",
6166 			    pmf, rp->ptr_physpath, i,
6167 			    cp->pmc_comp.pmc_numlevels - 1,
6168 			    (ep + i)->pte_numthresh))
6169 			return (0);
6170 		}
6171 	}
6172 	return (1);
6173 }
6174 
6175 /*
6176  * Remove any recorded threshold for device physpath
6177  * We know there will be at most one.
6178  */
6179 void
6180 pm_unrecord_threshold(char *physpath)
6181 {
6182 	pm_thresh_rec_t *pptr, *ptr;
6183 
6184 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6185 	for (pptr = NULL, ptr = pm_thresh_head; ptr; ptr = ptr->ptr_next) {
6186 		if (strcmp(physpath, ptr->ptr_physpath) == 0) {
6187 			if (pptr) {
6188 				pptr->ptr_next = ptr->ptr_next;
6189 			} else {
6190 				ASSERT(pm_thresh_head == ptr);
6191 				pm_thresh_head = ptr->ptr_next;
6192 			}
6193 			kmem_free(ptr, ptr->ptr_size);
6194 			break;
6195 		}
6196 		pptr = ptr;
6197 	}
6198 	rw_exit(&pm_thresh_rwlock);
6199 }
6200 
6201 /*
6202  * Discard all recorded thresholds.  We are returning to the default pm state.
6203  */
6204 void
6205 pm_discard_thresholds(void)
6206 {
6207 	pm_thresh_rec_t *rp;
6208 	rw_enter(&pm_thresh_rwlock, RW_WRITER);
6209 	while (pm_thresh_head) {
6210 		rp = pm_thresh_head;
6211 		pm_thresh_head = rp->ptr_next;
6212 		kmem_free(rp, rp->ptr_size);
6213 	}
6214 	rw_exit(&pm_thresh_rwlock);
6215 }
6216 
6217 /*
6218  * Discard all recorded dependencies.  We are returning to the default pm state.
6219  */
6220 void
6221 pm_discard_dependencies(void)
6222 {
6223 	pm_pdr_t *rp;
6224 	int i;
6225 	size_t length;
6226 
6227 #ifdef DEBUG
6228 	if (pm_debug & PMD_DEP)
6229 		prdeps("Before discard\n");
6230 #endif
6231 	ddi_walk_devs(ddi_root_node(), pm_discard_dep_walk, NULL);
6232 
6233 #ifdef DEBUG
6234 	if (pm_debug & PMD_DEP)
6235 		prdeps("After discard\n");
6236 #endif
6237 	while (pm_dep_head) {
6238 		rp = pm_dep_head;
6239 		if (!rp->pdr_isprop) {
6240 			ASSERT(rp->pdr_satisfied == 0);
6241 			ASSERT(pm_unresolved_deps);
6242 			pm_unresolved_deps--;
6243 		} else {
6244 			ASSERT(pm_prop_deps);
6245 			pm_prop_deps--;
6246 		}
6247 		pm_dep_head = rp->pdr_next;
6248 		if (rp->pdr_kept_count)  {
6249 			for (i = 0; i < rp->pdr_kept_count; i++) {
6250 				length = strlen(rp->pdr_kept_paths[i]) + 1;
6251 				kmem_free(rp->pdr_kept_paths[i], length);
6252 			}
6253 			kmem_free(rp->pdr_kept_paths,
6254 			    rp->pdr_kept_count * sizeof (char **));
6255 		}
6256 		kmem_free(rp, rp->pdr_size);
6257 	}
6258 }
6259 
6260 
6261 static int
6262 pm_discard_dep_walk(dev_info_t *dip, void *arg)
6263 {
6264 	_NOTE(ARGUNUSED(arg))
6265 	char *pathbuf;
6266 
6267 	if (PM_GET_PM_INFO(dip) == NULL)
6268 		return (DDI_WALK_CONTINUE);
6269 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6270 	(void) ddi_pathname(dip, pathbuf);
6271 	pm_free_keeper(pathbuf, 0);
6272 	kmem_free(pathbuf, MAXPATHLEN);
6273 	return (DDI_WALK_CONTINUE);
6274 }
6275 
6276 static int
6277 pm_kept_walk(dev_info_t *dip, void *arg)
6278 {
6279 	_NOTE(ARGUNUSED(arg))
6280 	char *pathbuf;
6281 
6282 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6283 	(void) ddi_pathname(dip, pathbuf);
6284 	(void) pm_kept(pathbuf);
6285 	kmem_free(pathbuf, MAXPATHLEN);
6286 
6287 	return (DDI_WALK_CONTINUE);
6288 }
6289 
6290 static int
6291 pm_keeper_walk(dev_info_t *dip, void *arg)
6292 {
6293 	_NOTE(ARGUNUSED(arg))
6294 	char *pathbuf;
6295 
6296 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6297 	(void) ddi_pathname(dip, pathbuf);
6298 	(void) pm_keeper(pathbuf);
6299 	kmem_free(pathbuf, MAXPATHLEN);
6300 
6301 	return (DDI_WALK_CONTINUE);
6302 }
6303 
6304 static char *
6305 pdw_type_decode(int type)
6306 {
6307 	switch (type) {
6308 	case PM_DEP_WK_POWER_ON:
6309 		return ("power on");
6310 	case PM_DEP_WK_POWER_OFF:
6311 		return ("power off");
6312 	case PM_DEP_WK_DETACH:
6313 		return ("detach");
6314 	case PM_DEP_WK_REMOVE_DEP:
6315 		return ("remove dep");
6316 	case PM_DEP_WK_BRINGUP_SELF:
6317 		return ("bringup self");
6318 	case PM_DEP_WK_RECORD_KEEPER:
6319 		return ("add dependent");
6320 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6321 		return ("add dependent property");
6322 	case PM_DEP_WK_KEPT:
6323 		return ("kept");
6324 	case PM_DEP_WK_KEEPER:
6325 		return ("keeper");
6326 	case PM_DEP_WK_ATTACH:
6327 		return ("attach");
6328 	case PM_DEP_WK_CHECK_KEPT:
6329 		return ("check kept");
6330 	case PM_DEP_WK_CPR_SUSPEND:
6331 		return ("suspend");
6332 	case PM_DEP_WK_CPR_RESUME:
6333 		return ("resume");
6334 	default:
6335 		return ("unknown");
6336 	}
6337 
6338 }
6339 
6340 static void
6341 pm_rele_dep(char *keeper)
6342 {
6343 	PMD_FUNC(pmf, "rele_dep")
6344 	pm_pdr_t *dp;
6345 	char *kept_path = NULL;
6346 	dev_info_t *kept = NULL;
6347 	int count = 0;
6348 
6349 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6350 		if (strcmp(dp->pdr_keeper, keeper) != 0)
6351 			continue;
6352 		for (count = 0; count < dp->pdr_kept_count; count++) {
6353 			kept_path = dp->pdr_kept_paths[count];
6354 			if (kept_path == NULL)
6355 				continue;
6356 			kept = pm_name_to_dip(kept_path, 1);
6357 			if (kept) {
6358 				PMD(PMD_KEEPS, ("%s: release kept=%s@%s(%s#%d) "
6359 				    "of keeper=%s\n", pmf, PM_DEVICE(kept),
6360 				    keeper))
6361 				ASSERT(DEVI(kept)->devi_pm_kidsupcnt > 0);
6362 				pm_rele_power(kept);
6363 				ddi_release_devi(kept);
6364 			}
6365 		}
6366 	}
6367 }
6368 
6369 /*
6370  * Called when we are just released from direct PM.  Bring ourself up
6371  * if our keeper is up since dependency is not honored while a kept
6372  * device is under direct PM.
6373  */
6374 static void
6375 pm_bring_self_up(char *keptpath)
6376 {
6377 	PMD_FUNC(pmf, "bring_self_up")
6378 	dev_info_t *kept;
6379 	dev_info_t *keeper;
6380 	pm_pdr_t *dp;
6381 	int i, j;
6382 	int up = 0, circ;
6383 
6384 	kept = pm_name_to_dip(keptpath, 1);
6385 	if (kept == NULL)
6386 		return;
6387 	PMD(PMD_KEEPS, ("%s: kept=%s@%s(%s#%d)\n", pmf, PM_DEVICE(kept)))
6388 	for (dp = pm_dep_head; dp; dp = dp->pdr_next) {
6389 		if (dp->pdr_kept_count == 0)
6390 			continue;
6391 		for (i = 0; i < dp->pdr_kept_count; i++) {
6392 			if (strcmp(dp->pdr_kept_paths[i], keptpath) != 0)
6393 				continue;
6394 			keeper = pm_name_to_dip(dp->pdr_keeper, 1);
6395 			if (keeper) {
6396 				PMD(PMD_KEEPS, ("%s: keeper=%s@%s(%s#%d)\n",
6397 				    pmf, PM_DEVICE(keeper)))
6398 				PM_LOCK_POWER(keeper, &circ);
6399 				for (j = 0; j < PM_NUMCMPTS(keeper);
6400 				    j++) {
6401 					if (PM_CURPOWER(keeper, j)) {
6402 						PMD(PMD_KEEPS, ("%s: comp="
6403 						    "%d is up\n", pmf, j))
6404 						up++;
6405 					}
6406 				}
6407 				if (up) {
6408 					if (PM_SKBU(kept))
6409 						DEVI(kept)->devi_pm_flags &=
6410 						    ~PMC_SKIP_BRINGUP;
6411 					bring_pmdep_up(kept, 1);
6412 				}
6413 				PM_UNLOCK_POWER(keeper, circ);
6414 				ddi_release_devi(keeper);
6415 			}
6416 		}
6417 	}
6418 	ddi_release_devi(kept);
6419 }
6420 
6421 static void
6422 pm_process_dep_request(pm_dep_wk_t *work)
6423 {
6424 	PMD_FUNC(pmf, "dep_req")
6425 	int ret;
6426 
6427 	PMD(PMD_DEP, ("%s: work=%s\n", pmf,
6428 	    pdw_type_decode(work->pdw_type)))
6429 	PMD(PMD_DEP, ("%s: keeper=%s, kept=%s\n", pmf,
6430 	    (work->pdw_keeper ? work->pdw_keeper : "NULL"),
6431 	    (work->pdw_kept ? work->pdw_kept : "NULL")))
6432 
6433 	switch (work->pdw_type) {
6434 	case PM_DEP_WK_POWER_ON:
6435 		/* Bring up the kept devices and put a hold on them */
6436 		bring_wekeeps_up(work->pdw_keeper);
6437 		break;
6438 	case PM_DEP_WK_POWER_OFF:
6439 		/* Release the kept devices */
6440 		pm_rele_dep(work->pdw_keeper);
6441 		break;
6442 	case PM_DEP_WK_DETACH:
6443 		pm_free_keeps(work->pdw_keeper, work->pdw_pwr);
6444 		break;
6445 	case PM_DEP_WK_REMOVE_DEP:
6446 		pm_discard_dependencies();
6447 		break;
6448 	case PM_DEP_WK_BRINGUP_SELF:
6449 		/*
6450 		 * We deferred satisfying our dependency till now, so satisfy
6451 		 * it again and bring ourselves up.
6452 		 */
6453 		pm_bring_self_up(work->pdw_kept);
6454 		break;
6455 	case PM_DEP_WK_RECORD_KEEPER:
6456 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 0);
6457 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6458 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6459 		break;
6460 	case PM_DEP_WK_RECORD_KEEPER_PROP:
6461 		(void) pm_record_keeper(work->pdw_kept, work->pdw_keeper, 1);
6462 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6463 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6464 		break;
6465 	case PM_DEP_WK_KEPT:
6466 		ret = pm_kept(work->pdw_kept);
6467 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEPT: pm_kept returns %d\n", pmf,
6468 		    ret))
6469 		break;
6470 	case PM_DEP_WK_KEEPER:
6471 		ret = pm_keeper(work->pdw_keeper);
6472 		PMD(PMD_DEP, ("%s: PM_DEP_WK_KEEPER: pm_keeper returns %d\n",
6473 		    pmf, ret))
6474 		break;
6475 	case PM_DEP_WK_ATTACH:
6476 		ret = pm_keeper(work->pdw_keeper);
6477 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_keeper returns %d\n",
6478 		    pmf, ret))
6479 		ret = pm_kept(work->pdw_kept);
6480 		PMD(PMD_DEP, ("%s: PM_DEP_WK_ATTACH: pm_kept returns %d\n",
6481 		    pmf, ret))
6482 		break;
6483 	case PM_DEP_WK_CHECK_KEPT:
6484 		ret = pm_is_kept(work->pdw_kept);
6485 		PMD(PMD_DEP, ("%s: PM_DEP_WK_CHECK_KEPT: kept=%s, ret=%d\n",
6486 		    pmf, work->pdw_kept, ret))
6487 		break;
6488 	case PM_DEP_WK_CPR_SUSPEND:
6489 		pm_discard_dependencies();
6490 		break;
6491 	case PM_DEP_WK_CPR_RESUME:
6492 		ddi_walk_devs(ddi_root_node(), pm_kept_walk, NULL);
6493 		ddi_walk_devs(ddi_root_node(), pm_keeper_walk, NULL);
6494 		break;
6495 	default:
6496 		ASSERT(0);
6497 		break;
6498 	}
6499 	/*
6500 	 * Free the work structure if the requester is not waiting
6501 	 * Otherwise it is the requester's responsiblity to free it.
6502 	 */
6503 	if (!work->pdw_wait) {
6504 		if (work->pdw_keeper)
6505 			kmem_free(work->pdw_keeper,
6506 			    strlen(work->pdw_keeper) + 1);
6507 		if (work->pdw_kept)
6508 			kmem_free(work->pdw_kept, strlen(work->pdw_kept) + 1);
6509 		kmem_free(work, sizeof (pm_dep_wk_t));
6510 	} else {
6511 		/*
6512 		 * Notify requester if it is waiting for it.
6513 		 */
6514 		work->pdw_ret = ret;
6515 		work->pdw_done = 1;
6516 		cv_signal(&work->pdw_cv);
6517 	}
6518 }
6519 
6520 /*
6521  * Process PM dependency requests.
6522  */
6523 static void
6524 pm_dep_thread(void)
6525 {
6526 	pm_dep_wk_t *work;
6527 	callb_cpr_t cprinfo;
6528 
6529 	CALLB_CPR_INIT(&cprinfo, &pm_dep_thread_lock, callb_generic_cpr,
6530 	    "pm_dep_thread");
6531 	for (;;) {
6532 		mutex_enter(&pm_dep_thread_lock);
6533 		if (pm_dep_thread_workq == NULL) {
6534 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
6535 			cv_wait(&pm_dep_thread_cv, &pm_dep_thread_lock);
6536 			CALLB_CPR_SAFE_END(&cprinfo, &pm_dep_thread_lock);
6537 		}
6538 		work = pm_dep_thread_workq;
6539 		pm_dep_thread_workq = work->pdw_next;
6540 		if (pm_dep_thread_tail == work)
6541 			pm_dep_thread_tail = work->pdw_next;
6542 		mutex_exit(&pm_dep_thread_lock);
6543 		pm_process_dep_request(work);
6544 
6545 	}
6546 	/*NOTREACHED*/
6547 }
6548 
6549 /*
6550  * Set the power level of the indicated device to unknown (if it is not a
6551  * backwards compatible device), as it has just been resumed, and it won't
6552  * know if the power was removed or not. Adjust parent's kidsupcnt if necessary.
6553  */
6554 void
6555 pm_forget_power_level(dev_info_t *dip)
6556 {
6557 	dev_info_t *pdip = ddi_get_parent(dip);
6558 	int i, count = 0;
6559 
6560 	if (!PM_ISBC(dip)) {
6561 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6562 			count += (PM_CURPOWER(dip, i) == 0);
6563 
6564 		if (count && pdip && !PM_WANTS_NOTIFICATION(pdip))
6565 			e_pm_hold_rele_power(pdip, count);
6566 
6567 		/*
6568 		 * Count this as a power cycle if we care
6569 		 */
6570 		if (DEVI(dip)->devi_pm_volpmd &&
6571 		    PM_CP(dip, 0)->pmc_cur_pwr == 0)
6572 			DEVI(dip)->devi_pm_volpmd = 0;
6573 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
6574 			e_pm_set_cur_pwr(dip, PM_CP(dip, i), PM_LEVEL_UNKNOWN);
6575 	}
6576 }
6577 
6578 /*
6579  * This function advises the caller whether it should make a power-off
6580  * transition at this time or not.  If the transition is not advised
6581  * at this time, the time that the next power-off transition can
6582  * be made from now is returned through "intervalp" pointer.
6583  * This function returns:
6584  *
6585  *  1  power-off advised
6586  *  0  power-off not advised, intervalp will point to seconds from
6587  *	  now that a power-off is advised.  If it is passed the number
6588  *	  of years that policy specifies the device should last,
6589  *	  a large number is returned as the time interval.
6590  *  -1  error
6591  */
6592 int
6593 pm_trans_check(struct pm_trans_data *datap, time_t *intervalp)
6594 {
6595 	PMD_FUNC(pmf, "pm_trans_check")
6596 	char dbuf[DC_SCSI_MFR_LEN];
6597 	struct pm_scsi_cycles *scp;
6598 	int service_years, service_weeks, full_years;
6599 	time_t now, service_seconds, tdiff;
6600 	time_t within_year, when_allowed;
6601 	char *ptr;
6602 	int lower_bound_cycles, upper_bound_cycles, cycles_allowed;
6603 	int cycles_diff, cycles_over;
6604 	struct pm_smart_count *smart_p;
6605 
6606 	if (datap == NULL) {
6607 		PMD(PMD_TCHECK, ("%s: NULL data pointer!\n", pmf))
6608 		return (-1);
6609 	}
6610 
6611 	if (datap->format == DC_SCSI_FORMAT) {
6612 		/*
6613 		 * Power cycles of the scsi drives are distributed
6614 		 * over 5 years with the following percentage ratio:
6615 		 *
6616 		 *	30%, 25%, 20%, 15%, and 10%
6617 		 *
6618 		 * The power cycle quota for each year is distributed
6619 		 * linearly through out the year.  The equation for
6620 		 * determining the expected cycles is:
6621 		 *
6622 		 *	e = a * (n / y)
6623 		 *
6624 		 * e = expected cycles
6625 		 * a = allocated cycles for this year
6626 		 * n = number of seconds since beginning of this year
6627 		 * y = number of seconds in a year
6628 		 *
6629 		 * Note that beginning of the year starts the day that
6630 		 * the drive has been put on service.
6631 		 *
6632 		 * If the drive has passed its expected cycles, we
6633 		 * can determine when it can start to power cycle
6634 		 * again to keep it on track to meet the 5-year
6635 		 * life expectancy.  The equation for determining
6636 		 * when to power cycle is:
6637 		 *
6638 		 *	w = y * (c / a)
6639 		 *
6640 		 * w = when it can power cycle again
6641 		 * y = number of seconds in a year
6642 		 * c = current number of cycles
6643 		 * a = allocated cycles for the year
6644 		 *
6645 		 */
6646 		char pcnt[DC_SCSI_NPY] = { 30, 55, 75, 90, 100 };
6647 
6648 		scp = &datap->un.scsi_cycles;
6649 		PMD(PMD_TCHECK, ("%s: format=%d, lifemax=%d, ncycles=%d, "
6650 		    "svc_date=%s, svc_flag=%d\n", pmf, datap->format,
6651 		    scp->lifemax, scp->ncycles, scp->svc_date, scp->flag))
6652 		if (scp->ncycles < 0 || scp->flag != 0) {
6653 			PMD(PMD_TCHECK, ("%s: ncycles < 0 || flag != 0\n", pmf))
6654 			return (-1);
6655 		}
6656 
6657 		if (scp->ncycles > scp->lifemax) {
6658 			*intervalp = (LONG_MAX / hz);
6659 			return (0);
6660 		}
6661 
6662 		/*
6663 		 * convert service date to time_t
6664 		 */
6665 		bcopy(scp->svc_date, dbuf, DC_SCSI_YEAR_LEN);
6666 		dbuf[DC_SCSI_YEAR_LEN] = '\0';
6667 		ptr = dbuf;
6668 		service_years = stoi(&ptr) - EPOCH_YEAR;
6669 		bcopy(&scp->svc_date[DC_SCSI_YEAR_LEN], dbuf,
6670 		    DC_SCSI_WEEK_LEN);
6671 		dbuf[DC_SCSI_WEEK_LEN] = '\0';
6672 
6673 		/*
6674 		 * scsi standard does not specify WW data,
6675 		 * could be (00-51) or (01-52)
6676 		 */
6677 		ptr = dbuf;
6678 		service_weeks = stoi(&ptr);
6679 		if (service_years < 0 ||
6680 		    service_weeks < 0 || service_weeks > 52) {
6681 			PMD(PMD_TCHECK, ("%s: service year %d and week %d\n",
6682 			    pmf, service_years, service_weeks))
6683 			return (-1);
6684 		}
6685 
6686 		/*
6687 		 * calculate service date in seconds-since-epoch,
6688 		 * adding one day for each leap-year.
6689 		 *
6690 		 * (years-since-epoch + 2) fixes integer truncation,
6691 		 * example: (8) leap-years during [1972, 2000]
6692 		 * (2000 - 1970) = 30;  and  (30 + 2) / 4 = 8;
6693 		 */
6694 		service_seconds = (service_years * DC_SPY) +
6695 		    (service_weeks * DC_SPW) +
6696 		    (((service_years + 2) / 4) * DC_SPD);
6697 
6698 		now = gethrestime_sec();
6699 		/*
6700 		 * since the granularity of 'svc_date' is day not second,
6701 		 * 'now' should be rounded up to full day.
6702 		 */
6703 		now = ((now + DC_SPD -1) / DC_SPD) * DC_SPD;
6704 		if (service_seconds > now) {
6705 			PMD(PMD_TCHECK, ("%s: service date (%ld) later "
6706 			    "than now (%ld)!\n", pmf, service_seconds, now))
6707 			return (-1);
6708 		}
6709 
6710 		tdiff = now - service_seconds;
6711 		PMD(PMD_TCHECK, ("%s: age is %ld sec\n", pmf, tdiff))
6712 
6713 		/*
6714 		 * NOTE - Leap years are not considered in the calculations
6715 		 * below.
6716 		 */
6717 		full_years = (tdiff / DC_SPY);
6718 		if ((full_years >= DC_SCSI_NPY) &&
6719 		    (scp->ncycles <= scp->lifemax))
6720 			return (1);
6721 
6722 		/*
6723 		 * Determine what is the normal cycle usage for the
6724 		 * device at the beginning and the end of this year.
6725 		 */
6726 		lower_bound_cycles = (!full_years) ? 0 :
6727 		    ((scp->lifemax * pcnt[full_years - 1]) / 100);
6728 		upper_bound_cycles = (scp->lifemax * pcnt[full_years]) / 100;
6729 
6730 		if (scp->ncycles <= lower_bound_cycles)
6731 			return (1);
6732 
6733 		/*
6734 		 * The linear slope that determines how many cycles
6735 		 * are allowed this year is number of seconds
6736 		 * passed this year over total number of seconds in a year.
6737 		 */
6738 		cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6739 		within_year = (tdiff % DC_SPY);
6740 		cycles_allowed = lower_bound_cycles +
6741 		    (((uint64_t)cycles_diff * (uint64_t)within_year) / DC_SPY);
6742 		PMD(PMD_TCHECK, ("%s: lived %d yrs and %ld secs\n", pmf,
6743 		    full_years, within_year))
6744 		PMD(PMD_TCHECK, ("%s: # of cycles allowed %d\n", pmf,
6745 		    cycles_allowed))
6746 
6747 		if (scp->ncycles <= cycles_allowed)
6748 			return (1);
6749 
6750 		/*
6751 		 * The transition is not advised now but we can
6752 		 * determine when the next transition can be made.
6753 		 *
6754 		 * Depending on how many cycles the device has been
6755 		 * over-used, we may need to skip years with
6756 		 * different percentage quota in order to determine
6757 		 * when the next transition can be made.
6758 		 */
6759 		cycles_over = (scp->ncycles - lower_bound_cycles);
6760 		while (cycles_over > cycles_diff) {
6761 			full_years++;
6762 			if (full_years >= DC_SCSI_NPY) {
6763 				*intervalp = (LONG_MAX / hz);
6764 				return (0);
6765 			}
6766 			cycles_over -= cycles_diff;
6767 			lower_bound_cycles = upper_bound_cycles;
6768 			upper_bound_cycles =
6769 			    (scp->lifemax * pcnt[full_years]) / 100;
6770 			cycles_diff = (upper_bound_cycles - lower_bound_cycles);
6771 		}
6772 
6773 		/*
6774 		 * The linear slope that determines when the next transition
6775 		 * can be made is the relative position of used cycles within a
6776 		 * year over total number of cycles within that year.
6777 		 */
6778 		when_allowed = service_seconds + (full_years * DC_SPY) +
6779 		    (((uint64_t)DC_SPY * (uint64_t)cycles_over) / cycles_diff);
6780 		*intervalp = (when_allowed - now);
6781 		if (*intervalp > (LONG_MAX / hz))
6782 			*intervalp = (LONG_MAX / hz);
6783 		PMD(PMD_TCHECK, ("%s: no cycle is allowed in %ld secs\n", pmf,
6784 		    *intervalp))
6785 		return (0);
6786 	} else if (datap->format == DC_SMART_FORMAT) {
6787 		/*
6788 		 * power cycles of SATA disks are reported from SMART
6789 		 * attributes.
6790 		 */
6791 		smart_p = &datap->un.smart_count;
6792 		if (smart_p->consumed >= smart_p->allowed) {
6793 			*intervalp = (LONG_MAX / hz);
6794 			PMD(PMD_TCHECK, ("%s: exceeded lifemax cycles.\n", pmf))
6795 			return (0);
6796 		} else
6797 			return (1);
6798 	}
6799 
6800 	PMD(PMD_TCHECK, ("%s: unknown format!\n", pmf))
6801 	return (-1);
6802 }
6803 
6804 /*
6805  * Nexus drivers call into pm framework to indicate which child driver is about
6806  * to be installed.  In some platforms, ppm may need to configure the hardware
6807  * for successful installation of a driver.
6808  */
6809 int
6810 pm_init_child(dev_info_t *dip)
6811 {
6812 	power_req_t power_req;
6813 
6814 	ASSERT(ddi_binding_name(dip));
6815 	ASSERT(ddi_get_name_addr(dip));
6816 	pm_ppm_claim(dip);
6817 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6818 		power_req.request_type = PMR_PPM_INIT_CHILD;
6819 		power_req.req.ppm_config_req.who = dip;
6820 		ASSERT(PPM(dip) != NULL);
6821 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
6822 		    NULL));
6823 	} else {
6824 #ifdef DEBUG
6825 		/* pass it to the default handler so we can debug things */
6826 		power_req.request_type = PMR_PPM_INIT_CHILD;
6827 		power_req.req.ppm_config_req.who = dip;
6828 		(void) pm_ctlops(NULL, dip,
6829 		    DDI_CTLOPS_POWER, &power_req, NULL);
6830 #endif
6831 	}
6832 	return (DDI_SUCCESS);
6833 }
6834 
6835 /*
6836  * Bring parent of a node that is about to be probed up to full power, and
6837  * arrange for it to stay up until pm_post_probe() or pm_post_attach() decide
6838  * it is time to let it go down again
6839  */
6840 void
6841 pm_pre_probe(dev_info_t *dip, pm_ppm_cookie_t *cp)
6842 {
6843 	int result;
6844 	power_req_t power_req;
6845 
6846 	bzero(cp, sizeof (*cp));
6847 	cp->ppc_dip = dip;
6848 
6849 	pm_ppm_claim(dip);
6850 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
6851 		power_req.request_type = PMR_PPM_PRE_PROBE;
6852 		power_req.req.ppm_config_req.who = dip;
6853 		ASSERT(PPM(dip) != NULL);
6854 		(void) pm_ctlops(PPM(dip), dip,
6855 		    DDI_CTLOPS_POWER, &power_req, &result);
6856 		cp->ppc_ppm = PPM(dip);
6857 	} else {
6858 #ifdef DEBUG
6859 		/* pass it to the default handler so we can debug things */
6860 		power_req.request_type = PMR_PPM_PRE_PROBE;
6861 		power_req.req.ppm_config_req.who = dip;
6862 		(void) pm_ctlops(NULL, dip,
6863 		    DDI_CTLOPS_POWER, &power_req, &result);
6864 #endif
6865 		cp->ppc_ppm = NULL;
6866 	}
6867 }
6868 
6869 int
6870 pm_pre_config(dev_info_t *dip, char *devnm)
6871 {
6872 	PMD_FUNC(pmf, "pre_config")
6873 	int ret;
6874 
6875 	if (MDI_VHCI(dip)) {
6876 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6877 		ret = mdi_power(dip, MDI_PM_PRE_CONFIG, NULL, devnm, 0);
6878 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6879 	} else if (!PM_GET_PM_INFO(dip))
6880 		return (DDI_SUCCESS);
6881 
6882 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
6883 	pm_hold_power(dip);
6884 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6885 	if (ret != DDI_SUCCESS)
6886 		pm_rele_power(dip);
6887 	return (ret);
6888 }
6889 
6890 /*
6891  * This routine is called by devfs during its walk to unconfigue a node.
6892  * If the call is due to auto mod_unloads and the dip is not at its
6893  * full power, we return DDI_FAILURE to terminate the walk, otherwise
6894  * return DDI_SUCCESS.
6895  */
6896 int
6897 pm_pre_unconfig(dev_info_t *dip, int flags, int *held, char *devnm)
6898 {
6899 	PMD_FUNC(pmf, "pre_unconfig")
6900 	int ret;
6901 
6902 	if (MDI_VHCI(dip)) {
6903 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf,
6904 		    PM_DEVICE(dip), flags))
6905 		ret = mdi_power(dip, MDI_PM_PRE_UNCONFIG, held, devnm, flags);
6906 		return (ret == MDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
6907 	} else if (!PM_GET_PM_INFO(dip))
6908 		return (DDI_SUCCESS);
6909 
6910 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), flags=%x\n", pmf, PM_DEVICE(dip),
6911 	    flags))
6912 	*held = 0;
6913 
6914 	/*
6915 	 * If the dip is a leaf node, don't power it up.
6916 	 */
6917 	if (!ddi_get_child(dip))
6918 		return (DDI_SUCCESS);
6919 
6920 	/*
6921 	 * Do not power up the node if it is called due to auto-modunload.
6922 	 */
6923 	if ((flags & NDI_AUTODETACH) && !pm_all_at_normal(dip))
6924 		return (DDI_FAILURE);
6925 
6926 	pm_hold_power(dip);
6927 	*held = 1;
6928 	ret = pm_all_to_normal(dip, PM_CANBLOCK_BLOCK);
6929 	if (ret != DDI_SUCCESS) {
6930 		pm_rele_power(dip);
6931 		*held = 0;
6932 	}
6933 	return (ret);
6934 }
6935 
6936 /*
6937  * Notify ppm of attach action.  Parent is already held at full power by
6938  * probe action.
6939  */
6940 void
6941 pm_pre_attach(dev_info_t *dip, pm_ppm_cookie_t *cp, ddi_attach_cmd_t cmd)
6942 {
6943 	static char *me = "pm_pre_attach";
6944 	power_req_t power_req;
6945 	int result;
6946 
6947 	/*
6948 	 * Initialize and fill in the PPM cookie
6949 	 */
6950 	bzero(cp, sizeof (*cp));
6951 	cp->ppc_cmd = (int)cmd;
6952 	cp->ppc_ppm = PPM(dip);
6953 	cp->ppc_dip = dip;
6954 
6955 	/*
6956 	 * DDI_ATTACH and DDI_RESUME cmds need to call platform specific
6957 	 * Power Management stuff. DDI_RESUME also has to purge it's
6958 	 * powerlevel information.
6959 	 */
6960 	switch (cmd) {
6961 	case DDI_ATTACH:
6962 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6963 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6964 			power_req.req.ppm_config_req.who = dip;
6965 			ASSERT(PPM(dip));
6966 			(void) pm_ctlops(cp->ppc_ppm, dip, DDI_CTLOPS_POWER,
6967 			    &power_req, &result);
6968 		}
6969 #ifdef DEBUG
6970 		else {
6971 			power_req.request_type = PMR_PPM_PRE_ATTACH;
6972 			power_req.req.ppm_config_req.who = dip;
6973 			(void) pm_ctlops(NULL, dip,
6974 			    DDI_CTLOPS_POWER, &power_req, &result);
6975 		}
6976 #endif
6977 		break;
6978 	case DDI_RESUME:
6979 		pm_forget_power_level(dip);
6980 
6981 		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
6982 			power_req.request_type = PMR_PPM_PRE_RESUME;
6983 			power_req.req.resume_req.who = cp->ppc_dip;
6984 			power_req.req.resume_req.cmd =
6985 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6986 			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
6987 			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
6988 			    DDI_CTLOPS_POWER, &power_req, &result);
6989 		}
6990 #ifdef DEBUG
6991 		else {
6992 			power_req.request_type = PMR_PPM_PRE_RESUME;
6993 			power_req.req.resume_req.who = cp->ppc_dip;
6994 			power_req.req.resume_req.cmd =
6995 			    (ddi_attach_cmd_t)cp->ppc_cmd;
6996 			(void) pm_ctlops(NULL, cp->ppc_dip,
6997 			    DDI_CTLOPS_POWER, &power_req, &result);
6998 		}
6999 #endif
7000 		break;
7001 
7002 	case DDI_PM_RESUME:
7003 		break;
7004 
7005 	default:
7006 		panic(me);
7007 	}
7008 }
7009 
7010 /*
7011  * Nexus drivers call into pm framework to indicate which child driver is
7012  * being uninstalled.  In some platforms, ppm may need to reconfigure the
7013  * hardware since the device driver is no longer installed.
7014  */
7015 int
7016 pm_uninit_child(dev_info_t *dip)
7017 {
7018 	power_req_t power_req;
7019 
7020 	ASSERT(ddi_binding_name(dip));
7021 	ASSERT(ddi_get_name_addr(dip));
7022 	pm_ppm_claim(dip);
7023 	if (pm_ppm_claimed(dip)) {	/* if ppm driver claims the node */
7024 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
7025 		power_req.req.ppm_config_req.who = dip;
7026 		ASSERT(PPM(dip));
7027 		return (pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER, &power_req,
7028 		    NULL));
7029 	} else {
7030 #ifdef DEBUG
7031 		/* pass it to the default handler so we can debug things */
7032 		power_req.request_type = PMR_PPM_UNINIT_CHILD;
7033 		power_req.req.ppm_config_req.who = dip;
7034 		(void) pm_ctlops(NULL, dip, DDI_CTLOPS_POWER, &power_req, NULL);
7035 #endif
7036 	}
7037 	return (DDI_SUCCESS);
7038 }
7039 /*
7040  * Decrement kidsupcnt so scan can turn the parent back off if it is idle
7041  * Also notify ppm of result of probe if there is a ppm that cares
7042  */
7043 void
7044 pm_post_probe(pm_ppm_cookie_t *cp, int ret, int probe_failed)
7045 {
7046 	_NOTE(ARGUNUSED(probe_failed))
7047 	int result;
7048 	power_req_t power_req;
7049 
7050 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7051 		power_req.request_type = PMR_PPM_POST_PROBE;
7052 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7053 		power_req.req.ppm_config_req.result = ret;
7054 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7055 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip, DDI_CTLOPS_POWER,
7056 		    &power_req, &result);
7057 	}
7058 #ifdef DEBUG
7059 	else {
7060 		power_req.request_type = PMR_PPM_POST_PROBE;
7061 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7062 		power_req.req.ppm_config_req.result = ret;
7063 		(void) pm_ctlops(NULL, cp->ppc_dip, DDI_CTLOPS_POWER,
7064 		    &power_req, &result);
7065 	}
7066 #endif
7067 }
7068 
7069 void
7070 pm_post_config(dev_info_t *dip, char *devnm)
7071 {
7072 	PMD_FUNC(pmf, "post_config")
7073 
7074 	if (MDI_VHCI(dip)) {
7075 		PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7076 		(void) mdi_power(dip, MDI_PM_POST_CONFIG, NULL, devnm, 0);
7077 		return;
7078 	} else if (!PM_GET_PM_INFO(dip))
7079 		return;
7080 
7081 	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
7082 	pm_rele_power(dip);
7083 }
7084 
7085 void
7086 pm_post_unconfig(dev_info_t *dip, int held, char *devnm)
7087 {
7088 	PMD_FUNC(pmf, "post_unconfig")
7089 
7090 	if (MDI_VHCI(dip)) {
7091 		PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf,
7092 		    PM_DEVICE(dip), held))
7093 		(void) mdi_power(dip, MDI_PM_POST_UNCONFIG, &held, devnm, 0);
7094 		return;
7095 	} else if (!PM_GET_PM_INFO(dip))
7096 		return;
7097 
7098 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), held = %d\n", pmf, PM_DEVICE(dip),
7099 	    held))
7100 	if (!held)
7101 		return;
7102 	/*
7103 	 * We have held power in pre_unconfig, release it here.
7104 	 */
7105 	pm_rele_power(dip);
7106 }
7107 
7108 /*
7109  * Notify ppm of result of attach if there is a ppm that cares
7110  */
7111 void
7112 pm_post_attach(pm_ppm_cookie_t *cp, int ret)
7113 {
7114 	int result;
7115 	power_req_t power_req;
7116 	dev_info_t	*dip;
7117 
7118 	if (cp->ppc_cmd != DDI_ATTACH)
7119 		return;
7120 
7121 	dip = cp->ppc_dip;
7122 
7123 	if (ret == DDI_SUCCESS) {
7124 		/*
7125 		 * Attach succeeded, so proceed to doing post-attach pm tasks
7126 		 */
7127 		if (PM_GET_PM_INFO(dip) == NULL)
7128 			(void) pm_start(dip);
7129 	} else {
7130 		/*
7131 		 * Attach may have got pm started before failing
7132 		 */
7133 		pm_stop(dip);
7134 	}
7135 
7136 	if (cp->ppc_ppm) {	/* if ppm driver claims the node */
7137 		power_req.request_type = PMR_PPM_POST_ATTACH;
7138 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7139 		power_req.req.ppm_config_req.result = ret;
7140 		ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
7141 		(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
7142 		    DDI_CTLOPS_POWER, &power_req, &result);
7143 	}
7144 #ifdef DEBUG
7145 	else {
7146 		power_req.request_type = PMR_PPM_POST_ATTACH;
7147 		power_req.req.ppm_config_req.who = cp->ppc_dip;
7148 		power_req.req.ppm_config_req.result = ret;
7149 		(void) pm_ctlops(NULL, cp->ppc_dip,
7150 		    DDI_CTLOPS_POWER, &power_req, &result);
7151 	}
7152 #endif
7153 }
7154 
7155 /*
7156  * Notify ppm of attach action.  Parent is already held at full power by
7157  * probe action.
7158  */
7159 void
7160 pm_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, pm_ppm_cookie_t *cp)
7161 {
7162 	int result;
7163 	power_req_t power_req;
7164 
7165 	bzero(cp, sizeof (*cp));
7166 	cp->ppc_dip = dip;
7167 	cp->ppc_cmd = (int)cmd;
7168 
7169 	switch (cmd) {
7170 	case DDI_DETACH:
7171 		pm_detaching(dip);		/* suspend pm while detaching */
7172 		if (pm_ppm_claimed(dip)) {	/* if ppm driver claims node */
7173 			power_req.request_type = PMR_PPM_PRE_DETACH;
7174 			power_req.req.ppm_config_req.who = dip;
7175 			ASSERT(PPM(dip));
7176 			(void) pm_ctlops(PPM(dip), dip, DDI_CTLOPS_POWER,
7177 			    &power_req, &result);
7178 			cp->ppc_ppm = PPM(dip);
7179 		} else {
7180 #ifdef DEBUG
7181 			/* pass to the default handler so we can debug things */
7182 			power_req.request_type = PMR_PPM_PRE_DETACH;
7183 			power_req.req.ppm_config_req.who = dip;
7184 			(void) pm_ctlops(NULL, dip,
7185 			    DDI_CTLOPS_POWER, &power_req, &result);
7186 #endif
7187 			cp->ppc_ppm = NULL;
7188 		}
7189 		break;
7190 
7191 	default:
7192 		break;
7193 	}
7194 }
7195 
7196 /*
7197  * Dip is either a leaf node that exported "no-involuntary-power-cycles" prop.,
7198  * (if devi_pm_noinvol count is 0) or an ancestor of such a node.  We need to
7199  * make an entry to record the details, which includes certain flag settings.
7200  */
7201 static void
7202 pm_record_invol_path(char *path, int flags, int noinvolpm, int volpmd,
7203     int wasvolpmd, major_t major)
7204 {
7205 	PMD_FUNC(pmf, "record_invol_path")
7206 	major_t pm_path_to_major(char *);
7207 	size_t plen;
7208 	pm_noinvol_t *ip, *np, *pp;
7209 	pp = NULL;
7210 
7211 	plen = strlen(path) + 1;
7212 	np = kmem_zalloc(sizeof (*np), KM_SLEEP);
7213 	np->ni_size = plen;
7214 	np->ni_path = kmem_alloc(plen, KM_SLEEP);
7215 	np->ni_noinvolpm = noinvolpm;
7216 	np->ni_volpmd = volpmd;
7217 	np->ni_wasvolpmd = wasvolpmd;
7218 	np->ni_flags = flags;
7219 	(void) strcpy(np->ni_path, path);
7220 	/*
7221 	 * If we haven't actually seen the node attached, it is hard to figure
7222 	 * out its major.  If we could hold the node by path, we would be much
7223 	 * happier here.
7224 	 */
7225 	if (major == DDI_MAJOR_T_NONE) {
7226 		np->ni_major = pm_path_to_major(path);
7227 	} else {
7228 		np->ni_major = major;
7229 	}
7230 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
7231 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
7232 		int comp = strcmp(path, ip->ni_path);
7233 		if (comp < 0) {
7234 			PMD(PMD_NOINVOL, ("%s: %s insert before %s\n",
7235 			    pmf, path, ip->ni_path))
7236 			/* insert before current entry */
7237 			np->ni_next = ip;
7238 			if (pp) {
7239 				pp->ni_next = np;
7240 			} else {
7241 				pm_noinvol_head = np;
7242 			}
7243 			rw_exit(&pm_noinvol_rwlock);
7244 #ifdef DEBUG
7245 			if (pm_debug & PMD_NOINVOL)
7246 				pr_noinvol("record_invol_path exit0");
7247 #endif
7248 			return;
7249 		} else if (comp == 0) {
7250 			panic("%s already in pm_noinvol list", path);
7251 		}
7252 	}
7253 	/*
7254 	 * If we did not find an entry in the list that this should go before,
7255 	 * then it must go at the end
7256 	 */
7257 	if (pp) {
7258 		PMD(PMD_NOINVOL, ("%s: %s append after %s\n", pmf, path,
7259 		    pp->ni_path))
7260 		ASSERT(pp->ni_next == 0);
7261 		pp->ni_next = np;
7262 	} else {
7263 		PMD(PMD_NOINVOL, ("%s: %s added to end-of-list\n", pmf, path))
7264 		ASSERT(!pm_noinvol_head);
7265 		pm_noinvol_head = np;
7266 	}
7267 	rw_exit(&pm_noinvol_rwlock);
7268 #ifdef DEBUG
7269 	if (pm_debug & PMD_NOINVOL)
7270 		pr_noinvol("record_invol_path exit");
7271 #endif
7272 }
7273 
7274 void
7275 pm_record_invol(dev_info_t *dip)
7276 {
7277 	char *pathbuf;
7278 	int pm_all_components_off(dev_info_t *);
7279 	int volpmd = (PM_NUMCMPTS(dip) > 0) && pm_all_components_off(dip);
7280 
7281 	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
7282 	(void) ddi_pathname(dip, pathbuf);
7283 
7284 	pm_record_invol_path(pathbuf, (DEVI(dip)->devi_pm_flags &
7285 	    (PMC_NO_INVOL | PMC_CONSOLE_FB)), DEVI(dip)->devi_pm_noinvolpm,
7286 	    DEVI(dip)->devi_pm_volpmd, volpmd, PM_MAJOR(dip));
7287 
7288 	/*
7289 	 * If this child's detach will be holding up its ancestors, then we
7290 	 * allow for an exception to that if all children of this type have
7291 	 * gone down voluntarily.
7292 	 * Now walk down the tree incrementing devi_pm_noinvolpm
7293 	 */
7294 	(void) pm_noinvol_update(PM_BP_NOINVOL_DETACH, 0, volpmd, pathbuf,
7295 	    dip);
7296 	kmem_free(pathbuf, MAXPATHLEN);
7297 }
7298 
/*
 * Post-detach hook; cp is the cookie filled in by pm_pre_detach() and ret is
 * the result of the driver's detach.
 *
 * For DDI_DETACH the result is first forwarded to the ppm recorded in the
 * cookie (or, on DEBUG kernels, to the default handler when there is none).
 * On success the node is recorded in the no-involuntary list if it carries
 * the relevant flags or count, its PMC_NO_INVOL/PMC_NOINVOL_DONE flags are
 * cleared and pm is stopped for good.  On failure power management of the
 * node is resumed.  In both cases the console frame buffer bookkeeping
 * (cfb_dip/cfb_dip_detaching) is settled under pm_cfb_lock.
 *
 * DDI_PM_SUSPEND and DDI_SUSPEND need no work.  Any other command panics on
 * DEBUG kernels and is ignored otherwise.
 */
void
pm_post_detach(pm_ppm_cookie_t *cp, int ret)
{
	dev_info_t *dip = cp->ppc_dip;
	int result;
	power_req_t power_req;

	switch (cp->ppc_cmd) {
	case DDI_DETACH:
		if (cp->ppc_ppm) {	/* if ppm driver claims the node */
			power_req.request_type = PMR_PPM_POST_DETACH;
			power_req.req.ppm_config_req.who = cp->ppc_dip;
			power_req.req.ppm_config_req.result = ret;
			ASSERT(PPM(cp->ppc_dip) == cp->ppc_ppm);
			(void) pm_ctlops(cp->ppc_ppm, cp->ppc_dip,
			    DDI_CTLOPS_POWER, &power_req, &result);
		}
#ifdef DEBUG
		else {
			/* no ppm: route to the default handler for debugging */
			power_req.request_type = PMR_PPM_POST_DETACH;
			power_req.req.ppm_config_req.who = cp->ppc_dip;
			power_req.req.ppm_config_req.result = ret;
			(void) pm_ctlops(NULL, cp->ppc_dip,
			    DDI_CTLOPS_POWER, &power_req, &result);
		}
#endif
		if (ret == DDI_SUCCESS) {
			/*
			 * For hotplug detach we assume it is *really* gone
			 */
			/*
			 * The ppc_cmd test below is always true inside this
			 * case label; it is kept as written.
			 */
			if (cp->ppc_cmd == DDI_DETACH &&
			    ((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm))
				pm_record_invol(dip);
			/* the flags were read above, so they can go now */
			DEVI(dip)->devi_pm_flags &=
			    ~(PMC_NO_INVOL | PMC_NOINVOL_DONE);

			/*
			 * If console fb is detaching, then we don't need to
			 * worry any more about it going off (pm_detaching has
			 * brought up all components)
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				ASSERT(cfb_dip_detaching);
				ASSERT(cfb_dip == NULL);
				ASSERT(pm_cfb_comps_off == 0);
				cfb_dip_detaching = NULL;
				mutex_exit(&pm_cfb_lock);
			}
			pm_stop(dip);	/* make it permanent */
		} else {
			/*
			 * Detach failed: the console fb (if this is it) is
			 * still with us, so reinstate it as cfb_dip.
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				ASSERT(cfb_dip_detaching);
				ASSERT(cfb_dip == NULL);
				ASSERT(pm_cfb_comps_off == 0);
				cfb_dip = cfb_dip_detaching;
				cfb_dip_detaching = NULL;
				mutex_exit(&pm_cfb_lock);
			}
			pm_detach_failed(dip);	/* resume power management */
		}
		break;
	case DDI_PM_SUSPEND:
		break;
	case DDI_SUSPEND:
		break;				/* legal, but nothing to do */
	default:
#ifdef DEBUG
		panic("pm_post_detach: unrecognized cmd %d for detach",
		    cp->ppc_cmd);
		/*NOTREACHED*/
#else
		break;
#endif
	}
}
7378 
7379 /*
7380  * Called after vfs_mountroot has got the clock started to fix up timestamps
7381  * that were set when root bush drivers attached.  hresttime was 0 then, so the
7382  * devices look busy but have a 0 busycnt
7383  */
7384 int
7385 pm_adjust_timestamps(dev_info_t *dip, void *arg)
7386 {
7387 	_NOTE(ARGUNUSED(arg))
7388 
7389 	pm_info_t *info = PM_GET_PM_INFO(dip);
7390 	struct pm_component *cp;
7391 	int i;
7392 
7393 	if (!info)
7394 		return (DDI_WALK_CONTINUE);
7395 	PM_LOCK_BUSY(dip);
7396 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7397 		cp = PM_CP(dip, i);
7398 		if (cp->pmc_timestamp == 0 && cp->pmc_busycount == 0)
7399 			cp->pmc_timestamp = gethrestime_sec();
7400 	}
7401 	PM_UNLOCK_BUSY(dip);
7402 	return (DDI_WALK_CONTINUE);
7403 }
7404 
/*
 * Called at attach time to see if the device being attached has a record in
 * the no involuntary power cycles list.  If so, we do some bookkeeping on the
 * parents and set a flag in the dip
 *
 * The work is done at most once per dip: PMC_NOINVOL_DONE is set on the first
 * call and later calls return immediately.  The list is searched twice, first
 * under the reader lock just to see whether a matching path exists, and then
 * again under the writer lock to copy the saved state into the dip and unlink
 * and free the entry.
 */
void
pm_noinvol_specd(dev_info_t *dip)
{
	PMD_FUNC(pmf, "noinvol_specd")
	char *pathbuf;
	pm_noinvol_t *ip, *pp = NULL;
	int wasvolpmd;
	int found = 0;

	if (DEVI(dip)->devi_pm_flags & PMC_NOINVOL_DONE)
		return;
	DEVI(dip)->devi_pm_flags |=  PMC_NOINVOL_DONE;
	pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(dip, pathbuf);

	PM_LOCK_DIP(dip);
	/* start from a clean slate; a matching entry below refills these */
	DEVI(dip)->devi_pm_volpmd = 0;
	DEVI(dip)->devi_pm_noinvolpm = 0;
	/* first pass, reader lock: is there an entry for this path at all? */
	rw_enter(&pm_noinvol_rwlock, RW_READER);
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			found++;
			break;
		}
	}
	rw_exit(&pm_noinvol_rwlock);
	if (!found) {
		PM_UNLOCK_DIP(dip);
		kmem_free(pathbuf, MAXPATHLEN);
		return;
	}
	/*
	 * Second pass, writer lock.  The lock was dropped in between, so the
	 * list is searched again from the head rather than reusing ip/pp.
	 */
	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
	pp = NULL;
	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
		PMD(PMD_NOINVOL, ("%s: comparing '%s' to '%s'\n",
		    pmf, pathbuf, ip->ni_path))
		if (strcmp(pathbuf, ip->ni_path) == 0) {
			ip->ni_flags &= ~PMC_DRIVER_REMOVED;
			DEVI(dip)->devi_pm_flags |= ip->ni_flags;
			/*
			 * Handle special case of console fb
			 */
			if (PM_IS_CFB(dip)) {
				mutex_enter(&pm_cfb_lock);
				cfb_dip = dip;
				PMD(PMD_CFB, ("%s: %s@%s(%s#%d) setting "
				    "cfb_dip\n", pmf, PM_DEVICE(dip)))
				mutex_exit(&pm_cfb_lock);
			}
			DEVI(dip)->devi_pm_noinvolpm = ip->ni_noinvolpm;
			ASSERT((DEVI(dip)->devi_pm_flags &
			    (PMC_NO_INVOL | PMC_CONSOLE_FB)) ||
			    DEVI(dip)->devi_pm_noinvolpm);
			DEVI(dip)->devi_pm_volpmd = ip->ni_volpmd;
			PMD(PMD_NOINVOL, ("%s: noinvol=%d, volpmd=%d, "
			    "wasvolpmd=%d, flags=%x, path=%s\n", pmf,
			    ip->ni_noinvolpm, ip->ni_volpmd,
			    ip->ni_wasvolpmd, ip->ni_flags, ip->ni_path))
			/*
			 * free the entry in hopes the list will now be empty
			 * and we won't have to search it any more until the
			 * device detaches
			 */
			if (pp) {
				PMD(PMD_NOINVOL, ("%s: free %s, prev %s\n",
				    pmf, ip->ni_path, pp->ni_path))
				pp->ni_next = ip->ni_next;
			} else {
				PMD(PMD_NOINVOL, ("%s: free %s head\n",
				    pmf, ip->ni_path))
				ASSERT(pm_noinvol_head == ip);
				pm_noinvol_head = ip->ni_next;
			}
			PM_UNLOCK_DIP(dip);
			/* save what is still needed before ip is freed */
			wasvolpmd = ip->ni_wasvolpmd;
			rw_exit(&pm_noinvol_rwlock);
			/* ip is unlinked, so it can be freed without the lock */
			kmem_free(ip->ni_path, ip->ni_size);
			kmem_free(ip, sizeof (*ip));
			/*
			 * Now walk up the tree decrementing devi_pm_noinvolpm
			 * (and volpmd if appropriate)
			 */
			(void) pm_noinvol_update(PM_BP_NOINVOL_ATTACH, 0,
			    wasvolpmd, pathbuf, dip);
#ifdef DEBUG
			if (pm_debug & PMD_NOINVOL)
				pr_noinvol("noinvol_specd exit");
#endif
			kmem_free(pathbuf, MAXPATHLEN);
			return;
		}
	}
	/* reached only if the entry vanished between the two passes */
	kmem_free(pathbuf, MAXPATHLEN);
	rw_exit(&pm_noinvol_rwlock);
	PM_UNLOCK_DIP(dip);
}
7508 
7509 int
7510 pm_all_components_off(dev_info_t *dip)
7511 {
7512 	int i;
7513 	pm_component_t *cp;
7514 
7515 	for (i = 0; i < PM_NUMCMPTS(dip); i++) {
7516 		cp = PM_CP(dip, i);
7517 		if (cp->pmc_cur_pwr == PM_LEVEL_UNKNOWN ||
7518 		    cp->pmc_comp.pmc_lvals[cp->pmc_cur_pwr])
7519 			return (0);
7520 	}
7521 	return (1);	/* all off */
7522 }
7523 
7524 /*
7525  * Make sure that all "no involuntary power cycles" devices are attached.
7526  * Called before doing a cpr suspend to make sure the driver has a say about
7527  * the power cycle
7528  */
7529 int
7530 pm_reattach_noinvol(void)
7531 {
7532 	PMD_FUNC(pmf, "reattach_noinvol")
7533 	pm_noinvol_t *ip;
7534 	char *path;
7535 	dev_info_t *dip;
7536 
7537 	/*
7538 	 * Prevent the modunload thread from unloading any modules until we
7539 	 * have completely stopped all kernel threads.
7540 	 */
7541 	modunload_disable();
7542 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7543 		/*
7544 		 * Forget we'v ever seen any entry
7545 		 */
7546 		ip->ni_persistent = 0;
7547 	}
7548 restart:
7549 	rw_enter(&pm_noinvol_rwlock, RW_READER);
7550 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
7551 #ifdef PMDDEBUG
7552 		major_t maj;
7553 		maj = ip->ni_major;
7554 #endif
7555 		path = ip->ni_path;
7556 		if (path != NULL && !(ip->ni_flags & PMC_DRIVER_REMOVED)) {
7557 			if (ip->ni_persistent) {
7558 				/*
7559 				 * If we weren't able to make this entry
7560 				 * go away, then we give up, as
7561 				 * holding/attaching the driver ought to have
7562 				 * resulted in this entry being deleted
7563 				 */
7564 				PMD(PMD_NOINVOL, ("%s: can't reattach %s "
7565 				    "(%s|%d)\n", pmf, ip->ni_path,
7566 				    ddi_major_to_name(maj), (int)maj))
7567 				cmn_err(CE_WARN, "cpr: unable to reattach %s ",
7568 				    ip->ni_path);
7569 				modunload_enable();
7570 				rw_exit(&pm_noinvol_rwlock);
7571 				return (0);
7572 			}
7573 			ip->ni_persistent++;
7574 			rw_exit(&pm_noinvol_rwlock);
7575 			PMD(PMD_NOINVOL, ("%s: holding %s\n", pmf, path))
7576 			dip = e_ddi_hold_devi_by_path(path, 0);
7577 			if (dip == NULL) {
7578 				PMD(PMD_NOINVOL, ("%s: can't hold (%s|%d)\n",
7579 				    pmf, path, (int)maj))
7580 				cmn_err(CE_WARN, "cpr: unable to hold %s "
7581 				    "driver", path);
7582 				modunload_enable();
7583 				return (0);
7584 			} else {
7585 				PMD(PMD_DHR, ("%s: release %s\n", pmf, path))
7586 				/*
7587 				 * Since the modunload thread is stopped, we
7588 				 * don't have to keep the driver held, which
7589 				 * saves a ton of bookkeeping
7590 				 */
7591 				ddi_release_devi(dip);
7592 				goto restart;
7593 			}
7594 		} else {
7595 			PMD(PMD_NOINVOL, ("%s: skip %s; unknown major\n",
7596 			    pmf, ip->ni_path))
7597 			continue;
7598 		}
7599 	}
7600 	rw_exit(&pm_noinvol_rwlock);
7601 	return (1);
7602 }
7603 
/*
 * Re-enable module unloading, undoing the modunload_disable() done by
 * pm_reattach_noinvol().
 */
void
pm_reattach_noinvol_fini(void)
{
	modunload_enable();
}
7609 
7610 /*
7611  * Display pm support code
7612  */
7613 
7614 
7615 /*
7616  * console frame-buffer power-mgmt gets enabled when debugging
7617  * services are not present or console fbpm override is set
7618  */
7619 void
7620 pm_cfb_setup(const char *stdout_path)
7621 {
7622 	PMD_FUNC(pmf, "cfb_setup")
7623 	extern int obpdebug;
7624 	char *devname;
7625 	dev_info_t *dip;
7626 	int devname_len;
7627 	extern dev_info_t *fbdip;
7628 
7629 	/*
7630 	 * By virtue of this function being called (from consconfig),
7631 	 * we know stdout is a framebuffer.
7632 	 */
7633 	stdout_is_framebuffer = 1;
7634 
7635 	if (obpdebug || (boothowto & RB_DEBUG)) {
7636 		if (pm_cfb_override == 0) {
7637 			/*
7638 			 * Console is frame buffer, but we want to suppress
7639 			 * pm on it because of debugging setup
7640 			 */
7641 			pm_cfb_enabled = 0;
7642 			cmn_err(CE_NOTE, "Kernel debugger present: disabling "
7643 			    "console power management.");
7644 			/*
7645 			 * however, we still need to know which is the console
7646 			 * fb in order to suppress pm on it
7647 			 */
7648 		} else {
7649 			cmn_err(CE_WARN, "Kernel debugger present: see "
7650 			    "kmdb(1M) for interaction with power management.");
7651 		}
7652 	}
7653 #ifdef DEBUG
7654 	/*
7655 	 * IF console is fb and is power managed, don't do prom_printfs from
7656 	 * pm debug macro
7657 	 */
7658 	if (pm_cfb_enabled && !pm_debug_to_console) {
7659 		if (pm_debug)
7660 			prom_printf("pm debug output will be to log only\n");
7661 		pm_divertdebug++;
7662 	}
7663 #endif
7664 	devname = i_ddi_strdup((char *)stdout_path, KM_SLEEP);
7665 	devname_len = strlen(devname) + 1;
7666 	PMD(PMD_CFB, ("%s: stripped %s\n", pmf, devname))
7667 	/* if the driver is attached */
7668 	if ((dip = fbdip) != NULL) {
7669 		PMD(PMD_CFB, ("%s: attached: %s@%s(%s#%d)\n", pmf,
7670 		    PM_DEVICE(dip)))
7671 		/*
7672 		 * We set up here as if the driver were power manageable in case
7673 		 * we get a later attach of a pm'able driver (which would result
7674 		 * in a panic later)
7675 		 */
7676 		cfb_dip = dip;
7677 		DEVI(dip)->devi_pm_flags |= (PMC_CONSOLE_FB | PMC_NO_INVOL);
7678 		PMD(PMD_CFB, ("%s: cfb_dip -> %s@%s(%s#%d)\n", pmf,
7679 		    PM_DEVICE(dip)))
7680 #ifdef DEBUG
7681 		if (!(PM_GET_PM_INFO(dip) != NULL && PM_NUMCMPTS(dip))) {
7682 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d) not power-managed\n",
7683 			    pmf, PM_DEVICE(dip)))
7684 		}
7685 #endif
7686 	} else {
7687 		char *ep;
7688 		PMD(PMD_CFB, ("%s: pntd %s failed\n", pmf, devname))
7689 		pm_record_invol_path(devname,
7690 		    (PMC_CONSOLE_FB | PMC_NO_INVOL), 1, 0, 0,
7691 		    DDI_MAJOR_T_NONE);
7692 		for (ep = strrchr(devname, '/'); ep != devname;
7693 		    ep = strrchr(devname, '/')) {
7694 			PMD(PMD_CFB, ("%s: devname %s\n", pmf, devname))
7695 			*ep = '\0';
7696 			dip = pm_name_to_dip(devname, 0);
7697 			if (dip != NULL) {
7698 				/*
7699 				 * Walk up the tree incrementing
7700 				 * devi_pm_noinvolpm
7701 				 */
7702 				(void) pm_noinvol_update(PM_BP_NOINVOL_CFB,
7703 				    0, 0, devname, dip);
7704 				break;
7705 			} else {
7706 				pm_record_invol_path(devname,
7707 				    PMC_NO_INVOL, 1, 0, 0, DDI_MAJOR_T_NONE);
7708 			}
7709 		}
7710 	}
7711 	kmem_free(devname, devname_len);
7712 }
7713 
/*
 * Drop one console-in-use hold: decrements cfb_inuse under pm_cfb_lock.
 * This is the release half of the hold taken by pm_cfb_check_and_hold(),
 * which increments the same counter.
 */
void
pm_cfb_rele(void)
{
	mutex_enter(&pm_cfb_lock);
	/*
	 * this call isn't using the console any  more, it is ok to take it
	 * down if the count goes to 0
	 */
	cfb_inuse--;
	mutex_exit(&pm_cfb_lock);
}
7725 
7726 /*
7727  * software interrupt handler for fbpm; this function exists because we can't
7728  * bring up the frame buffer power from above lock level.  So if we need to,
7729  * we instead schedule a softint that runs this routine and takes us into
7730  * debug_enter (a bit delayed from the original request, but avoiding a panic).
7731  */
7732 static uint_t
7733 pm_cfb_softint(caddr_t int_handler_arg)
7734 {
7735 	_NOTE(ARGUNUSED(int_handler_arg))
7736 	int rval = DDI_INTR_UNCLAIMED;
7737 
7738 	mutex_enter(&pm_cfb_lock);
7739 	if (pm_soft_pending) {
7740 		mutex_exit(&pm_cfb_lock);
7741 		debug_enter((char *)NULL);
7742 		/* acquired in debug_enter before calling pm_cfb_trigger */
7743 		pm_cfb_rele();
7744 		mutex_enter(&pm_cfb_lock);
7745 		pm_soft_pending = B_FALSE;
7746 		mutex_exit(&pm_cfb_lock);
7747 		rval = DDI_INTR_CLAIMED;
7748 	} else
7749 		mutex_exit(&pm_cfb_lock);
7750 
7751 	return (rval);
7752 }
7753 
7754 void
7755 pm_cfb_setup_intr(void)
7756 {
7757 	PMD_FUNC(pmf, "cfb_setup_intr")
7758 	extern void prom_set_outfuncs(void (*)(void), void (*)(void));
7759 	void pm_cfb_check_and_powerup(void);
7760 
7761 	mutex_init(&pm_cfb_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7762 #ifdef PMDDEBUG
7763 	mutex_init(&pm_debug_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL8));
7764 #endif
7765 
7766 	if (!stdout_is_framebuffer) {
7767 		PMD(PMD_CFB, ("%s: console not fb\n", pmf))
7768 		return;
7769 	}
7770 
7771 	/*
7772 	 * setup software interrupt handler
7773 	 */
7774 	if (ddi_add_softintr(ddi_root_node(), DDI_SOFTINT_HIGH, &pm_soft_id,
7775 	    NULL, NULL, pm_cfb_softint, NULL) != DDI_SUCCESS)
7776 		panic("pm: unable to register soft intr.");
7777 
7778 	prom_set_outfuncs(pm_cfb_check_and_powerup, pm_cfb_rele);
7779 }
7780 
7781 /*
7782  * Checks to see if it is safe to write to the console wrt power management
7783  * (i.e. if the console is a framebuffer, then it must be at full power)
7784  * returns 1 when power is off (power-up is needed)
7785  * returns 0 when power is on (power-up not needed)
7786  */
7787 int
7788 pm_cfb_check_and_hold(void)
7789 {
7790 	/*
7791 	 * cfb_dip is set iff console is a power manageable frame buffer
7792 	 * device
7793 	 */
7794 	extern int modrootloaded;
7795 
7796 	mutex_enter(&pm_cfb_lock);
7797 	cfb_inuse++;
7798 	ASSERT(cfb_inuse);	/* wrap? */
7799 	if (modrootloaded && cfb_dip) {
7800 		/*
7801 		 * don't power down the frame buffer, the prom is using it
7802 		 */
7803 		if (pm_cfb_comps_off) {
7804 			mutex_exit(&pm_cfb_lock);
7805 			return (1);
7806 		}
7807 	}
7808 	mutex_exit(&pm_cfb_lock);
7809 	return (0);
7810 }
7811 
7812 /*
7813  * turn on cfb power (which is known to be off).
7814  * Must be called below lock level!
7815  */
7816 void
7817 pm_cfb_powerup(void)
7818 {
7819 	pm_info_t *info;
7820 	int norm;
7821 	int ccount, ci;
7822 	int unused;
7823 #ifdef DEBUG
7824 	/*
7825 	 * Can't reenter prom_prekern, so suppress pm debug messages
7826 	 * (still go to circular buffer).
7827 	 */
7828 	mutex_enter(&pm_debug_lock);
7829 	pm_divertdebug++;
7830 	mutex_exit(&pm_debug_lock);
7831 #endif
7832 	info = PM_GET_PM_INFO(cfb_dip);
7833 	ASSERT(info);
7834 
7835 	ccount = PM_NUMCMPTS(cfb_dip);
7836 	for (ci = 0; ci < ccount; ci++) {
7837 		norm = pm_get_normal_power(cfb_dip, ci);
7838 		(void) pm_set_power(cfb_dip, ci, norm, PM_LEVEL_UPONLY,
7839 		    PM_CANBLOCK_BYPASS, 0, &unused);
7840 	}
7841 #ifdef DEBUG
7842 	mutex_enter(&pm_debug_lock);
7843 	pm_divertdebug--;
7844 	mutex_exit(&pm_debug_lock);
7845 #endif
7846 }
7847 
/*
 * Check if the console framebuffer is powered up.  If not power it up.
 * Note: Calling pm_cfb_check_and_hold has put a hold on the power state which
 * must be released by calling pm_cfb_rele when the console fb operation
 * is completed.
 */
void
pm_cfb_check_and_powerup(void)
{
	/* the hold is taken whether or not a power-up turns out to be needed */
	if (!pm_cfb_check_and_hold())
		return;

	pm_cfb_powerup();
}
7860 
/*
 * Trigger a low level interrupt to power up console frame buffer.
 *
 * Does nothing when there is no cfb_dip.  Otherwise pm_soft_pending is set
 * under pm_cfb_lock and the soft interrupt registered as pm_soft_id is
 * triggered; pm_cfb_softint() clears the flag once it has run.  Finding the
 * flag already set is a panic.
 */
void
pm_cfb_trigger(void)
{
	/* checked before pm_cfb_lock is taken */
	if (cfb_dip == NULL)
		return;

	mutex_enter(&pm_cfb_lock);
	/*
	 * If the machine appears to be hung, pulling the keyboard connector of
	 * the console will cause a high level interrupt and go to debug_enter.
	 * But, if the fb is powered down, this routine will be called to bring
	 * it up (by generating a softint to do the work). If a second attempt
	 * at triggering this softint happens before the first one completes,
	 * we panic as softints are most likely not being handled.
	 */
	if (pm_soft_pending) {
		/* pm_cfb_lock is still held here; panic does not return */
		panicstr = "pm_cfb_trigger: failed to enter the debugger";
		panic(panicstr);	/* does a power up at any intr level */
		/* NOTREACHED */
	}
	pm_soft_pending = B_TRUE;
	mutex_exit(&pm_cfb_lock);
	/* the softint is raised only after the lock has been dropped */
	ddi_trigger_softintr(pm_soft_id);
}
7888 
7889 static major_t i_path_to_major(char *, char *);
7890 
7891 major_t
7892 pm_path_to_major(char *path)
7893 {
7894 	PMD_FUNC(pmf, "path_to_major")
7895 	char *np, *ap, *bp;
7896 	major_t ret;
7897 	size_t len;
7898 
7899 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, path))
7900 
7901 	np = strrchr(path, '/');
7902 	if (np != NULL)
7903 		np++;
7904 	else
7905 		np = path;
7906 	len = strlen(np) + 1;
7907 	bp = kmem_alloc(len, KM_SLEEP);
7908 	(void) strcpy(bp, np);
7909 	if ((ap = strchr(bp, '@')) != NULL) {
7910 		*ap = '\0';
7911 	}
7912 	PMD(PMD_NOINVOL, ("%s: %d\n", pmf, ddi_name_to_major(np)))
7913 	ret = i_path_to_major(path, np);
7914 	kmem_free(bp, len);
7915 	return (ret);
7916 }
7917 
#ifdef DEBUG
#ifndef sparc
/* NOTE(review): not referenced in this part of the file; purpose unconfirmed */
clock_t pt_sleep = 1;
#endif

/*
 * State of the in-memory message buffer used by pm_log().  The buffer is
 * allocated on the first call to pm_log().
 */
char	*pm_msgp;		/* where the next message will be written */
char	*pm_bufend;		/* last byte of the buffer */
char	*pm_msgbuf = NULL;	/* start of the buffer, NULL until allocated */
int	pm_logpages = 0x100;	/* size of the buffer, in pages */
#include <sys/sunldi.h>
#include <sys/uio.h>
/* argument of the drv_usecwait() call made after every pm_log() message */
clock_t	pm_log_sleep = 1000;
/* x86: nonzero makes pm_log() follow each message with "\r" via pm_printf() */
int	pm_extra_cr = 1;
/* x86: nonzero makes pm_log() pass each message to pm_printf() */
volatile int pm_tty = 1;

#define	PMLOGPGS	pm_logpages

#if defined(__x86)
void pm_printf(char *s);
#endif
7938 
7939 /*PRINTFLIKE1*/
7940 void
7941 pm_log(const char *fmt, ...)
7942 {
7943 	va_list adx;
7944 	size_t size;
7945 
7946 	mutex_enter(&pm_debug_lock);
7947 	if (pm_msgbuf == NULL) {
7948 		pm_msgbuf = kmem_zalloc(mmu_ptob(PMLOGPGS), KM_SLEEP);
7949 		pm_bufend = pm_msgbuf + mmu_ptob(PMLOGPGS) - 1;
7950 		pm_msgp = pm_msgbuf;
7951 	}
7952 	va_start(adx, fmt);
7953 	size = vsnprintf(NULL, 0, fmt, adx) + 1;
7954 	va_end(adx);
7955 	va_start(adx, fmt);
7956 	if (size > (pm_bufend - pm_msgp)) {		/* wraps */
7957 		bzero(pm_msgp, pm_bufend - pm_msgp);
7958 		(void) vsnprintf(pm_msgbuf, size, fmt, adx);
7959 		if (!pm_divertdebug)
7960 			prom_printf("%s", pm_msgp);
7961 #if defined(__x86)
7962 		if (pm_tty) {
7963 			pm_printf(pm_msgp);
7964 			if (pm_extra_cr)
7965 				pm_printf("\r");
7966 		}
7967 #endif
7968 		pm_msgp = pm_msgbuf + size;
7969 	} else {
7970 		(void) vsnprintf(pm_msgp, size, fmt, adx);
7971 #if defined(__x86)
7972 		if (pm_tty) {
7973 			pm_printf(pm_msgp);
7974 			if (pm_extra_cr)
7975 				pm_printf("\r");
7976 		}
7977 #endif
7978 		if (!pm_divertdebug)
7979 			prom_printf("%s", pm_msgp);
7980 		pm_msgp += size;
7981 	}
7982 	va_end(adx);
7983 	mutex_exit(&pm_debug_lock);
7984 	drv_usecwait((clock_t)pm_log_sleep);
7985 }
7986 #endif	/* DEBUG */
7987 
7988 /*
7989  * We want to save the state of any directly pm'd devices over the suspend/
7990  * resume process so that we can put them back the way the controlling
7991  * process left them.
7992  */
7993 void
7994 pm_save_direct_levels(void)
7995 {
7996 	pm_processes_stopped = 1;
7997 	ddi_walk_devs(ddi_root_node(), pm_save_direct_lvl_walk, 0);
7998 }
7999 
8000 static int
8001 pm_save_direct_lvl_walk(dev_info_t *dip, void *arg)
8002 {
8003 	_NOTE(ARGUNUSED(arg))
8004 	int i;
8005 	int *ip;
8006 	pm_info_t *info = PM_GET_PM_INFO(dip);
8007 
8008 	if (!info)
8009 		return (DDI_WALK_CONTINUE);
8010 
8011 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8012 		if (PM_NUMCMPTS(dip) > 2) {
8013 			info->pmi_lp = kmem_alloc(PM_NUMCMPTS(dip) *
8014 			    sizeof (int), KM_SLEEP);
8015 			ip = info->pmi_lp;
8016 		} else {
8017 			ip = info->pmi_levels;
8018 		}
8019 		/* autopm and processes are stopped, ok not to lock power */
8020 		for (i = 0; i < PM_NUMCMPTS(dip); i++)
8021 			*ip++ = PM_CURPOWER(dip, i);
8022 		/*
8023 		 * There is a small window between stopping the
8024 		 * processes and setting pm_processes_stopped where
8025 		 * a driver could get hung up in a pm_raise_power()
8026 		 * call.  Free any such driver now.
8027 		 */
8028 		pm_proceed(dip, PMP_RELEASE, -1, -1);
8029 	}
8030 
8031 	return (DDI_WALK_CONTINUE);
8032 }
8033 
8034 void
8035 pm_restore_direct_levels(void)
8036 {
8037 	/*
8038 	 * If cpr didn't call pm_save_direct_levels, (because stopping user
8039 	 * threads failed) then we don't want to try to restore them
8040 	 */
8041 	if (!pm_processes_stopped)
8042 		return;
8043 
8044 	ddi_walk_devs(ddi_root_node(), pm_restore_direct_lvl_walk, 0);
8045 	pm_processes_stopped = 0;
8046 }
8047 
8048 static int
8049 pm_restore_direct_lvl_walk(dev_info_t *dip, void *arg)
8050 {
8051 	_NOTE(ARGUNUSED(arg))
8052 	PMD_FUNC(pmf, "restore_direct_lvl_walk")
8053 	int i, nc, result;
8054 	int *ip;
8055 
8056 	pm_info_t *info = PM_GET_PM_INFO(dip);
8057 	if (!info)
8058 		return (DDI_WALK_CONTINUE);
8059 
8060 	if (PM_ISDIRECT(dip) && !PM_ISBC(dip)) {
8061 		if ((nc = PM_NUMCMPTS(dip)) > 2) {
8062 			ip = &info->pmi_lp[nc - 1];
8063 		} else {
8064 			ip = &info->pmi_levels[nc - 1];
8065 		}
8066 		/*
8067 		 * Because fb drivers fail attempts to turn off the
8068 		 * fb when the monitor is on, but treat a request to
8069 		 * turn on the monitor as a request to turn on the
8070 		 * fb too, we process components in descending order
8071 		 * Because autopm is disabled and processes aren't
8072 		 * running, it is ok to examine current power outside
8073 		 * of the power lock
8074 		 */
8075 		for (i = nc - 1; i >= 0; i--, ip--) {
8076 			if (PM_CURPOWER(dip, i) == *ip)
8077 				continue;
8078 			if (pm_set_power(dip, i, *ip, PM_LEVEL_EXACT,
8079 			    PM_CANBLOCK_BYPASS, 0, &result) != DDI_SUCCESS) {
8080 				cmn_err(CE_WARN, "cpr: unable "
8081 				    "to restore power level of "
8082 				    "component %d of directly "
8083 				    "power manged device %s@%s"
8084 				    " to %d",
8085 				    i, PM_NAME(dip),
8086 				    PM_ADDR(dip), *ip);
8087 				PMD(PMD_FAIL, ("%s: failed to restore "
8088 				    "%s@%s(%s#%d)[%d] exact(%d)->%d, "
8089 				    "errno %d\n", pmf, PM_DEVICE(dip), i,
8090 				    PM_CURPOWER(dip, i), *ip, result))
8091 			}
8092 		}
8093 		if (nc > 2) {
8094 			kmem_free(info->pmi_lp, nc * sizeof (int));
8095 			info->pmi_lp = NULL;
8096 		}
8097 	}
8098 	return (DDI_WALK_CONTINUE);
8099 }
8100 
8101 /*
8102  * Stolen from the bootdev module
8103  * attempt to convert a path to a major number
8104  */
8105 static major_t
8106 i_path_to_major(char *path, char *leaf_name)
8107 {
8108 	extern major_t path_to_major(char *pathname);
8109 	major_t maj;
8110 
8111 	if ((maj = path_to_major(path)) == DDI_MAJOR_T_NONE) {
8112 		maj = ddi_name_to_major(leaf_name);
8113 	}
8114 
8115 	return (maj);
8116 }
8117 
8118 static void i_pm_driver_removed(major_t major);
8119 
8120 /*
8121  * When user calls rem_drv, we need to forget no-involuntary-power-cycles state
8122  * An entry in the list means that the device is detached, so we need to
8123  * adjust its ancestors as if they had just seen this attach, and any detached
8124  * ancestors need to have their list entries adjusted.
8125  */
8126 void
8127 pm_driver_removed(major_t major)
8128 {
8129 
8130 	/*
8131 	 * Serialize removal of drivers. This is to keep ancestors of
8132 	 * a node that is being deleted from getting deleted and added back
8133 	 * with different counters.
8134 	 */
8135 	mutex_enter(&pm_remdrv_lock);
8136 	i_pm_driver_removed(major);
8137 	mutex_exit(&pm_remdrv_lock);
8138 }
8139 
8140 static void adjust_ancestors(char *, int);
8141 static int pm_is_noinvol_ancestor(pm_noinvol_t *);
8142 static void pm_noinvol_process_ancestors(char *);
8143 
8144 /*
8145  * This routine is called recursively by pm_noinvol_process_ancestors()
8146  */
8147 static void
8148 i_pm_driver_removed(major_t major)
8149 {
8150 	PMD_FUNC(pmf, "driver_removed")
8151 	pm_noinvol_t *ip, *pp = NULL;
8152 	int wasvolpmd;
8153 	ASSERT(major != DDI_MAJOR_T_NONE);
8154 	PMD(PMD_NOINVOL, ("%s: %s\n", pmf, ddi_major_to_name(major)))
8155 again:
8156 	rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8157 	for (ip = pm_noinvol_head; ip; pp = ip, ip = ip->ni_next) {
8158 		if (major != ip->ni_major)
8159 			continue;
8160 		/*
8161 		 * If it is an ancestor of no-invol node, which is
8162 		 * not removed, skip it. This is to cover the case of
8163 		 * ancestor removed without removing its descendants.
8164 		 */
8165 		if (pm_is_noinvol_ancestor(ip)) {
8166 			ip->ni_flags |= PMC_DRIVER_REMOVED;
8167 			continue;
8168 		}
8169 		wasvolpmd = ip->ni_wasvolpmd;
8170 		/*
8171 		 * remove the entry from the list
8172 		 */
8173 		if (pp) {
8174 			PMD(PMD_NOINVOL, ("%s: freeing %s, prev is %s\n",
8175 			    pmf, ip->ni_path, pp->ni_path))
8176 			pp->ni_next = ip->ni_next;
8177 		} else {
8178 			PMD(PMD_NOINVOL, ("%s: free %s head\n", pmf,
8179 			    ip->ni_path))
8180 			ASSERT(pm_noinvol_head == ip);
8181 			pm_noinvol_head = ip->ni_next;
8182 		}
8183 		rw_exit(&pm_noinvol_rwlock);
8184 		adjust_ancestors(ip->ni_path, wasvolpmd);
8185 		/*
8186 		 * Had an ancestor been removed before this node, it would have
8187 		 * been skipped. Adjust the no-invol counters for such skipped
8188 		 * ancestors.
8189 		 */
8190 		pm_noinvol_process_ancestors(ip->ni_path);
8191 		kmem_free(ip->ni_path, ip->ni_size);
8192 		kmem_free(ip, sizeof (*ip));
8193 		goto again;
8194 	}
8195 	rw_exit(&pm_noinvol_rwlock);
8196 }
8197 
8198 /*
8199  * returns 1, if *aip is a ancestor of a no-invol node
8200  *	   0, otherwise
8201  */
8202 static int
8203 pm_is_noinvol_ancestor(pm_noinvol_t *aip)
8204 {
8205 	pm_noinvol_t *ip;
8206 
8207 	ASSERT(strlen(aip->ni_path) != 0);
8208 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8209 		if (ip == aip)
8210 			continue;
8211 		/*
8212 		 * To be an ancestor, the path must be an initial substring of
8213 		 * the descendent, and end just before a '/' in the
8214 		 * descendent's path.
8215 		 */
8216 		if ((strstr(ip->ni_path, aip->ni_path) == ip->ni_path) &&
8217 		    (ip->ni_path[strlen(aip->ni_path)] == '/'))
8218 			return (1);
8219 	}
8220 	return (0);
8221 }
8222 
8223 /*
8224  * scan through the pm_noinvolpm list adjusting ancestors of the current
8225  * node;  Modifies string *path.
8226  */
8227 static void
8228 adjust_ancestors(char *path, int wasvolpmd)
8229 {
8230 	PMD_FUNC(pmf, "adjust_ancestors")
8231 	char *cp;
8232 	pm_noinvol_t *lp;
8233 	pm_noinvol_t *pp = NULL;
8234 	major_t locked = DDI_MAJOR_T_NONE;
8235 	dev_info_t *dip;
8236 	char	*pathbuf;
8237 	size_t pathbuflen = strlen(path) + 1;
8238 
8239 	/*
8240 	 * First we look up the ancestor's dip.  If we find it, then we
8241 	 * adjust counts up the tree
8242 	 */
8243 	PMD(PMD_NOINVOL, ("%s: %s wasvolpmd %d\n", pmf, path, wasvolpmd))
8244 	pathbuf = kmem_alloc(pathbuflen, KM_SLEEP);
8245 	(void) strcpy(pathbuf, path);
8246 	cp = strrchr(pathbuf, '/');
8247 	if (cp == NULL)	{
8248 		/* if no ancestors, then nothing to do */
8249 		kmem_free(pathbuf, pathbuflen);
8250 		return;
8251 	}
8252 	*cp = '\0';
8253 	dip = pm_name_to_dip(pathbuf, 1);
8254 	if (dip != NULL) {
8255 		locked = PM_MAJOR(dip);
8256 
8257 		(void) pm_noinvol_update(PM_BP_NOINVOL_REMDRV, 0, wasvolpmd,
8258 		    path, dip);
8259 
8260 		if (locked != DDI_MAJOR_T_NONE)
8261 			ddi_release_devi(dip);
8262 	} else {
8263 		char *apath;
8264 		size_t len = strlen(pathbuf) + 1;
8265 		int  lock_held = 1;
8266 
8267 		/*
8268 		 * Now check for ancestors that exist only in the list
8269 		 */
8270 		apath = kmem_alloc(len, KM_SLEEP);
8271 		(void) strcpy(apath, pathbuf);
8272 		rw_enter(&pm_noinvol_rwlock, RW_WRITER);
8273 		for (lp = pm_noinvol_head; lp; pp = lp, lp = lp->ni_next) {
8274 			/*
8275 			 * This can only happen once.  Since we have to drop
8276 			 * the lock, we need to extract the relevant info.
8277 			 */
8278 			if (strcmp(pathbuf, lp->ni_path) == 0) {
8279 				PMD(PMD_NOINVOL, ("%s: %s no %d -> %d\n", pmf,
8280 				    lp->ni_path, lp->ni_noinvolpm,
8281 				    lp->ni_noinvolpm - 1))
8282 				lp->ni_noinvolpm--;
8283 				if (wasvolpmd && lp->ni_volpmd) {
8284 					PMD(PMD_NOINVOL, ("%s: %s vol %d -> "
8285 					    "%d\n", pmf, lp->ni_path,
8286 					    lp->ni_volpmd, lp->ni_volpmd - 1))
8287 					lp->ni_volpmd--;
8288 				}
8289 				/*
8290 				 * remove the entry from the list, if there
8291 				 * are no more no-invol descendants and node
8292 				 * itself is not a no-invol node.
8293 				 */
8294 				if (!(lp->ni_noinvolpm ||
8295 				    (lp->ni_flags & PMC_NO_INVOL))) {
8296 					ASSERT(lp->ni_volpmd == 0);
8297 					if (pp) {
8298 						PMD(PMD_NOINVOL, ("%s: freeing "
8299 						    "%s, prev is %s\n", pmf,
8300 						    lp->ni_path, pp->ni_path))
8301 						pp->ni_next = lp->ni_next;
8302 					} else {
8303 						PMD(PMD_NOINVOL, ("%s: free %s "
8304 						    "head\n", pmf, lp->ni_path))
8305 						ASSERT(pm_noinvol_head == lp);
8306 						pm_noinvol_head = lp->ni_next;
8307 					}
8308 					lock_held = 0;
8309 					rw_exit(&pm_noinvol_rwlock);
8310 					adjust_ancestors(apath, wasvolpmd);
8311 					/* restore apath */
8312 					(void) strcpy(apath, pathbuf);
8313 					kmem_free(lp->ni_path, lp->ni_size);
8314 					kmem_free(lp, sizeof (*lp));
8315 				}
8316 				break;
8317 			}
8318 		}
8319 		if (lock_held)
8320 			rw_exit(&pm_noinvol_rwlock);
8321 		adjust_ancestors(apath, wasvolpmd);
8322 		kmem_free(apath, len);
8323 	}
8324 	kmem_free(pathbuf, pathbuflen);
8325 }
8326 
8327 /*
8328  * Do no-invol processing for any ancestors i.e. adjust counters of ancestors,
8329  * which were skipped even though their drivers were removed.
8330  */
8331 static void
8332 pm_noinvol_process_ancestors(char *path)
8333 {
8334 	pm_noinvol_t *lp;
8335 
8336 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8337 	for (lp = pm_noinvol_head; lp; lp = lp->ni_next) {
8338 		if (strstr(path, lp->ni_path) &&
8339 		    (lp->ni_flags & PMC_DRIVER_REMOVED)) {
8340 			rw_exit(&pm_noinvol_rwlock);
8341 			i_pm_driver_removed(lp->ni_major);
8342 			return;
8343 		}
8344 	}
8345 	rw_exit(&pm_noinvol_rwlock);
8346 }
8347 
8348 /*
8349  * Returns true if (detached) device needs to be kept up because it exported the
8350  * "no-involuntary-power-cycles" property or we're pretending it did (console
8351  * fb case) or it is an ancestor of such a device and has used up the "one
8352  * free cycle" allowed when all such leaf nodes have voluntarily powered down
8353  * upon detach.  In any event, we need an exact hit on the path or we return
8354  * false.
8355  */
8356 int
8357 pm_noinvol_detached(char *path)
8358 {
8359 	PMD_FUNC(pmf, "noinvol_detached")
8360 	pm_noinvol_t *ip;
8361 	int ret = 0;
8362 
8363 	rw_enter(&pm_noinvol_rwlock, RW_READER);
8364 	for (ip = pm_noinvol_head; ip; ip = ip->ni_next) {
8365 		if (strcmp(path, ip->ni_path) == 0) {
8366 			if (ip->ni_flags & PMC_CONSOLE_FB) {
8367 				PMD(PMD_NOINVOL | PMD_CFB, ("%s: inhibits CFB "
8368 				    "%s\n", pmf, path))
8369 				ret = 1;
8370 				break;
8371 			}
8372 #ifdef	DEBUG
8373 			if (ip->ni_noinvolpm != ip->ni_volpmd)
8374 				PMD(PMD_NOINVOL, ("%s: (%d != %d) inhibits %s"
8375 				    "\n", pmf, ip->ni_noinvolpm, ip->ni_volpmd,
8376 				    path))
8377 #endif
8378 			ret = (ip->ni_noinvolpm != ip->ni_volpmd);
8379 			break;
8380 		}
8381 	}
8382 	rw_exit(&pm_noinvol_rwlock);
8383 	return (ret);
8384 }
8385 
8386 int
8387 pm_is_cfb(dev_info_t *dip)
8388 {
8389 	return (dip == cfb_dip);
8390 }
8391 
#ifdef	DEBUG
/*
 * True when every component of the console frame buffer is at "normal"
 * power, i.e. fully on.  Also true when the console is not a framebuffer.
 * The answer comes from pm_cfb_comps_off being zero.
 */
int
pm_cfb_is_up(void)
{
	return (!pm_cfb_comps_off);
}
#endif
8404 
8405 /*
8406  * Preventing scan from powering down the node by incrementing the
8407  * kidsupcnt.
8408  */
8409 void
8410 pm_hold_power(dev_info_t *dip)
8411 {
8412 	e_pm_hold_rele_power(dip, 1);
8413 }
8414 
8415 /*
8416  * Releasing the hold by decrementing the kidsupcnt allowing scan
8417  * to power down the node if all conditions are met.
8418  */
8419 void
8420 pm_rele_power(dev_info_t *dip)
8421 {
8422 	e_pm_hold_rele_power(dip, -1);
8423 }
8424 
8425 /*
8426  * A wrapper of pm_all_to_normal() to power up a dip
8427  * to its normal level
8428  */
8429 int
8430 pm_powerup(dev_info_t *dip)
8431 {
8432 	PMD_FUNC(pmf, "pm_powerup")
8433 
8434 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
8435 	ASSERT(!(servicing_interrupt()));
8436 
8437 	/*
8438 	 * in case this node is not already participating pm
8439 	 */
8440 	if (!PM_GET_PM_INFO(dip)) {
8441 		if (!DEVI_IS_ATTACHING(dip))
8442 			return (DDI_SUCCESS);
8443 		if (pm_start(dip) != DDI_SUCCESS)
8444 			return (DDI_FAILURE);
8445 		if (!PM_GET_PM_INFO(dip))
8446 			return (DDI_SUCCESS);
8447 	}
8448 
8449 	return (pm_all_to_normal(dip, PM_CANBLOCK_BLOCK));
8450 }
8451 
8452 int
8453 pm_rescan_walk(dev_info_t *dip, void *arg)
8454 {
8455 	_NOTE(ARGUNUSED(arg))
8456 
8457 	if (!PM_GET_PM_INFO(dip) || PM_ISBC(dip))
8458 		return (DDI_WALK_CONTINUE);
8459 
8460 	/*
8461 	 * Currently pm_cpr_callb/resume code is the only caller
8462 	 * and it needs to make sure that stopped scan get
8463 	 * reactivated. Otherwise, rescan walk needn't reactive
8464 	 * stopped scan.
8465 	 */
8466 	pm_scan_init(dip);
8467 
8468 	(void) pm_rescan(dip);
8469 	return (DDI_WALK_CONTINUE);
8470 }
8471 
8472 static dev_info_t *
8473 pm_get_next_descendent(dev_info_t *dip, dev_info_t *tdip)
8474 {
8475 	dev_info_t *wdip, *pdip;
8476 
8477 	for (wdip = tdip; wdip != dip; wdip = pdip) {
8478 		pdip = ddi_get_parent(wdip);
8479 		if (pdip == dip)
8480 			return (wdip);
8481 	}
8482 	return (NULL);
8483 }
8484 
8485 int
8486 pm_busop_bus_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8487     void *arg, void *result)
8488 {
8489 	PMD_FUNC(pmf, "bp_bus_power")
8490 	dev_info_t	*cdip;
8491 	pm_info_t	*cinfo;
8492 	pm_bp_child_pwrchg_t	*bpc;
8493 	pm_sp_misc_t		*pspm;
8494 	pm_bp_nexus_pwrup_t *bpn;
8495 	pm_bp_child_pwrchg_t new_bpc;
8496 	pm_bp_noinvol_t *bpi;
8497 	dev_info_t *tdip;
8498 	char *pathbuf;
8499 	int		ret = DDI_SUCCESS;
8500 	int		errno = 0;
8501 	pm_component_t *cp;
8502 
8503 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8504 	    pm_decode_op(op)))
8505 	switch (op) {
8506 	case BUS_POWER_CHILD_PWRCHG:
8507 		bpc = (pm_bp_child_pwrchg_t *)arg;
8508 		pspm = (pm_sp_misc_t *)bpc->bpc_private;
8509 		tdip = bpc->bpc_dip;
8510 		cdip = pm_get_next_descendent(dip, tdip);
8511 		cinfo = PM_GET_PM_INFO(cdip);
8512 		if (cdip != tdip) {
8513 			/*
8514 			 * If the node is an involved parent, it needs to
8515 			 * power up the node as it is needed.  There is nothing
8516 			 * else the framework can do here.
8517 			 */
8518 			if (PM_WANTS_NOTIFICATION(cdip)) {
8519 				PMD(PMD_SET, ("%s: call bus_power for "
8520 				    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(cdip)))
8521 				return ((*PM_BUS_POWER_FUNC(cdip))(cdip,
8522 				    impl_arg, op, arg, result));
8523 			}
8524 			ASSERT(pspm->pspm_direction == PM_LEVEL_UPONLY ||
8525 			    pspm->pspm_direction == PM_LEVEL_DOWNONLY ||
8526 			    pspm->pspm_direction == PM_LEVEL_EXACT);
8527 			/*
8528 			 * we presume that the parent needs to be up in
8529 			 * order for the child to change state (either
8530 			 * because it must already be on if the child is on
8531 			 * (and the pm_all_to_normal_nexus() will be a nop)
8532 			 * or because it will need to be on for the child
8533 			 * to come on; so we make the call regardless
8534 			 */
8535 			pm_hold_power(cdip);
8536 			if (cinfo) {
8537 				pm_canblock_t canblock = pspm->pspm_canblock;
8538 				ret = pm_all_to_normal_nexus(cdip, canblock);
8539 				if (ret != DDI_SUCCESS) {
8540 					pm_rele_power(cdip);
8541 					return (ret);
8542 				}
8543 			}
8544 			PMD(PMD_SET, ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8545 			    PM_DEVICE(cdip)))
8546 			ret = pm_busop_bus_power(cdip, impl_arg, op, arg,
8547 			    result);
8548 			pm_rele_power(cdip);
8549 		} else {
8550 			ret = pm_busop_set_power(cdip, impl_arg, op, arg,
8551 			    result);
8552 		}
8553 		return (ret);
8554 
8555 	case BUS_POWER_NEXUS_PWRUP:
8556 		bpn = (pm_bp_nexus_pwrup_t *)arg;
8557 		pspm = (pm_sp_misc_t *)bpn->bpn_private;
8558 
8559 		if (!e_pm_valid_info(dip, NULL) ||
8560 		    !e_pm_valid_comp(dip, bpn->bpn_comp, &cp) ||
8561 		    !e_pm_valid_power(dip, bpn->bpn_comp, bpn->bpn_level)) {
8562 			PMD(PMD_SET, ("%s: %s@%s(%s#%d) has no pm info; EIO\n",
8563 			    pmf, PM_DEVICE(dip)))
8564 			*pspm->pspm_errnop = EIO;
8565 			*(int *)result = DDI_FAILURE;
8566 			return (DDI_FAILURE);
8567 		}
8568 
8569 		ASSERT(bpn->bpn_dip == dip);
8570 		PMD(PMD_SET, ("%s: nexus powerup for %s@%s(%s#%d)\n", pmf,
8571 		    PM_DEVICE(dip)))
8572 		new_bpc.bpc_dip = dip;
8573 		pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8574 		new_bpc.bpc_path = ddi_pathname(dip, pathbuf);
8575 		new_bpc.bpc_comp = bpn->bpn_comp;
8576 		new_bpc.bpc_olevel = PM_CURPOWER(dip, bpn->bpn_comp);
8577 		new_bpc.bpc_nlevel = bpn->bpn_level;
8578 		new_bpc.bpc_private = bpn->bpn_private;
8579 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_direction =
8580 		    PM_LEVEL_UPONLY;
8581 		((pm_sp_misc_t *)(new_bpc.bpc_private))->pspm_errnop =
8582 		    &errno;
8583 		ret = pm_busop_set_power(dip, impl_arg, BUS_POWER_CHILD_PWRCHG,
8584 		    (void *)&new_bpc, result);
8585 		kmem_free(pathbuf, MAXPATHLEN);
8586 		return (ret);
8587 
8588 	case BUS_POWER_NOINVOL:
8589 		bpi = (pm_bp_noinvol_t *)arg;
8590 		tdip = bpi->bpni_dip;
8591 		cdip = pm_get_next_descendent(dip, tdip);
8592 
8593 		/* In case of rem_drv, the leaf node has been removed */
8594 		if (cdip == NULL)
8595 			return (DDI_SUCCESS);
8596 
8597 		cinfo = PM_GET_PM_INFO(cdip);
8598 		if (cdip != tdip) {
8599 			if (PM_WANTS_NOTIFICATION(cdip)) {
8600 				PMD(PMD_NOINVOL,
8601 				    ("%s: call bus_power for %s@%s(%s#%d)\n",
8602 				    pmf, PM_DEVICE(cdip)))
8603 				ret = (*PM_BUS_POWER_FUNC(cdip))
8604 				    (cdip, NULL, op, arg, result);
8605 				if ((cinfo) && (ret == DDI_SUCCESS))
8606 					(void) pm_noinvol_update_node(cdip,
8607 					    bpi);
8608 				return (ret);
8609 			} else {
8610 				PMD(PMD_NOINVOL,
8611 				    ("%s: walk down to %s@%s(%s#%d)\n", pmf,
8612 				    PM_DEVICE(cdip)))
8613 				ret = pm_busop_bus_power(cdip, NULL, op,
8614 				    arg, result);
8615 				/*
8616 				 * Update the current node.
8617 				 */
8618 				if ((cinfo) && (ret == DDI_SUCCESS))
8619 					(void) pm_noinvol_update_node(cdip,
8620 					    bpi);
8621 				return (ret);
8622 			}
8623 		} else {
8624 			/*
8625 			 * For attach, detach, power up:
8626 			 * Do nothing for leaf node since its
8627 			 * counts are already updated.
8628 			 * For CFB and driver removal, since the
8629 			 * path and the target dip passed in is up to and incl.
8630 			 * the immediate ancestor, need to do the update.
8631 			 */
8632 			PMD(PMD_NOINVOL, ("%s: target %s@%s(%s#%d) is "
8633 			    "reached\n", pmf, PM_DEVICE(cdip)))
8634 			if (cinfo && ((bpi->bpni_cmd == PM_BP_NOINVOL_REMDRV) ||
8635 			    (bpi->bpni_cmd == PM_BP_NOINVOL_CFB)))
8636 				(void) pm_noinvol_update_node(cdip, bpi);
8637 			return (DDI_SUCCESS);
8638 		}
8639 
8640 	default:
8641 		PMD(PMD_SET, ("%s: operation %d is not supported!\n", pmf, op))
8642 		return (DDI_FAILURE);
8643 	}
8644 }
8645 
8646 static int
8647 pm_busop_set_power(dev_info_t *dip, void *impl_arg, pm_bus_power_op_t op,
8648     void *arg, void *resultp)
8649 {
8650 	_NOTE(ARGUNUSED(impl_arg))
8651 	PMD_FUNC(pmf, "bp_set_power")
8652 	pm_ppm_devlist_t *devl = NULL;
8653 	int clevel, circ;
8654 #ifdef	DEBUG
8655 	int circ_db, ccirc_db;
8656 #endif
8657 	int ret = DDI_SUCCESS;
8658 	dev_info_t *cdip;
8659 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
8660 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
8661 	pm_canblock_t canblock = pspm->pspm_canblock;
8662 	int scan = pspm->pspm_scan;
8663 	int comp = bpc->bpc_comp;
8664 	int olevel = bpc->bpc_olevel;
8665 	int nlevel = bpc->bpc_nlevel;
8666 	int comps_off_incr = 0;
8667 	dev_info_t *pdip = ddi_get_parent(dip);
8668 	int dodeps;
8669 	int direction = pspm->pspm_direction;
8670 	int *errnop = pspm->pspm_errnop;
8671 #ifdef PMDDEBUG
8672 	char *dir = pm_decode_direction(direction);
8673 #endif
8674 	int *iresp = (int *)resultp;
8675 	time_t	idletime, thresh;
8676 	pm_component_t *cp = PM_CP(dip, comp);
8677 	int work_type;
8678 
8679 	*iresp = DDI_SUCCESS;
8680 	*errnop = 0;
8681 	ASSERT(op == BUS_POWER_CHILD_PWRCHG);
8682 	PMD(PMD_SET, ("%s: %s@%s(%s#%d) %s\n", pmf, PM_DEVICE(dip),
8683 	    pm_decode_op(op)))
8684 
8685 	/*
8686 	 * The following set of conditions indicate we are here to handle a
8687 	 * driver's pm_[raise|lower]_power request, but the device is being
8688 	 * power managed (PM_DIRECT_PM) by a user process.  For that case
8689 	 * we want to pm_block and pass a status back to the caller based
8690 	 * on whether the controlling process's next activity on the device
8691 	 * matches the current request or not.  This distinction tells
8692 	 * downstream functions to avoid calling into a driver or changing
8693 	 * the framework's power state.  To actually block, we need:
8694 	 *
8695 	 * PM_ISDIRECT(dip)
8696 	 *	no reason to block unless a process is directly controlling dev
8697 	 * direction != PM_LEVEL_EXACT
8698 	 *	EXACT is used by controlling proc's PM_SET_CURRENT_POWER ioctl
8699 	 * !pm_processes_stopped
8700 	 *	don't block if controlling proc already be stopped for cpr
8701 	 * canblock != PM_CANBLOCK_BYPASS
8702 	 *	our caller must not have explicitly prevented blocking
8703 	 */
8704 	if (direction != PM_LEVEL_EXACT && canblock != PM_CANBLOCK_BYPASS) {
8705 		PM_LOCK_DIP(dip);
8706 		while (PM_ISDIRECT(dip) && !pm_processes_stopped) {
8707 			/* releases dip lock */
8708 			ret = pm_busop_match_request(dip, bpc);
8709 			if (ret == EAGAIN) {
8710 				PM_LOCK_DIP(dip);
8711 				continue;
8712 			}
8713 			return (*iresp = ret);
8714 		}
8715 		PM_UNLOCK_DIP(dip);
8716 	}
8717 	/* BC device is never scanned, so power will stick until we are done */
8718 	if (PM_ISBC(dip) && comp != 0 && nlevel != 0 &&
8719 	    direction != PM_LEVEL_DOWNONLY) {
8720 		int nrmpwr0 = pm_get_normal_power(dip, 0);
8721 		if (pm_set_power(dip, 0, nrmpwr0, direction,
8722 		    canblock, 0, resultp) != DDI_SUCCESS) {
8723 			/* *resultp set by pm_set_power */
8724 			return (DDI_FAILURE);
8725 		}
8726 	}
8727 	if (PM_WANTS_NOTIFICATION(pdip)) {
8728 		PMD(PMD_SET, ("%s: pre_notify %s@%s(%s#%d) for child "
8729 		    "%s@%s(%s#%d)\n", pmf, PM_DEVICE(pdip), PM_DEVICE(dip)))
8730 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8731 		    BUS_POWER_PRE_NOTIFICATION, bpc, resultp);
8732 		if (ret != DDI_SUCCESS) {
8733 			PMD(PMD_SET, ("%s: failed to pre_notify %s@%s(%s#%d)\n",
8734 			    pmf, PM_DEVICE(pdip)))
8735 			return (DDI_FAILURE);
8736 		}
8737 	} else {
8738 		/*
8739 		 * Since we don't know what the actual power level is,
8740 		 * we place a power hold on the parent no matter what
8741 		 * component and level is changing.
8742 		 */
8743 		pm_hold_power(pdip);
8744 	}
8745 	PM_LOCK_POWER(dip, &circ);
8746 	clevel = PM_CURPOWER(dip, comp);
8747 	/*
8748 	 * It's possible that a call was made to pm_update_maxpower()
8749 	 * on another thread before we took the lock above. So, we need to
8750 	 * make sure that this request isn't processed after the
8751 	 * change of power executed on behalf of pm_update_maxpower().
8752 	 */
8753 	if (nlevel > pm_get_normal_power(dip, comp)) {
8754 		PMD(PMD_SET, ("%s: requested level is higher than normal.\n",
8755 		    pmf))
8756 		ret = DDI_FAILURE;
8757 		*iresp = DDI_FAILURE;
8758 		goto post_notify;
8759 	}
8760 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, olvl=%d, nlvl=%d, clvl=%d, "
8761 	    "dir=%s\n", pmf, PM_DEVICE(dip), comp, bpc->bpc_olevel, nlevel,
8762 	    clevel, dir))
8763 	switch (direction) {
8764 	case PM_LEVEL_UPONLY:
8765 		/* Powering up */
8766 		if (clevel >= nlevel) {
8767 			PMD(PMD_SET, ("%s: current level is already "
8768 			    "at or above the requested level.\n", pmf))
8769 			*iresp = DDI_SUCCESS;
8770 			ret = DDI_SUCCESS;
8771 			goto post_notify;
8772 		}
8773 		break;
8774 	case PM_LEVEL_EXACT:
8775 		/* specific level request */
8776 		if (clevel == nlevel && !PM_ISBC(dip)) {
8777 			PMD(PMD_SET, ("%s: current level is already "
8778 			    "at the requested level.\n", pmf))
8779 			*iresp = DDI_SUCCESS;
8780 			ret = DDI_SUCCESS;
8781 			goto post_notify;
8782 		} else if (PM_IS_CFB(dip) && (nlevel < clevel)) {
8783 			PMD(PMD_CFB, ("%s: powerdown of console\n", pmf))
8784 			if (!pm_cfb_enabled) {
8785 				PMD(PMD_ERROR | PMD_CFB,
8786 				    ("%s: !pm_cfb_enabled, fails\n", pmf))
8787 				*errnop = EINVAL;
8788 				*iresp = DDI_FAILURE;
8789 				ret = DDI_FAILURE;
8790 				goto post_notify;
8791 			}
8792 			mutex_enter(&pm_cfb_lock);
8793 			while (cfb_inuse) {
8794 				mutex_exit(&pm_cfb_lock);
8795 				if (delay_sig(1) == EINTR) {
8796 					ret = DDI_FAILURE;
8797 					*iresp = DDI_FAILURE;
8798 					*errnop = EINTR;
8799 					goto post_notify;
8800 				}
8801 				mutex_enter(&pm_cfb_lock);
8802 			}
8803 			mutex_exit(&pm_cfb_lock);
8804 		}
8805 		break;
8806 	case PM_LEVEL_DOWNONLY:
8807 		/* Powering down */
8808 		thresh = cur_threshold(dip, comp);
8809 		idletime = gethrestime_sec() - cp->pmc_timestamp;
8810 		if (scan && ((PM_KUC(dip) != 0) ||
8811 		    (cp->pmc_busycount > 0) ||
8812 		    ((idletime < thresh) && !PM_IS_PID(dip)))) {
8813 #ifdef	DEBUG
8814 			if (DEVI(dip)->devi_pm_kidsupcnt != 0)
8815 				PMD(PMD_SET, ("%s: scan failed: "
8816 				    "kidsupcnt != 0\n", pmf))
8817 			if (cp->pmc_busycount > 0)
8818 				PMD(PMD_SET, ("%s: scan failed: "
8819 				    "device become busy\n", pmf))
8820 			if (idletime < thresh)
8821 				PMD(PMD_SET, ("%s: scan failed: device "
8822 				    "hasn't been idle long enough\n", pmf))
8823 #endif
8824 			*iresp = DDI_FAILURE;
8825 			*errnop = EBUSY;
8826 			ret = DDI_FAILURE;
8827 			goto post_notify;
8828 		} else if (clevel != PM_LEVEL_UNKNOWN && clevel <= nlevel) {
8829 			PMD(PMD_SET, ("%s: current level is already at "
8830 			    "or below the requested level.\n", pmf))
8831 			*iresp = DDI_SUCCESS;
8832 			ret = DDI_SUCCESS;
8833 			goto post_notify;
8834 		}
8835 		break;
8836 	}
8837 
8838 	if (PM_IS_CFB(dip) && (comps_off_incr =
8839 	    calc_cfb_comps_incr(dip, comp, clevel, nlevel)) > 0) {
8840 		/*
8841 		 * Pre-adjust pm_cfb_comps_off if lowering a console fb
8842 		 * component from full power.  Remember that we tried to
8843 		 * lower power in case it fails and we need to back out
8844 		 * the adjustment.
8845 		 */
8846 		update_comps_off(comps_off_incr, dip);
8847 		PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d cfb_comps_off->%d\n",
8848 		    pmf, PM_DEVICE(dip), comp, clevel, nlevel,
8849 		    pm_cfb_comps_off))
8850 	}
8851 
8852 	if ((*iresp = power_dev(dip,
8853 	    comp, nlevel, clevel, canblock, &devl)) == DDI_SUCCESS) {
8854 #ifdef DEBUG
8855 		/*
8856 		 * All descendents of this node should already be powered off.
8857 		 */
8858 		if (PM_CURPOWER(dip, comp) == 0) {
8859 			pm_desc_pwrchk_t pdpchk;
8860 			pdpchk.pdpc_dip = dip;
8861 			pdpchk.pdpc_par_involved = PM_WANTS_NOTIFICATION(dip);
8862 			ndi_devi_enter(dip, &circ_db);
8863 			for (cdip = ddi_get_child(dip); cdip != NULL;
8864 			    cdip = ddi_get_next_sibling(cdip)) {
8865 				ndi_devi_enter(cdip, &ccirc_db);
8866 				ddi_walk_devs(cdip, pm_desc_pwrchk_walk,
8867 				    (void *)&pdpchk);
8868 				ndi_devi_exit(cdip, ccirc_db);
8869 			}
8870 			ndi_devi_exit(dip, circ_db);
8871 		}
8872 #endif
8873 		/*
8874 		 * Post-adjust pm_cfb_comps_off if we brought an fb component
8875 		 * back up to full power.
8876 		 */
8877 		if (PM_IS_CFB(dip) && comps_off_incr < 0) {
8878 			update_comps_off(comps_off_incr, dip);
8879 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8880 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8881 			    comp, clevel, nlevel, pm_cfb_comps_off))
8882 		}
8883 		dodeps = 0;
8884 		if (POWERING_OFF(clevel, nlevel)) {
8885 			if (PM_ISBC(dip)) {
8886 				dodeps = (comp == 0);
8887 			} else {
8888 				int i;
8889 				dodeps = 1;
8890 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8891 					/* if some component still on */
8892 					if (PM_CURPOWER(dip, i)) {
8893 						dodeps = 0;
8894 						break;
8895 					}
8896 				}
8897 			}
8898 			if (dodeps)
8899 				work_type = PM_DEP_WK_POWER_OFF;
8900 		} else if (POWERING_ON(clevel, nlevel)) {
8901 			if (PM_ISBC(dip)) {
8902 				dodeps = (comp == 0);
8903 			} else {
8904 				int i;
8905 				dodeps = 1;
8906 				for (i = 0; i < PM_NUMCMPTS(dip); i++) {
8907 					if (i == comp)
8908 						continue;
8909 					if (PM_CURPOWER(dip, i) > 0) {
8910 						dodeps = 0;
8911 						break;
8912 					}
8913 				}
8914 			}
8915 			if (dodeps)
8916 				work_type = PM_DEP_WK_POWER_ON;
8917 		}
8918 
8919 		if (dodeps) {
8920 			char *pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8921 
8922 			(void) ddi_pathname(dip, pathbuf);
8923 			pm_dispatch_to_dep_thread(work_type, pathbuf, NULL,
8924 			    PM_DEP_NOWAIT, NULL, 0);
8925 			kmem_free(pathbuf, MAXPATHLEN);
8926 		}
8927 		if ((PM_CURPOWER(dip, comp) == nlevel) && pm_watchers()) {
8928 			int old;
8929 
8930 			/* If old power cached during deadlock, use it. */
8931 			old = (cp->pmc_flags & PM_PHC_WHILE_SET_POWER ?
8932 			    cp->pmc_phc_pwr : olevel);
8933 			mutex_enter(&pm_rsvp_lock);
8934 			pm_enqueue_notify(PSC_HAS_CHANGED, dip, comp, nlevel,
8935 			    old, canblock);
8936 			pm_enqueue_notify_others(&devl, canblock);
8937 			mutex_exit(&pm_rsvp_lock);
8938 		} else {
8939 			pm_ppm_devlist_t *p;
8940 			pm_ppm_devlist_t *next;
8941 			for (p = devl; p != NULL; p = next) {
8942 				next = p->ppd_next;
8943 				kmem_free(p, sizeof (pm_ppm_devlist_t));
8944 			}
8945 			devl = NULL;
8946 		}
8947 
8948 		/*
8949 		 * If we are coming from a scan, don't do it again,
8950 		 * else we can have infinite loops.
8951 		 */
8952 		if (!scan)
8953 			pm_rescan(dip);
8954 	} else {
8955 		/* if we incremented pm_comps_off_count, but failed */
8956 		if (comps_off_incr > 0) {
8957 			update_comps_off(-comps_off_incr, dip);
8958 			PMD(PMD_CFB, ("%s: %s@%s(%s#%d)[%d] %d->%d "
8959 			    "cfb_comps_off->%d\n", pmf, PM_DEVICE(dip),
8960 			    comp, clevel, nlevel, pm_cfb_comps_off))
8961 		}
8962 		*errnop = EIO;
8963 	}
8964 
8965 post_notify:
8966 	/*
8967 	 * This thread may have been in deadlock with pm_power_has_changed.
8968 	 * Before releasing power lock, clear the flag which marks this
8969 	 * condition.
8970 	 */
8971 	cp->pmc_flags &= ~PM_PHC_WHILE_SET_POWER;
8972 
8973 	/*
8974 	 * Update the old power level in the bus power structure with the
8975 	 * actual power level before the transition was made to the new level.
8976 	 * Some involved parents depend on this information to keep track of
8977 	 * their children's power transition.
8978 	 */
8979 	if (*iresp != DDI_FAILURE)
8980 		bpc->bpc_olevel = clevel;
8981 
8982 	if (PM_WANTS_NOTIFICATION(pdip)) {
8983 		ret = (*PM_BUS_POWER_FUNC(pdip))(pdip, NULL,
8984 		    BUS_POWER_POST_NOTIFICATION, bpc, resultp);
8985 		PM_UNLOCK_POWER(dip, circ);
8986 		PMD(PMD_SET, ("%s: post_notify %s@%s(%s#%d) for "
8987 		    "child %s@%s(%s#%d), ret=%d\n", pmf, PM_DEVICE(pdip),
8988 		    PM_DEVICE(dip), ret))
8989 	} else {
8990 		nlevel = cur_power(cp); /* in case phc deadlock updated pwr */
8991 		PM_UNLOCK_POWER(dip, circ);
8992 		/*
8993 		 * Now that we know what power transition has occurred
8994 		 * (if any), release the power hold.  Leave the hold
8995 		 * in effect in the case of OFF->ON transition.
8996 		 */
8997 		if (!(clevel == 0 && nlevel > 0 &&
8998 		    (!PM_ISBC(dip) || comp == 0)))
8999 			pm_rele_power(pdip);
9000 		/*
9001 		 * If the power transition was an ON->OFF transition,
9002 		 * remove the power hold from the parent.
9003 		 */
9004 		if ((clevel > 0 || clevel == PM_LEVEL_UNKNOWN) &&
9005 		    nlevel == 0 && (!PM_ISBC(dip) || comp == 0))
9006 			pm_rele_power(pdip);
9007 	}
9008 	if (*iresp != DDI_SUCCESS || ret != DDI_SUCCESS)
9009 		return (DDI_FAILURE);
9010 	else
9011 		return (DDI_SUCCESS);
9012 }
9013 
9014 /*
9015  * If an app (SunVTS or Xsun) has taken control, then block until it
9016  * gives it up or makes the requested power level change, unless
9017  * we have other instructions about blocking.  Returns DDI_SUCCESS,
9018  * DDI_FAILURE or EAGAIN (owner released device from directpm).
9019  */
9020 static int
9021 pm_busop_match_request(dev_info_t *dip, void *arg)
9022 {
9023 	PMD_FUNC(pmf, "bp_match_request")
9024 	pm_bp_child_pwrchg_t *bpc = (pm_bp_child_pwrchg_t *)arg;
9025 	pm_sp_misc_t *pspm = (pm_sp_misc_t *)bpc->bpc_private;
9026 	int comp = bpc->bpc_comp;
9027 	int nlevel = bpc->bpc_nlevel;
9028 	pm_canblock_t canblock = pspm->pspm_canblock;
9029 	int direction = pspm->pspm_direction;
9030 	int clevel, circ;
9031 
9032 	ASSERT(PM_IAM_LOCKING_DIP(dip));
9033 	PM_LOCK_POWER(dip, &circ);
9034 	clevel = PM_CURPOWER(dip, comp);
9035 	PMD(PMD_SET, ("%s: %s@%s(%s#%d), cmp=%d, nlvl=%d, clvl=%d\n",
9036 	    pmf, PM_DEVICE(dip), comp, nlevel, clevel))
9037 	if (direction == PM_LEVEL_UPONLY) {
9038 		if (clevel >= nlevel) {
9039 			PM_UNLOCK_POWER(dip, circ);
9040 			PM_UNLOCK_DIP(dip);
9041 			return (DDI_SUCCESS);
9042 		}
9043 	} else if (clevel == nlevel) {
9044 		PM_UNLOCK_POWER(dip, circ);
9045 		PM_UNLOCK_DIP(dip);
9046 		return (DDI_SUCCESS);
9047 	}
9048 	if (canblock == PM_CANBLOCK_FAIL) {
9049 		PM_UNLOCK_POWER(dip, circ);
9050 		PM_UNLOCK_DIP(dip);
9051 		return (DDI_FAILURE);
9052 	}
9053 	if (canblock == PM_CANBLOCK_BLOCK) {
9054 		/*
9055 		 * To avoid a deadlock, we must not hold the
9056 		 * power lock when we pm_block.
9057 		 */
9058 		PM_UNLOCK_POWER(dip, circ);
9059 		PMD(PMD_SET, ("%s: blocking\n", pmf))
9060 		/* pm_block releases dip lock */
9061 		switch (pm_block(dip, comp, nlevel, clevel)) {
9062 		case PMP_RELEASE:
9063 			return (EAGAIN);
9064 		case PMP_SUCCEED:
9065 			return (DDI_SUCCESS);
9066 		case PMP_FAIL:
9067 			return (DDI_FAILURE);
9068 		}
9069 	} else {
9070 		ASSERT(0);
9071 	}
9072 	_NOTE(NOTREACHED);
9073 	return (DDI_FAILURE);	/* keep gcc happy */
9074 }
9075 
9076 static int
9077 pm_all_to_normal_nexus(dev_info_t *dip, pm_canblock_t canblock)
9078 {
9079 	PMD_FUNC(pmf, "all_to_normal_nexus")
9080 	int		*normal;
9081 	int		i, ncomps;
9082 	size_t		size;
9083 	int		changefailed = 0;
9084 	int		ret, result = DDI_SUCCESS;
9085 	pm_bp_nexus_pwrup_t	bpn;
9086 	pm_sp_misc_t	pspm;
9087 
9088 	ASSERT(PM_GET_PM_INFO(dip));
9089 	PMD(PMD_ALLNORM, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9090 	if (pm_get_norm_pwrs(dip, &normal, &size) != DDI_SUCCESS) {
9091 		PMD(PMD_ALLNORM, ("%s: can't get norm pwrs\n", pmf))
9092 		return (DDI_FAILURE);
9093 	}
9094 	ncomps = PM_NUMCMPTS(dip);
9095 	for (i = 0; i < ncomps; i++) {
9096 		bpn.bpn_dip = dip;
9097 		bpn.bpn_comp = i;
9098 		bpn.bpn_level = normal[i];
9099 		pspm.pspm_canblock = canblock;
9100 		pspm.pspm_scan = 0;
9101 		bpn.bpn_private = &pspm;
9102 		ret = pm_busop_bus_power(dip, NULL, BUS_POWER_NEXUS_PWRUP,
9103 		    (void *)&bpn, (void *)&result);
9104 		if (ret != DDI_SUCCESS || result != DDI_SUCCESS) {
9105 			PMD(PMD_FAIL | PMD_ALLNORM, ("%s: %s@%s(%s#%d)[%d] "
9106 			    "->%d failure result %d\n", pmf, PM_DEVICE(dip),
9107 			    i, normal[i], result))
9108 			changefailed++;
9109 		}
9110 	}
9111 	kmem_free(normal, size);
9112 	if (changefailed) {
9113 		PMD(PMD_FAIL, ("%s: failed to set %d comps %s@%s(%s#%d) "
9114 		    "full power\n", pmf, changefailed, PM_DEVICE(dip)))
9115 		return (DDI_FAILURE);
9116 	}
9117 	return (DDI_SUCCESS);
9118 }
9119 
9120 int
9121 pm_noinvol_update(int subcmd, int volpmd, int wasvolpmd, char *path,
9122     dev_info_t *tdip)
9123 {
9124 	PMD_FUNC(pmf, "noinvol_update")
9125 	pm_bp_noinvol_t args;
9126 	int ret;
9127 	int result = DDI_SUCCESS;
9128 
9129 	args.bpni_path = path;
9130 	args.bpni_dip = tdip;
9131 	args.bpni_cmd = subcmd;
9132 	args.bpni_wasvolpmd = wasvolpmd;
9133 	args.bpni_volpmd = volpmd;
9134 	PMD(PMD_NOINVOL, ("%s: update for path %s tdip %p subcmd %d "
9135 	    "volpmd %d wasvolpmd %d\n", pmf,
9136 	    path, (void *)tdip, subcmd, wasvolpmd, volpmd))
9137 	ret = pm_busop_bus_power(ddi_root_node(), NULL, BUS_POWER_NOINVOL,
9138 	    &args, &result);
9139 	return (ret);
9140 }
9141 
9142 void
9143 pm_noinvol_update_node(dev_info_t *dip, pm_bp_noinvol_t *req)
9144 {
9145 	PMD_FUNC(pmf, "noinvol_update_node")
9146 
9147 	PMD(PMD_NOINVOL, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
9148 	switch (req->bpni_cmd) {
9149 	case PM_BP_NOINVOL_ATTACH:
9150 		PMD(PMD_NOINVOL, ("%s: PM_PB_NOINVOL_ATTACH %s@%s(%s#%d) "
9151 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9152 		    DEVI(dip)->devi_pm_noinvolpm,
9153 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9154 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9155 		PM_LOCK_DIP(dip);
9156 		DEVI(dip)->devi_pm_noinvolpm--;
9157 		if (req->bpni_wasvolpmd) {
9158 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_ATTACH "
9159 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9160 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9161 			    DEVI(dip)->devi_pm_volpmd - 1))
9162 			if (DEVI(dip)->devi_pm_volpmd)
9163 				DEVI(dip)->devi_pm_volpmd--;
9164 		}
9165 		PM_UNLOCK_DIP(dip);
9166 		break;
9167 
9168 	case PM_BP_NOINVOL_DETACH:
9169 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH %s@%s(%s#%d) "
9170 		    "noinvolpm %d->%d\n", pmf, PM_DEVICE(dip),
9171 		    DEVI(dip)->devi_pm_noinvolpm,
9172 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9173 		PM_LOCK_DIP(dip);
9174 		DEVI(dip)->devi_pm_noinvolpm++;
9175 		if (req->bpni_wasvolpmd) {
9176 			PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_DETACH "
9177 			    "%s@%s(%s#%d) volpmd %d->%d\n", pmf,
9178 			    PM_DEVICE(dip), DEVI(dip)->devi_pm_volpmd,
9179 			    DEVI(dip)->devi_pm_volpmd + 1))
9180 			DEVI(dip)->devi_pm_volpmd++;
9181 		}
9182 		PM_UNLOCK_DIP(dip);
9183 		break;
9184 
9185 	case PM_BP_NOINVOL_REMDRV:
9186 		PMD(PMD_NOINVOL, ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9187 		    "noinvol %d->%d\n", pmf, PM_DEVICE(dip),
9188 		    DEVI(dip)->devi_pm_noinvolpm,
9189 		    DEVI(dip)->devi_pm_noinvolpm - 1))
9190 		ASSERT(DEVI(dip)->devi_pm_noinvolpm);
9191 		PM_LOCK_DIP(dip);
9192 		DEVI(dip)->devi_pm_noinvolpm--;
9193 		if (req->bpni_wasvolpmd) {
9194 			PMD(PMD_NOINVOL,
9195 			    ("%s: PM_BP_NOINVOL_REMDRV %s@%s(%s#%d) "
9196 			    "volpmd %d->%d\n", pmf, PM_DEVICE(dip),
9197 			    DEVI(dip)->devi_pm_volpmd,
9198 			    DEVI(dip)->devi_pm_volpmd - 1))
9199 			/*
9200 			 * A power up could come in between and
9201 			 * clear the volpmd, if that's the case,
9202 			 * volpmd would be clear.
9203 			 */
9204 			if (DEVI(dip)->devi_pm_volpmd)
9205 				DEVI(dip)->devi_pm_volpmd--;
9206 		}
9207 		PM_UNLOCK_DIP(dip);
9208 		break;
9209 
9210 	case PM_BP_NOINVOL_CFB:
9211 		PMD(PMD_NOINVOL,
9212 		    ("%s: PM_BP_NOIVOL_CFB %s@%s(%s#%d) noinvol %d->%d\n",
9213 		    pmf, PM_DEVICE(dip), DEVI(dip)->devi_pm_noinvolpm,
9214 		    DEVI(dip)->devi_pm_noinvolpm + 1))
9215 		PM_LOCK_DIP(dip);
9216 		DEVI(dip)->devi_pm_noinvolpm++;
9217 		PM_UNLOCK_DIP(dip);
9218 		break;
9219 
9220 	case PM_BP_NOINVOL_POWER:
9221 		PMD(PMD_NOINVOL,
9222 		    ("%s: PM_BP_NOIVOL_PWR %s@%s(%s#%d) volpmd %d->%d\n",
9223 		    pmf, PM_DEVICE(dip),
9224 		    DEVI(dip)->devi_pm_volpmd, DEVI(dip)->devi_pm_volpmd -
9225 		    req->bpni_volpmd))
9226 		PM_LOCK_DIP(dip);
9227 		DEVI(dip)->devi_pm_volpmd -= req->bpni_volpmd;
9228 		PM_UNLOCK_DIP(dip);
9229 		break;
9230 
9231 	default:
9232 		break;
9233 	}
9234 
9235 }
9236 
#ifdef DEBUG
/*
 * DEBUG-only ddi_walk_devs() callback.  arg is a pm_desc_pwrchk_t naming
 * the node being checked against (pdpc_dip).  For each component of dip
 * whose current power is non-zero, report it through cmn_err(): at
 * CE_PANIC when pdpc_par_involved is zero, at CE_WARN otherwise.  Nodes
 * without pm info are passed over.  Always returns DDI_WALK_CONTINUE.
 */
static int
pm_desc_pwrchk_walk(dev_info_t *dip, void *arg)
{
	PMD_FUNC(pmf, "desc_pwrchk")
	pm_desc_pwrchk_t *chk = (pm_desc_pwrchk_t *)arg;
	int c;
	/* LINTED */
	int curpwr, ce_level;

	if (PM_GET_PM_INFO(dip) == NULL)
		return (DDI_WALK_CONTINUE);

	PMD(PMD_SET, ("%s: %s@%s(%s#%d)\n", pmf, PM_DEVICE(dip)))
	for (c = 0; c < PM_NUMCMPTS(dip); c++) {
		/* LINTED */
		curpwr = PM_CURPOWER(dip, c);
		if (curpwr == 0)
			continue;

		/* E_FUNC_SET_NOT_USED */
		if (chk->pdpc_par_involved == 0)
			ce_level = CE_PANIC;
		else
			ce_level = CE_WARN;

		PMD(PMD_SET, ("%s: %s@%s(%s#%d) is powered off while desc "
		    "%s@%s(%s#%d)[%d] is at %d\n", pmf,
		    PM_DEVICE(chk->pdpc_dip), PM_DEVICE(dip), c, curpwr))
		cmn_err(ce_level, "!device %s@%s(%s#%d) is powered on, "
		    "while its ancestor, %s@%s(%s#%d), is powering off!",
		    PM_DEVICE(dip), PM_DEVICE(chk->pdpc_dip));
	}
	return (DDI_WALK_CONTINUE);
}
#endif
9269 
9270 /*
9271  * Record the fact that one thread is borrowing the lock on a device node.
9272  * Use is restricted to the case where the lending thread will block until
9273  * the borrowing thread (always curthread) completes.
9274  */
9275 void
9276 pm_borrow_lock(kthread_t *lender)
9277 {
9278 	lock_loan_t *prev = &lock_loan_head;
9279 	lock_loan_t *cur = (lock_loan_t *)kmem_zalloc(sizeof (*cur), KM_SLEEP);
9280 
9281 	cur->pmlk_borrower = curthread;
9282 	cur->pmlk_lender = lender;
9283 	mutex_enter(&pm_loan_lock);
9284 	cur->pmlk_next = prev->pmlk_next;
9285 	prev->pmlk_next = cur;
9286 	mutex_exit(&pm_loan_lock);
9287 }
9288 
9289 /*
9290  * Return the borrowed lock.  A thread can borrow only one.
9291  */
9292 void
9293 pm_return_lock(void)
9294 {
9295 	lock_loan_t *cur;
9296 	lock_loan_t *prev = &lock_loan_head;
9297 
9298 	mutex_enter(&pm_loan_lock);
9299 	ASSERT(prev->pmlk_next != NULL);
9300 	for (cur = prev->pmlk_next; cur; prev = cur, cur = cur->pmlk_next)
9301 		if (cur->pmlk_borrower == curthread)
9302 			break;
9303 
9304 	ASSERT(cur != NULL);
9305 	prev->pmlk_next = cur->pmlk_next;
9306 	mutex_exit(&pm_loan_lock);
9307 	kmem_free(cur, sizeof (*cur));
9308 }
9309 
#if defined(__x86)

/*
 * I/O port numbers and status bits used by the polled character routines
 * below.  CPR_RXR and CPR_TXR are masks tested against the value read from
 * CPR_LSTAT; CPR_DATAREG is the port characters are read from and written
 * to.  CPR_INTRCTL is defined here but not used by these routines.
 */
#define	CPR_RXR	0x1
#define	CPR_TXR	0x20
#define	CPR_DATAREG	0x3f8
#define	CPR_LSTAT	0x3fd
#define	CPR_INTRCTL	0x3f9

/*
 * Poll CPR_LSTAT, waiting 10us between reads, until the CPR_RXR bit is
 * set; then read and return one character from CPR_DATAREG.
 */
char
pm_getchar(void)
{
	for (;;) {
		if ((inb(CPR_LSTAT) & CPR_RXR) == CPR_RXR)
			break;
		drv_usecwait(10);
	}
	return (inb(CPR_DATAREG));
}

/*
 * Poll CPR_LSTAT, waiting 10us between reads, until the CPR_TXR bit is
 * set; then write c to CPR_DATAREG.
 */
void
pm_putchar(char c)
{
	while (!(inb(CPR_LSTAT) & CPR_TXR)) {
		drv_usecwait(10);
	}
	outb(CPR_DATAREG, c);
}

/*
 * Emit the NUL-terminated string s one character at a time with
 * pm_putchar().  No formatting is performed.
 */
void
pm_printf(char *s)
{
	for (; *s != '\0'; s++)
		pm_putchar(*s);
}

#endif
9346 
9347 int
9348 pm_ppm_searchlist(pm_searchargs_t *sp)
9349 {
9350 	power_req_t power_req;
9351 	int result = 0;
9352 	/* LINTED */
9353 	int ret;
9354 
9355 	power_req.request_type = PMR_PPM_SEARCH_LIST;
9356 	power_req.req.ppm_search_list_req.searchlist = sp;
9357 	ASSERT(DEVI(ddi_root_node())->devi_pm_ppm);
9358 	ret = pm_ctlops((dev_info_t *)DEVI(ddi_root_node())->devi_pm_ppm,
9359 	    ddi_root_node(), DDI_CTLOPS_POWER, &power_req, &result);
9360 	PMD(PMD_SX, ("pm_ppm_searchlist returns %d, result %d\n",
9361 	    ret, result))
9362 	return (result);
9363 }
9364