xref: /titanic_44/usr/src/uts/sun4u/sunfire/io/sysctrl_quiesce.c (revision 8fc99e42676a23421c75e76660640f9765d693b1)
129949e86Sstevel /*
229949e86Sstevel  * CDDL HEADER START
329949e86Sstevel  *
429949e86Sstevel  * The contents of this file are subject to the terms of the
529949e86Sstevel  * Common Development and Distribution License (the "License").
629949e86Sstevel  * You may not use this file except in compliance with the License.
729949e86Sstevel  *
829949e86Sstevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
929949e86Sstevel  * or http://www.opensolaris.org/os/licensing.
1029949e86Sstevel  * See the License for the specific language governing permissions
1129949e86Sstevel  * and limitations under the License.
1229949e86Sstevel  *
1329949e86Sstevel  * When distributing Covered Code, include this CDDL HEADER in each
1429949e86Sstevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1529949e86Sstevel  * If applicable, add the following below this CDDL HEADER, with the
1629949e86Sstevel  * fields enclosed by brackets "[]" replaced with your own identifying
1729949e86Sstevel  * information: Portions Copyright [yyyy] [name of copyright owner]
1829949e86Sstevel  *
1929949e86Sstevel  * CDDL HEADER END
2029949e86Sstevel  */
2129949e86Sstevel 
2229949e86Sstevel /*
23*8fc99e42STrevor Thompson  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
2429949e86Sstevel  * Use is subject to license terms.
2529949e86Sstevel  */
2629949e86Sstevel 
/*
 * This workaround inhibits prom_printf after the cpus are grabbed.
 * This can be removed when 4154263 is corrected.
 *
 * While Bug_4154263 is defined, the DEBUGP() messages in this file that
 * are wrapped in "#ifndef Bug_4154263" are compiled out.
 */
#define	Bug_4154263
3229949e86Sstevel 
3329949e86Sstevel /*
3429949e86Sstevel  * A CPR derivative specifically for sunfire
3529949e86Sstevel  */
3629949e86Sstevel 
3729949e86Sstevel #include <sys/types.h>
3829949e86Sstevel #include <sys/systm.h>
3929949e86Sstevel #include <sys/machparam.h>
4029949e86Sstevel #include <sys/machsystm.h>
4129949e86Sstevel #include <sys/ddi.h>
4229949e86Sstevel #define	SUNDDI_IMPL
4329949e86Sstevel #include <sys/sunddi.h>
4429949e86Sstevel #include <sys/time.h>
4529949e86Sstevel #include <sys/kmem.h>
4629949e86Sstevel #include <nfs/lm.h>
4729949e86Sstevel #include <sys/ddi_impldefs.h>
4829949e86Sstevel #include <sys/obpdefs.h>
4929949e86Sstevel #include <sys/cmn_err.h>
5029949e86Sstevel #include <sys/debug.h>
5129949e86Sstevel #include <sys/errno.h>
5229949e86Sstevel #include <sys/callb.h>
5329949e86Sstevel #include <sys/clock.h>
5429949e86Sstevel #include <sys/x_call.h>
5529949e86Sstevel #include <sys/cpuvar.h>
5629949e86Sstevel #include <sys/epm.h>
5729949e86Sstevel #include <sys/vfs.h>
5829949e86Sstevel #include <sys/fhc.h>
5929949e86Sstevel #include <sys/sysctrl.h>
6029949e86Sstevel #include <sys/promif.h>
6129949e86Sstevel #include <sys/conf.h>
6229949e86Sstevel #include <sys/modctl.h>
6329949e86Sstevel #include <sys/cyclic.h>
6429949e86Sstevel #include <sys/sunndi.h>
6529949e86Sstevel #include <sys/machsystm.h>
6629949e86Sstevel 
/*
 * Records how far sysctrl_suspend() has progressed.  sysctrl_resume()
 * inspects this value to decide which stages have to be undone.
 */
static enum sysctrl_suspend_state {
	SYSC_STATE_BEGIN = 0,
	SYSC_STATE_USER,
	SYSC_STATE_DAEMON,
	SYSC_STATE_DRIVER,
	SYSC_STATE_FULL
} suspend_state;
7329949e86Sstevel 
7429949e86Sstevel static int	pstate_save;
7529949e86Sstevel static uint_t	sysctrl_gate[NCPU];
7629949e86Sstevel int	sysctrl_quiesce_debug = FALSE;
7729949e86Sstevel static int	sysctrl_skip_kernel_threads = TRUE;
7829949e86Sstevel 
7929949e86Sstevel /*
8029949e86Sstevel  * sysctrl_skip_user_threads is used to control if user threads should
8129949e86Sstevel  * be suspended.  If sysctrl_skip_user_threads is true, the rest of the
8229949e86Sstevel  * flags are not used; if it is false, sysctrl_check_user_stop_result
8329949e86Sstevel  * will be used to control whether or not we need to check suspend
8429949e86Sstevel  * result, and sysctrl_allow_blocked_threads will be used to control
8529949e86Sstevel  * whether or not we allow suspend to continue if there are blocked
8629949e86Sstevel  * threads.  We allow all combinations of sysctrl_check_user_stop_result
8729949e86Sstevel  * and sysctrl_allow_block_threads, even though it might not make much
8829949e86Sstevel  * sense to not allow block threads when we don't even check stop
8929949e86Sstevel  * result.
9029949e86Sstevel  */
9129949e86Sstevel static int	sysctrl_skip_user_threads = 0;		/* default to FALSE */
9229949e86Sstevel static int	sysctrl_check_user_stop_result = 1;	/* default to TRUE */
9329949e86Sstevel static int	sysctrl_allow_blocked_threads = 1;	/* default to TRUE */
9429949e86Sstevel 
9529949e86Sstevel static int	sysc_watchdog_suspended;
9629949e86Sstevel 
9729949e86Sstevel extern int	sysctrl_enable_detach_suspend;
9829949e86Sstevel static int	sysc_lastval;
9929949e86Sstevel 
10029949e86Sstevel #define	DEBUGP(p) { if (sysctrl_quiesce_debug) p; }
10129949e86Sstevel #define	errp	prom_printf
10229949e86Sstevel 
10329949e86Sstevel #define	SYSC_CPU_LOOP_MSEC	1000
10429949e86Sstevel 
10529949e86Sstevel static void
sysctrl_grab_cpus(void)10629949e86Sstevel sysctrl_grab_cpus(void)
10729949e86Sstevel {
10829949e86Sstevel 	int		i;
10929949e86Sstevel 	cpuset_t	others;
11029949e86Sstevel 	extern cpuset_t	cpu_ready_set;
11129949e86Sstevel 	extern void	sysctrl_freeze(void);
11229949e86Sstevel 	uint64_t	sysc_tick_limit;
11329949e86Sstevel 	uint64_t	sysc_current_tick;
11429949e86Sstevel 	uint64_t	sysc_tick_deadline;
11529949e86Sstevel 
11629949e86Sstevel 	extern u_longlong_t	gettick(void);
11729949e86Sstevel 
11829949e86Sstevel 	for (i = 0; i < NCPU; i++)
11929949e86Sstevel 		sysctrl_gate[i] = 0;
12029949e86Sstevel 
12129949e86Sstevel 	/* tell other cpus to go quiet and wait for continue signal */
12229949e86Sstevel 	others = cpu_ready_set;
12329949e86Sstevel 	CPUSET_DEL(others, CPU->cpu_id);
12429949e86Sstevel 	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
12529949e86Sstevel 	    (uint64_t)(&sysctrl_gate[CPU->cpu_id]));
12629949e86Sstevel 
127*8fc99e42STrevor Thompson 	sysc_tick_limit = ((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;
12829949e86Sstevel 
12929949e86Sstevel 	/* wait for each cpu to check in */
13029949e86Sstevel 	for (i = 0; i < NCPU; i++) {
13129949e86Sstevel 		if (!CPU_IN_SET(others, i))
13229949e86Sstevel 			continue;
13329949e86Sstevel 
13429949e86Sstevel 		/*
13529949e86Sstevel 		 * Get current tick value and calculate the deadline tick
13629949e86Sstevel 		 */
13729949e86Sstevel 		sysc_current_tick = gettick();
13829949e86Sstevel 		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;
13929949e86Sstevel 
14029949e86Sstevel 		while (sysctrl_gate[i] == 0) {
14129949e86Sstevel 			/* If in panic, we just return */
14229949e86Sstevel 			if (panicstr)
14329949e86Sstevel 				break;
14429949e86Sstevel 
14529949e86Sstevel 			/* Panic the system if cpu not responsed by deadline */
14629949e86Sstevel 			sysc_current_tick = gettick();
14729949e86Sstevel 			if (sysc_current_tick >= sysc_tick_deadline) {
14829949e86Sstevel 				cmn_err(CE_PANIC, "sysctrl: cpu %d not "
14929949e86Sstevel 				    "responding to quiesce command", i);
15029949e86Sstevel 			}
15129949e86Sstevel 		}
15229949e86Sstevel 	}
15329949e86Sstevel 
15429949e86Sstevel 	/* now even our interrupts are disabled -- really quiet now */
15529949e86Sstevel 	pstate_save = disable_vec_intr();
15629949e86Sstevel }
15729949e86Sstevel 
15829949e86Sstevel static void
sysctrl_release_cpus(void)15929949e86Sstevel sysctrl_release_cpus(void)
16029949e86Sstevel {
16129949e86Sstevel 	/* let the other cpus go */
16229949e86Sstevel 	sysctrl_gate[CPU->cpu_id] = 1;
16329949e86Sstevel 
16429949e86Sstevel 	/* restore our interrupts too */
16529949e86Sstevel 	enable_vec_intr(pstate_save);
16629949e86Sstevel }
16729949e86Sstevel 
/*
 * Take cpu_lock, disable kernel preemption and suspend cyclics.
 *
 * Returns with cpu_lock still held and preemption still disabled;
 * sysctrl_enable_intr() is the matching call that undoes all three.
 */
static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}
17529949e86Sstevel 
/*
 * Counterpart of sysctrl_stop_intr(): resume cyclics, drop the interrupt
 * priority level with spl0(), re-enable kernel preemption and release
 * cpu_lock.  Must only be called after sysctrl_stop_intr(), which is what
 * acquired cpu_lock and disabled preemption.
 */
static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}
18429949e86Sstevel 
18529949e86Sstevel static int
sysctrl_is_real_device(dev_info_t * dip)18629949e86Sstevel sysctrl_is_real_device(dev_info_t *dip)
18729949e86Sstevel {
18829949e86Sstevel 	struct regspec *regbuf;
18929949e86Sstevel 	int length;
19029949e86Sstevel 	int rc;
19129949e86Sstevel 
19229949e86Sstevel 	if (ddi_get_driver(dip) == NULL)
19329949e86Sstevel 		return (FALSE);
19429949e86Sstevel 
19529949e86Sstevel 	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
19629949e86Sstevel 		return (TRUE);
19729949e86Sstevel 	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
19829949e86Sstevel 		return (FALSE);
19929949e86Sstevel 
20029949e86Sstevel 	/*
20129949e86Sstevel 	 * now the general case
20229949e86Sstevel 	 */
20329949e86Sstevel 	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
20429949e86Sstevel 	    (caddr_t)&regbuf, &length);
20529949e86Sstevel 	ASSERT(rc != DDI_PROP_NO_MEMORY);
20629949e86Sstevel 	if (rc != DDI_PROP_SUCCESS) {
20729949e86Sstevel 		return (FALSE);
20829949e86Sstevel 	} else {
20929949e86Sstevel 		kmem_free(regbuf, length);
21029949e86Sstevel 		return (TRUE);
21129949e86Sstevel 	}
21229949e86Sstevel }
21329949e86Sstevel 
21429949e86Sstevel static dev_info_t *failed_driver;
21529949e86Sstevel static char device_path[MAXPATHLEN];
21629949e86Sstevel 
21729949e86Sstevel static int
sysctrl_suspend_devices(dev_info_t * dip,sysc_cfga_pkt_t * pkt)21829949e86Sstevel sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
21929949e86Sstevel {
22029949e86Sstevel 	int circ;
22129949e86Sstevel 
22229949e86Sstevel 	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
22329949e86Sstevel 	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));
22429949e86Sstevel 
22529949e86Sstevel 	failed_driver = NULL;
22629949e86Sstevel 	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
22729949e86Sstevel 		/*
22829949e86Sstevel 		 * Hold parent busy while walking child list
22929949e86Sstevel 		 */
23029949e86Sstevel 		ndi_devi_enter(dip, &circ);
23129949e86Sstevel 		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
23229949e86Sstevel 			ndi_devi_exit(dip, circ);
23329949e86Sstevel 			return (ENXIO);
23429949e86Sstevel 		}
23529949e86Sstevel 		ndi_devi_exit(dip, circ);
23629949e86Sstevel 
23729949e86Sstevel 		if (!sysctrl_is_real_device(dip))
23829949e86Sstevel 			continue;
23929949e86Sstevel 
24029949e86Sstevel 		/*
24129949e86Sstevel 		 * Safe to call ddi_pathname() as parent is held busy
24229949e86Sstevel 		 */
24329949e86Sstevel 		(void) ddi_pathname(dip, device_path);
24429949e86Sstevel 		DEBUGP(errp(" suspending device %s\n", device_path));
24529949e86Sstevel 		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
24629949e86Sstevel 			DEBUGP(errp("  unable to suspend device %s\n",
24729949e86Sstevel 			    device_path));
24829949e86Sstevel 
24929949e86Sstevel 			(void) strncpy(pkt->errbuf, device_path,
25029949e86Sstevel 			    SYSC_OUTPUT_LEN);
25129949e86Sstevel 			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
25229949e86Sstevel 			ndi_hold_devi(dip);
25329949e86Sstevel 			failed_driver = dip;
25429949e86Sstevel 			return (ENXIO);
25529949e86Sstevel 		}
25629949e86Sstevel 	}
25729949e86Sstevel 
25829949e86Sstevel 	return (DDI_SUCCESS);
25929949e86Sstevel }
26029949e86Sstevel 
26129949e86Sstevel static void
sysctrl_resume_devices(dev_info_t * start,sysc_cfga_pkt_t * pkt)26229949e86Sstevel sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
26329949e86Sstevel {
26429949e86Sstevel 	int		circ;
26529949e86Sstevel 	dev_info_t	*dip, *next, *last = NULL;
26629949e86Sstevel 
26729949e86Sstevel 	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
26829949e86Sstevel 	    DEVI_BUSY_OWNED(ddi_get_parent(start)));
26929949e86Sstevel 
27029949e86Sstevel 	/* attach in reverse device tree order */
27129949e86Sstevel 	while (last != start) {
27229949e86Sstevel 		dip = start;
27329949e86Sstevel 		next = ddi_get_next_sibling(dip);
27429949e86Sstevel 		while (next != last && dip != failed_driver) {
27529949e86Sstevel 			dip = next;
27629949e86Sstevel 			next = ddi_get_next_sibling(dip);
27729949e86Sstevel 		}
27829949e86Sstevel 		if (dip == failed_driver) {
27929949e86Sstevel 			failed_driver = NULL;
28029949e86Sstevel 			ndi_rele_devi(dip);
28129949e86Sstevel 		} else if (sysctrl_is_real_device(dip) &&
28229949e86Sstevel 		    failed_driver == NULL) {
28329949e86Sstevel 			/*
28429949e86Sstevel 			 * Parent dip is held busy, so ddi_pathname() can
28529949e86Sstevel 			 * be safely called.
28629949e86Sstevel 			 */
28729949e86Sstevel 			(void) ddi_pathname(dip, device_path);
28829949e86Sstevel 			DEBUGP(errp(" resuming device %s\n", device_path));
28929949e86Sstevel 			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
29029949e86Sstevel 				/*
29129949e86Sstevel 				 * XXX - if in the future we decide not to
29229949e86Sstevel 				 * panic the system, we need to set the error
29329949e86Sstevel 				 * SYSC_ERR_RESUME here and also change the
29429949e86Sstevel 				 * cfgadm platform library.
29529949e86Sstevel 				 */
29629949e86Sstevel 				cmn_err(CE_PANIC, "Unable to resume device %s",
29729949e86Sstevel 				    device_path);
29829949e86Sstevel 			}
29929949e86Sstevel 		}
30029949e86Sstevel 		ndi_devi_enter(dip, &circ);
30129949e86Sstevel 		sysctrl_resume_devices(ddi_get_child(dip), pkt);
30229949e86Sstevel 		ndi_devi_exit(dip, circ);
30329949e86Sstevel 
30429949e86Sstevel 		last = dip;
30529949e86Sstevel 	}
30629949e86Sstevel }
30729949e86Sstevel 
30829949e86Sstevel /*
30929949e86Sstevel  * True if thread is virtually stopped.  Similar to CPR_VSTOPPED
31029949e86Sstevel  * but from DR point of view.  These user threads are waiting in
31129949e86Sstevel  * the kernel.  Once they complete in the kernel, they will process
31229949e86Sstevel  * the stop signal and stop.
31329949e86Sstevel  */
31429949e86Sstevel #define	SYSCTRL_VSTOPPED(t)		\
31529949e86Sstevel 	((t)->t_state == TS_SLEEP &&	\
31629949e86Sstevel 	(t)->t_wchan != NULL &&		\
31729949e86Sstevel 	(t)->t_astflag &&		\
31829949e86Sstevel 	((t)->t_proc_flag & TP_CHKPT))
31929949e86Sstevel 
32029949e86Sstevel static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t * pkt)32129949e86Sstevel sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
32229949e86Sstevel {
32329949e86Sstevel 	int		count;
32429949e86Sstevel 	char		cache_psargs[PSARGSZ];
32529949e86Sstevel 	kthread_id_t	cache_tp;
32629949e86Sstevel 	uint_t		cache_t_state;
32729949e86Sstevel 	int		bailout;
32829949e86Sstevel 	pid_t		pid;
32929949e86Sstevel 
33029949e86Sstevel 	extern void add_one_utstop();
33129949e86Sstevel 	extern void utstop_timedwait(clock_t);
33229949e86Sstevel 	extern void utstop_init(void);
33329949e86Sstevel 
33429949e86Sstevel #define	SYSCTRL_UTSTOP_RETRY	4
33529949e86Sstevel #define	SYSCTRL_UTSTOP_WAIT	hz
33629949e86Sstevel 
33729949e86Sstevel 	if (sysctrl_skip_user_threads)
33829949e86Sstevel 		return (DDI_SUCCESS);
33929949e86Sstevel 
34029949e86Sstevel 	utstop_init();
34129949e86Sstevel 
34229949e86Sstevel 	/* we need to try a few times to get past fork, etc. */
34329949e86Sstevel 	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
34429949e86Sstevel 		kthread_id_t tp;
34529949e86Sstevel 
34629949e86Sstevel 		/* walk the entire threadlist */
34729949e86Sstevel 		mutex_enter(&pidlock);
34829949e86Sstevel 		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
34929949e86Sstevel 			proc_t *p = ttoproc(tp);
35029949e86Sstevel 
35129949e86Sstevel 			/* handle kernel threads separately */
35229949e86Sstevel 			if (p->p_as == &kas || p->p_stat == SZOMB)
35329949e86Sstevel 				continue;
35429949e86Sstevel 
35529949e86Sstevel 			mutex_enter(&p->p_lock);
35629949e86Sstevel 			thread_lock(tp);
35729949e86Sstevel 
35829949e86Sstevel 			if (tp->t_state == TS_STOPPED) {
35929949e86Sstevel 				/* add another reason to stop this thread */
36029949e86Sstevel 				tp->t_schedflag &= ~TS_RESUME;
36129949e86Sstevel 			} else {
36229949e86Sstevel 				tp->t_proc_flag |= TP_CHKPT;
36329949e86Sstevel 
36429949e86Sstevel 				thread_unlock(tp);
36529949e86Sstevel 				mutex_exit(&p->p_lock);
36629949e86Sstevel 				add_one_utstop();
36729949e86Sstevel 				mutex_enter(&p->p_lock);
36829949e86Sstevel 				thread_lock(tp);
36929949e86Sstevel 
37029949e86Sstevel 				aston(tp);
37129949e86Sstevel 
372c97ad5cdSakolb 				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
37329949e86Sstevel 					setrun_locked(tp);
37429949e86Sstevel 				}
37529949e86Sstevel 
37629949e86Sstevel 			}
37729949e86Sstevel 
37829949e86Sstevel 			/* grab thread if needed */
37929949e86Sstevel 			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
38029949e86Sstevel 				poke_cpu(tp->t_cpu->cpu_id);
38129949e86Sstevel 
38229949e86Sstevel 
38329949e86Sstevel 			thread_unlock(tp);
38429949e86Sstevel 			mutex_exit(&p->p_lock);
38529949e86Sstevel 		}
38629949e86Sstevel 		mutex_exit(&pidlock);
38729949e86Sstevel 
38829949e86Sstevel 
38929949e86Sstevel 		/* let everything catch up */
39029949e86Sstevel 		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);
39129949e86Sstevel 
39229949e86Sstevel 
39329949e86Sstevel 		/* now, walk the threadlist again to see if we are done */
39429949e86Sstevel 		mutex_enter(&pidlock);
39529949e86Sstevel 		for (tp = curthread->t_next, bailout = 0;
39629949e86Sstevel 		    bailout == 0 && tp != curthread; tp = tp->t_next) {
39729949e86Sstevel 			proc_t *p = ttoproc(tp);
39829949e86Sstevel 
39929949e86Sstevel 			/* handle kernel threads separately */
40029949e86Sstevel 			if (p->p_as == &kas || p->p_stat == SZOMB)
40129949e86Sstevel 				continue;
40229949e86Sstevel 
40329949e86Sstevel 			/*
40429949e86Sstevel 			 * If this thread didn't stop, and we don't allow
40529949e86Sstevel 			 * unstopped blocked threads, bail.
40629949e86Sstevel 			 */
40729949e86Sstevel 			/* did this thread stop? */
40829949e86Sstevel 			thread_lock(tp);
40929949e86Sstevel 			if (!CPR_ISTOPPED(tp) &&
41029949e86Sstevel 			    !(sysctrl_allow_blocked_threads &&
41129949e86Sstevel 			    SYSCTRL_VSTOPPED(tp))) {
41229949e86Sstevel 
41329949e86Sstevel 				/* nope, cache the details for later */
41429949e86Sstevel 				bcopy(p->p_user.u_psargs, cache_psargs,
41529949e86Sstevel 				    sizeof (cache_psargs));
41629949e86Sstevel 				cache_tp = tp;
41729949e86Sstevel 				cache_t_state = tp->t_state;
41829949e86Sstevel 				bailout = 1;
41929949e86Sstevel 				pid = p->p_pidp->pid_id;
42029949e86Sstevel 			}
42129949e86Sstevel 			thread_unlock(tp);
42229949e86Sstevel 		}
42329949e86Sstevel 		mutex_exit(&pidlock);
42429949e86Sstevel 
42529949e86Sstevel 		/* were all the threads stopped? */
42629949e86Sstevel 		if (!bailout)
42729949e86Sstevel 			break;
42829949e86Sstevel 	}
42929949e86Sstevel 
43029949e86Sstevel 	/* were we unable to stop all threads after a few tries? */
43129949e86Sstevel 	if (bailout) {
43229949e86Sstevel 		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
433*8fc99e42STrevor Thompson 		    " thread descriptor: %p", cache_psargs, (int)pid,
434*8fc99e42STrevor Thompson 		    cache_t_state, (void *)cache_tp);
43529949e86Sstevel 
43629949e86Sstevel 		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);
43729949e86Sstevel 
43829949e86Sstevel 		return (ESRCH);
43929949e86Sstevel 	}
44029949e86Sstevel 
44129949e86Sstevel 	return (DDI_SUCCESS);
44229949e86Sstevel }
44329949e86Sstevel 
44429949e86Sstevel static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t * pkt)44529949e86Sstevel sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
44629949e86Sstevel {
44729949e86Sstevel 	caddr_t		name;
44829949e86Sstevel 	kthread_id_t	tp;
44929949e86Sstevel 
45029949e86Sstevel 	if (sysctrl_skip_kernel_threads) {
45129949e86Sstevel 		return (DDI_SUCCESS);
45229949e86Sstevel 	}
45329949e86Sstevel 
45429949e86Sstevel 	/*
45529949e86Sstevel 	 * Note: we unlock the table in resume.
45629949e86Sstevel 	 * We only need to lock the callback table if we are actually
45729949e86Sstevel 	 * suspending kernel threads.
45829949e86Sstevel 	 */
45929949e86Sstevel 	callb_lock_table();
46029949e86Sstevel 	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
46129949e86Sstevel 	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
46229949e86Sstevel 
46329949e86Sstevel 		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
46429949e86Sstevel 		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
46529949e86Sstevel 		return (EBUSY);
46629949e86Sstevel 	}
46729949e86Sstevel 
46829949e86Sstevel 	/*
46929949e86Sstevel 	 * Verify that all threads are accounted for
47029949e86Sstevel 	 */
47129949e86Sstevel 	mutex_enter(&pidlock);
47229949e86Sstevel 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
47329949e86Sstevel 		proc_t	*p = ttoproc(tp);
47429949e86Sstevel 
47529949e86Sstevel 		if (p->p_as != &kas)
47629949e86Sstevel 			continue;
47729949e86Sstevel 
47829949e86Sstevel 		if (tp->t_flag & T_INTR_THREAD)
47929949e86Sstevel 			continue;
48029949e86Sstevel 
48129949e86Sstevel 		if (!callb_is_stopped(tp, &name)) {
48229949e86Sstevel 			mutex_exit(&pidlock);
48329949e86Sstevel 			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
48429949e86Sstevel 			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
48529949e86Sstevel 			return (EBUSY);
48629949e86Sstevel 		}
48729949e86Sstevel 	}
48829949e86Sstevel 
48929949e86Sstevel 	mutex_exit(&pidlock);
49029949e86Sstevel 	return (DDI_SUCCESS);
49129949e86Sstevel }
49229949e86Sstevel 
49329949e86Sstevel static void
sysctrl_start_user_threads(void)49429949e86Sstevel sysctrl_start_user_threads(void)
49529949e86Sstevel {
49629949e86Sstevel 	kthread_id_t tp;
49729949e86Sstevel 
49829949e86Sstevel 	mutex_enter(&pidlock);
49929949e86Sstevel 
50029949e86Sstevel 	/* walk all threads and release them */
50129949e86Sstevel 	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
50229949e86Sstevel 		proc_t *p = ttoproc(tp);
50329949e86Sstevel 
50429949e86Sstevel 		/* skip kernel threads */
50529949e86Sstevel 		if (ttoproc(tp)->p_as == &kas)
50629949e86Sstevel 			continue;
50729949e86Sstevel 
50829949e86Sstevel 		mutex_enter(&p->p_lock);
50929949e86Sstevel 		tp->t_proc_flag &= ~TP_CHKPT;
51029949e86Sstevel 		mutex_exit(&p->p_lock);
51129949e86Sstevel 
51229949e86Sstevel 		thread_lock(tp);
51329949e86Sstevel 		if (CPR_ISTOPPED(tp)) {
51429949e86Sstevel 			/* back on the runq */
51529949e86Sstevel 			tp->t_schedflag |= TS_RESUME;
51629949e86Sstevel 			setrun_locked(tp);
51729949e86Sstevel 		}
51829949e86Sstevel 		thread_unlock(tp);
51929949e86Sstevel 	}
52029949e86Sstevel 
52129949e86Sstevel 	mutex_exit(&pidlock);
52229949e86Sstevel }
52329949e86Sstevel 
52429949e86Sstevel static void
sysctrl_signal_user(int sig)52529949e86Sstevel sysctrl_signal_user(int sig)
52629949e86Sstevel {
52729949e86Sstevel 	struct proc *p;
52829949e86Sstevel 
52929949e86Sstevel 	mutex_enter(&pidlock);
53029949e86Sstevel 
53129949e86Sstevel 	for (p = practive; p != NULL; p = p->p_next) {
53229949e86Sstevel 		/* only user threads */
53329949e86Sstevel 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
53429949e86Sstevel 		    p == proc_init || p == ttoproc(curthread))
53529949e86Sstevel 			continue;
53629949e86Sstevel 
53729949e86Sstevel 		mutex_enter(&p->p_lock);
53829949e86Sstevel 		sigtoproc(p, NULL, sig);
53929949e86Sstevel 		mutex_exit(&p->p_lock);
54029949e86Sstevel 	}
54129949e86Sstevel 
54229949e86Sstevel 	mutex_exit(&pidlock);
54329949e86Sstevel 
54429949e86Sstevel 	/* add a bit of delay */
54529949e86Sstevel 	delay(hz);
54629949e86Sstevel }
54729949e86Sstevel 
54829949e86Sstevel void
sysctrl_resume(sysc_cfga_pkt_t * pkt)54929949e86Sstevel sysctrl_resume(sysc_cfga_pkt_t *pkt)
55029949e86Sstevel {
55129949e86Sstevel #ifndef Bug_4154263
55229949e86Sstevel 	DEBUGP(errp("resume system...\n"));
55329949e86Sstevel #endif
55429949e86Sstevel 	switch (suspend_state) {
55529949e86Sstevel 	case SYSC_STATE_FULL:
55629949e86Sstevel 		/*
55729949e86Sstevel 		 * release all the other cpus
55829949e86Sstevel 		 */
55929949e86Sstevel #ifndef	Bug_4154263
56029949e86Sstevel 		DEBUGP(errp("release cpus..."));
56129949e86Sstevel #endif
562646e55b6Scth 		/*
563646e55b6Scth 		 * Prevent false alarm in tod_validate() due to tod
564646e55b6Scth 		 * value change between suspend and resume
565646e55b6Scth 		 */
566646e55b6Scth 		mutex_enter(&tod_lock);
567*8fc99e42STrevor Thompson 		tod_status_set(TOD_DR_RESUME_DONE);
568646e55b6Scth 		mutex_exit(&tod_lock);
569646e55b6Scth 
57029949e86Sstevel 		sysctrl_release_cpus();
57129949e86Sstevel 		DEBUGP(errp("cpus resumed...\n"));
57229949e86Sstevel 
57329949e86Sstevel 		/*
57429949e86Sstevel 		 * If we suspended hw watchdog at suspend,
57529949e86Sstevel 		 * re-enable it now.
57629949e86Sstevel 		 */
57729949e86Sstevel 		if (sysc_watchdog_suspended) {
57829949e86Sstevel 			mutex_enter(&tod_lock);
57929949e86Sstevel 			tod_ops.tod_set_watchdog_timer(
58029949e86Sstevel 			    watchdog_timeout_seconds);
58129949e86Sstevel 			mutex_exit(&tod_lock);
58229949e86Sstevel 		}
58329949e86Sstevel 
58429949e86Sstevel 		/*
58529949e86Sstevel 		 * resume callout
58629949e86Sstevel 		 */
58729949e86Sstevel 		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
58829949e86Sstevel 		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
58929949e86Sstevel 		    CB_CODE_CPR_RESUME);
59029949e86Sstevel 		sysctrl_enable_intr();
59129949e86Sstevel 		/* FALLTHROUGH */
59229949e86Sstevel 
59329949e86Sstevel 	case SYSC_STATE_DRIVER:
59429949e86Sstevel 		/*
59529949e86Sstevel 		 * resume drivers
59629949e86Sstevel 		 */
59729949e86Sstevel 		DEBUGP(errp("resume drivers..."));
59829949e86Sstevel 		sysctrl_resume_devices(ddi_root_node(), pkt);
59929949e86Sstevel 		DEBUGP(errp("done\n"));
60029949e86Sstevel 
60129949e86Sstevel 		/*
60229949e86Sstevel 		 * resume the lock manager
60329949e86Sstevel 		 */
60429949e86Sstevel 		lm_cprresume();
60529949e86Sstevel 
60629949e86Sstevel 		/* FALLTHROUGH */
60729949e86Sstevel 
60829949e86Sstevel 	case SYSC_STATE_DAEMON:
60929949e86Sstevel 		/*
61029949e86Sstevel 		 * resume kernel daemons
61129949e86Sstevel 		 */
61229949e86Sstevel 		if (!sysctrl_skip_kernel_threads) {
61329949e86Sstevel 			DEBUGP(errp("starting kernel daemons..."));
61429949e86Sstevel 			(void) callb_execute_class(CB_CL_CPR_DAEMON,
61529949e86Sstevel 			    CB_CODE_CPR_RESUME);
61629949e86Sstevel 			callb_unlock_table();
61729949e86Sstevel 		}
61829949e86Sstevel 		DEBUGP(errp("done\n"));
61929949e86Sstevel 
62029949e86Sstevel 		/* FALLTHROUGH */
62129949e86Sstevel 
62229949e86Sstevel 	case SYSC_STATE_USER:
62329949e86Sstevel 		/*
62429949e86Sstevel 		 * finally, resume user threads
62529949e86Sstevel 		 */
62629949e86Sstevel 		if (!sysctrl_skip_user_threads) {
62729949e86Sstevel 			DEBUGP(errp("starting user threads..."));
62829949e86Sstevel 			sysctrl_start_user_threads();
62929949e86Sstevel 			DEBUGP(errp("done\n"));
63029949e86Sstevel 		}
63129949e86Sstevel 		/* FALLTHROUGH */
63229949e86Sstevel 
63329949e86Sstevel 	case SYSC_STATE_BEGIN:
63429949e86Sstevel 	default:
63529949e86Sstevel 		/*
63629949e86Sstevel 		 * let those who care know that we've just resumed
63729949e86Sstevel 		 */
63829949e86Sstevel 		DEBUGP(errp("sending SIGTHAW..."));
63929949e86Sstevel 		sysctrl_signal_user(SIGTHAW);
64029949e86Sstevel 		DEBUGP(errp("done\n"));
64129949e86Sstevel 		break;
64229949e86Sstevel 	}
64329949e86Sstevel 
64429949e86Sstevel 	/* Restore sysctrl detach/suspend to its original value */
64529949e86Sstevel 	sysctrl_enable_detach_suspend = sysc_lastval;
64629949e86Sstevel 
64729949e86Sstevel 	DEBUGP(errp("system state restored\n"));
64829949e86Sstevel }
64929949e86Sstevel 
/*
 * Pre-load the "misc/klmmod" module so that the later lm_cprsuspend() call
 * in sysctrl_suspend() does not have to trigger a module load.  The result
 * of modload() is deliberately ignored.
 */
void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is currently not loaded, modstubs attempts
	 * the modload. The context this happens in below causes the
	 * module load to block forever, so this function must be called
	 * in the normal system call context ahead of time.
	 */
	(void) modload("misc", "klmmod");
}
66329949e86Sstevel 
66429949e86Sstevel int
sysctrl_suspend(sysc_cfga_pkt_t * pkt)66529949e86Sstevel sysctrl_suspend(sysc_cfga_pkt_t *pkt)
66629949e86Sstevel {
66729949e86Sstevel 	int rc = DDI_SUCCESS;
66829949e86Sstevel 
66929949e86Sstevel 	/* enable sysctrl detach/suspend function */
67029949e86Sstevel 	sysc_lastval = sysctrl_enable_detach_suspend;
67129949e86Sstevel 	sysctrl_enable_detach_suspend = 1;
67229949e86Sstevel 
67329949e86Sstevel 	/*
67429949e86Sstevel 	 * first, stop all user threads
67529949e86Sstevel 	 */
67629949e86Sstevel 	DEBUGP(errp("\nstopping user threads..."));
67729949e86Sstevel 	suspend_state = SYSC_STATE_USER;
67829949e86Sstevel 	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
67929949e86Sstevel 	    sysctrl_check_user_stop_result) {
68029949e86Sstevel 		sysctrl_resume(pkt);
68129949e86Sstevel 		return (rc);
68229949e86Sstevel 	}
68329949e86Sstevel 	DEBUGP(errp("done\n"));
68429949e86Sstevel 
68529949e86Sstevel 	/*
68629949e86Sstevel 	 * now stop daemon activities
68729949e86Sstevel 	 */
68829949e86Sstevel 	DEBUGP(errp("stopping kernel daemons..."));
68929949e86Sstevel 	suspend_state = SYSC_STATE_DAEMON;
69029949e86Sstevel 	if (rc = sysctrl_stop_kernel_threads(pkt)) {
69129949e86Sstevel 		sysctrl_resume(pkt);
69229949e86Sstevel 		return (rc);
69329949e86Sstevel 	}
69429949e86Sstevel 	DEBUGP(errp("done\n"));
69529949e86Sstevel 
69629949e86Sstevel 	/*
69729949e86Sstevel 	 * This sync swap out all user pages
69829949e86Sstevel 	 */
69929949e86Sstevel 	vfs_sync(SYNC_ALL);
70029949e86Sstevel 
70129949e86Sstevel 	/*
70229949e86Sstevel 	 * special treatment for lock manager
70329949e86Sstevel 	 */
70429949e86Sstevel 	lm_cprsuspend();
70529949e86Sstevel 
70629949e86Sstevel 	/*
70729949e86Sstevel 	 * sync the file system in case we never make it back
70829949e86Sstevel 	 */
70929949e86Sstevel 	sync();
71029949e86Sstevel 
71129949e86Sstevel 	/*
71229949e86Sstevel 	 * now suspend drivers
71329949e86Sstevel 	 */
71429949e86Sstevel 	DEBUGP(errp("suspending drivers..."));
71529949e86Sstevel 	suspend_state = SYSC_STATE_DRIVER;
71629949e86Sstevel 	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
71729949e86Sstevel 		sysctrl_resume(pkt);
71829949e86Sstevel 		return (rc);
71929949e86Sstevel 	}
72029949e86Sstevel 	DEBUGP(errp("done\n"));
72129949e86Sstevel 
72229949e86Sstevel 	/*
72329949e86Sstevel 	 * handle the callout table
72429949e86Sstevel 	 */
72529949e86Sstevel 	sysctrl_stop_intr();
72629949e86Sstevel 
72729949e86Sstevel 	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);
72829949e86Sstevel 
72929949e86Sstevel 	/*
73029949e86Sstevel 	 * if watchdog was activated, disable it
73129949e86Sstevel 	 */
73229949e86Sstevel 	if (watchdog_activated) {
73329949e86Sstevel 		mutex_enter(&tod_lock);
73429949e86Sstevel 		tod_ops.tod_clear_watchdog_timer();
73529949e86Sstevel 		mutex_exit(&tod_lock);
73629949e86Sstevel 		sysc_watchdog_suspended = 1;
73729949e86Sstevel 	} else {
73829949e86Sstevel 		sysc_watchdog_suspended = 0;
73929949e86Sstevel 	}
74029949e86Sstevel 
74129949e86Sstevel 	/*
74229949e86Sstevel 	 * finally, grab all cpus
74329949e86Sstevel 	 */
74429949e86Sstevel 	DEBUGP(errp("freezing all cpus...\n"));
74529949e86Sstevel 	suspend_state = SYSC_STATE_FULL;
74629949e86Sstevel 	sysctrl_grab_cpus();
74729949e86Sstevel #ifndef	Bug_4154263
74829949e86Sstevel 	DEBUGP(errp("done\n"));
74929949e86Sstevel 
75029949e86Sstevel 	DEBUGP(errp("system is quiesced\n"));
75129949e86Sstevel #endif
75229949e86Sstevel 
75329949e86Sstevel 	return (rc);
75429949e86Sstevel }
755