/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This workaround inhibits prom_printf after the cpus are grabbed.
 * This can be removed when 4154263 is corrected.
 */
#define	Bug_4154263

/*
 * A CPR derivative specifically for sunfire
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/ddi.h>
#define	SUNDDI_IMPL
#include <sys/sunddi.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <nfs/lm.h>
#include <sys/ddi_impldefs.h>
#include <sys/obpdefs.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/callb.h>
#include <sys/clock.h>
#include <sys/x_call.h>
#include <sys/cpuvar.h>
#include <sys/epm.h>
#include <sys/vfs.h>
#include <sys/fhc.h>
#include <sys/sysctrl.h>
#include <sys/promif.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/cyclic.h>
#include <sys/sunndi.h>
#include <sys/machsystm.h>

static enum sysctrl_suspend_state {
	SYSC_STATE_BEGIN = 0,
	SYSC_STATE_USER,
	SYSC_STATE_DAEMON,
	SYSC_STATE_DRIVER,
	SYSC_STATE_FULL } suspend_state;

static int pstate_save;
static uint_t sysctrl_gate[NCPU];
int sysctrl_quiesce_debug = FALSE;
static int sysctrl_skip_kernel_threads = TRUE;

/*
 * sysctrl_skip_user_threads controls whether user threads should be
 * suspended. If sysctrl_skip_user_threads is true, the remaining flags
 * are not used; if it is false, sysctrl_check_user_stop_result controls
 * whether we check the result of the suspend, and
 * sysctrl_allow_blocked_threads controls whether we allow the suspend
 * to continue if there are blocked threads. We allow all combinations
 * of sysctrl_check_user_stop_result and sysctrl_allow_blocked_threads,
 * even though it might not make much sense to disallow blocked threads
 * when we don't even check the stop result.
 */
static int sysctrl_skip_user_threads = 0;	/* default to FALSE */
static int sysctrl_check_user_stop_result = 1;	/* default to TRUE */
static int sysctrl_allow_blocked_threads = 1;	/* default to TRUE */

static int sysc_watchdog_suspended;

extern int sysctrl_enable_detach_suspend;
static int sysc_lastval;

#define	DEBUGP(p) { if (sysctrl_quiesce_debug) p; }
#define	errp prom_printf

#define	SYSC_CPU_LOOP_MSEC	1000

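/*
 * Gather the other cpus into a quiesced state.  Each cpu in
 * cpu_ready_set (other than the current one) is cross-called with
 * sysctrl_freeze() and then polled until it checks in through its
 * sysctrl_gate[] entry; if a cpu has not responded within
 * SYSC_CPU_LOOP_MSEC (and the system is not already panicking) we
 * panic.  Finally, vector interrupts are disabled on the current cpu.
 */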
static void
sysctrl_grab_cpus(void)
{
	int i;
	cpuset_t others;
	extern cpuset_t cpu_ready_set;
	extern void sysctrl_freeze(void);
	uint64_t sysc_tick_limit;
	uint64_t sysc_current_tick;
	uint64_t sysc_tick_deadline;

	extern u_longlong_t gettick(void);

	for (i = 0; i < NCPU; i++)
		sysctrl_gate[i] = 0;

	/* tell other cpus to go quiet and wait for continue signal */
	others = cpu_ready_set;
	CPUSET_DEL(others, CPU->cpu_id);
	xt_some(others, (xcfunc_t *)sysctrl_freeze, (uint64_t)sysctrl_gate,
	    (uint64_t)(&sysctrl_gate[CPU->cpu_id]));

	sysc_tick_limit = ((uint64_t)sys_tick_freq * SYSC_CPU_LOOP_MSEC) / 1000;

	/* wait for each cpu to check in */
	for (i = 0; i < NCPU; i++) {
		if (!CPU_IN_SET(others, i))
			continue;

		/*
		 * Get current tick value and calculate the deadline tick
		 */
		sysc_current_tick = gettick();
		sysc_tick_deadline = sysc_current_tick + sysc_tick_limit;

		while (sysctrl_gate[i] == 0) {
			/* If the system is panicking, don't keep waiting */
			if (panicstr)
				break;

			/* Panic if the cpu has not responded by deadline */
			sysc_current_tick = gettick();
			if (sysc_current_tick >= sysc_tick_deadline) {
				cmn_err(CE_PANIC, "sysctrl: cpu %d not "
				    "responding to quiesce command", i);
			}
		}
	}

	/* now even our interrupts are disabled -- really quiet now */
	pstate_save = disable_vec_intr();
}

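/*
 * Open the gate for the cpus parked by sysctrl_grab_cpus() and restore
 * vector interrupts on the current cpu.
 */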
static void
sysctrl_release_cpus(void)
{
	/* let the other cpus go */
	sysctrl_gate[CPU->cpu_id] = 1;

	/* restore our interrupts too */
	enable_vec_intr(pstate_save);
}

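/*
 * Quiesce clock/interrupt activity: block cpu configuration changes via
 * cpu_lock, disable kernel preemption and suspend the cyclic subsystem.
 * Undone by sysctrl_enable_intr().
 */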
static void
sysctrl_stop_intr(void)
{
	mutex_enter(&cpu_lock);
	kpreempt_disable();
	cyclic_suspend();
}

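/*
 * Undo sysctrl_stop_intr(): resume cyclics, drop the spl, re-enable
 * kernel preemption and release cpu_lock.
 */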
static void
sysctrl_enable_intr(void)
{
	cyclic_resume();
	(void) spl0();
	kpreempt_enable();
	mutex_exit(&cpu_lock);
}

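/*
 * Decide whether a dip represents a real, attached device that needs to
 * be suspended and resumed.  Nodes without a driver are skipped, the
 * PMC_NEEDS_SR/PMC_PARENTAL_SR and PMC_NO_SR power management flags are
 * honored, and otherwise a node counts as a real device if it has a
 * "reg" property.
 */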
static int
sysctrl_is_real_device(dev_info_t *dip)
{
	struct regspec *regbuf;
	int length;
	int rc;

	if (ddi_get_driver(dip) == NULL)
		return (FALSE);

	if (DEVI(dip)->devi_pm_flags & (PMC_NEEDS_SR|PMC_PARENTAL_SR))
		return (TRUE);
	if (DEVI(dip)->devi_pm_flags & PMC_NO_SR)
		return (FALSE);

	/*
	 * now the general case
	 */
	rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, "reg",
	    (caddr_t)&regbuf, &length);
	ASSERT(rc != DDI_PROP_NO_MEMORY);
	if (rc != DDI_PROP_SUCCESS) {
		return (FALSE);
	} else {
		kmem_free(regbuf, length);
		return (TRUE);
	}
}

static dev_info_t *failed_driver;
static char device_path[MAXPATHLEN];

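/*
 * Recursively suspend the device tree rooted at dip, children before
 * parents, by calling devi_detach(DDI_SUSPEND) on every real device.
 * On failure the offending dip is held and recorded in failed_driver,
 * its path is copied into the cfgadm packet error buffer and ENXIO is
 * returned.
 */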
static int
sysctrl_suspend_devices(dev_info_t *dip, sysc_cfga_pkt_t *pkt)
{
	int circ;

	ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(dip)));

	failed_driver = NULL;
	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/*
		 * Hold parent busy while walking child list
		 */
		ndi_devi_enter(dip, &circ);
		if (sysctrl_suspend_devices(ddi_get_child(dip), pkt)) {
			ndi_devi_exit(dip, circ);
			return (ENXIO);
		}
		ndi_devi_exit(dip, circ);

		if (!sysctrl_is_real_device(dip))
			continue;

		/*
		 * Safe to call ddi_pathname() as parent is held busy
		 */
		(void) ddi_pathname(dip, device_path);
		DEBUGP(errp(" suspending device %s\n", device_path));
		if (devi_detach(dip, DDI_SUSPEND) != DDI_SUCCESS) {
			DEBUGP(errp(" unable to suspend device %s\n",
			    device_path));

			(void) strncpy(pkt->errbuf, device_path,
			    SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_SUSPEND);
			ndi_hold_devi(dip);
			failed_driver = dip;
			return (ENXIO);
		}
	}

	return (DDI_SUCCESS);
}

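/*
 * Resume the device tree rooted at start in the reverse of the order
 * used by sysctrl_suspend_devices().  Devices that follow failed_driver
 * in the suspend order (and so were never suspended) are skipped,
 * failed_driver itself is only released, and every other real device is
 * resumed with devi_attach(DDI_RESUME); a resume failure panics the
 * system.
 */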
static void
sysctrl_resume_devices(dev_info_t *start, sysc_cfga_pkt_t *pkt)
{
	int circ;
	dev_info_t *dip, *next, *last = NULL;

	ASSERT(start == NULL || ddi_get_parent(start) == NULL ||
	    DEVI_BUSY_OWNED(ddi_get_parent(start)));

	/* attach in reverse device tree order */
	while (last != start) {
		dip = start;
		next = ddi_get_next_sibling(dip);
		while (next != last && dip != failed_driver) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}
		if (dip == failed_driver) {
			failed_driver = NULL;
			ndi_rele_devi(dip);
		} else if (sysctrl_is_real_device(dip) &&
		    failed_driver == NULL) {
			/*
			 * Parent dip is held busy, so ddi_pathname() can
			 * be safely called.
			 */
			(void) ddi_pathname(dip, device_path);
			DEBUGP(errp(" resuming device %s\n", device_path));
			if (devi_attach(dip, DDI_RESUME) != DDI_SUCCESS) {
				/*
				 * XXX - if in the future we decide not to
				 * panic the system, we need to set the error
				 * SYSC_ERR_RESUME here and also change the
				 * cfgadm platform library.
				 */
				cmn_err(CE_PANIC, "Unable to resume device %s",
				    device_path);
			}
		}
		ndi_devi_enter(dip, &circ);
		sysctrl_resume_devices(ddi_get_child(dip), pkt);
		ndi_devi_exit(dip, circ);

		last = dip;
	}
}

/*
 * True if thread is virtually stopped. Similar to CPR_VSTOPPED
 * but from DR point of view. These user threads are waiting in
 * the kernel. Once they complete in the kernel, they will process
 * the stop signal and stop.
 */
#define	SYSCTRL_VSTOPPED(t)		\
	((t)->t_state == TS_SLEEP &&	\
	(t)->t_wchan != NULL &&		\
	(t)->t_astflag &&		\
	((t)->t_proc_flag & TP_CHKPT))

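/*
 * Ask every user thread to stop, retrying a few times to get past
 * transient states such as fork.  Threads already stopped lose
 * TS_RESUME; all others get TP_CHKPT and an AST (and are made runnable
 * if wakeable) so they park themselves.  If, after the retries, a
 * thread is neither stopped nor virtually stopped (when blocked threads
 * are allowed), its details are reported in the cfgadm packet and ESRCH
 * is returned.
 */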
static int
sysctrl_stop_user_threads(sysc_cfga_pkt_t *pkt)
{
	int count;
	char cache_psargs[PSARGSZ];
	kthread_id_t cache_tp;
	uint_t cache_t_state;
	int bailout;
	pid_t pid;

	extern void add_one_utstop();
	extern void utstop_timedwait(clock_t);
	extern void utstop_init(void);

#define	SYSCTRL_UTSTOP_RETRY	4
#define	SYSCTRL_UTSTOP_WAIT	hz

	if (sysctrl_skip_user_threads)
		return (DDI_SUCCESS);

	utstop_init();

	/* we need to try a few times to get past fork, etc. */
	for (count = 0; count < SYSCTRL_UTSTOP_RETRY; count++) {
		kthread_id_t tp;

		/* walk the entire threadlist */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			mutex_enter(&p->p_lock);
			thread_lock(tp);

			if (tp->t_state == TS_STOPPED) {
				/* add another reason to stop this thread */
				tp->t_schedflag &= ~TS_RESUME;
			} else {
				tp->t_proc_flag |= TP_CHKPT;

				thread_unlock(tp);
				mutex_exit(&p->p_lock);
				add_one_utstop();
				mutex_enter(&p->p_lock);
				thread_lock(tp);

				aston(tp);

				if (ISWAKEABLE(tp) || ISWAITING(tp)) {
					setrun_locked(tp);
				}
			}

			/* grab thread if needed */
			if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
				poke_cpu(tp->t_cpu->cpu_id);

			thread_unlock(tp);
			mutex_exit(&p->p_lock);
		}
		mutex_exit(&pidlock);

		/* let everything catch up */
		utstop_timedwait(count * count * SYSCTRL_UTSTOP_WAIT);

		/* now, walk the threadlist again to see if we are done */
		mutex_enter(&pidlock);
		for (tp = curthread->t_next, bailout = 0;
		    bailout == 0 && tp != curthread; tp = tp->t_next) {
			proc_t *p = ttoproc(tp);

			/* handle kernel threads separately */
			if (p->p_as == &kas || p->p_stat == SZOMB)
				continue;

			/*
			 * If this thread didn't stop, and we don't allow
			 * unstopped blocked threads, bail.
			 */
			/* did this thread stop? */
			thread_lock(tp);
			if (!CPR_ISTOPPED(tp) &&
			    !(sysctrl_allow_blocked_threads &&
			    SYSCTRL_VSTOPPED(tp))) {

				/* nope, cache the details for later */
				bcopy(p->p_user.u_psargs, cache_psargs,
				    sizeof (cache_psargs));
				cache_tp = tp;
				cache_t_state = tp->t_state;
				bailout = 1;
				pid = p->p_pidp->pid_id;
			}
			thread_unlock(tp);
		}
		mutex_exit(&pidlock);

		/* were all the threads stopped? */
		if (!bailout)
			break;
	}

	/* were we unable to stop all threads after a few tries? */
	if (bailout) {
		(void) sprintf(pkt->errbuf, "process: %s id: %d state: %x"
		    " thread descriptor: %p", cache_psargs, (int)pid,
		    cache_t_state, (void *)cache_tp);

		SYSC_ERR_SET(pkt, SYSC_ERR_UTHREAD);

		return (ESRCH);
	}

	return (DDI_SUCCESS);
}

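/*
 * Stop kernel (CPR daemon) threads: lock the callback table, run the
 * CB_CL_CPR_DAEMON checkpoint callbacks, then verify that every kernel
 * thread other than interrupt threads is accounted for.  Returns EBUSY
 * with the offending thread's name in the cfgadm packet if anything
 * refuses to stop.  The callback table is unlocked in sysctrl_resume().
 */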
static int
sysctrl_stop_kernel_threads(sysc_cfga_pkt_t *pkt)
{
	caddr_t name;
	kthread_id_t tp;

	if (sysctrl_skip_kernel_threads) {
		return (DDI_SUCCESS);
	}

	/*
	 * Note: we unlock the table in resume.
	 * We only need to lock the callback table if we are actually
	 * suspending kernel threads.
	 */
	callb_lock_table();
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {

		(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
		SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
		return (EBUSY);
	}

	/*
	 * Verify that all threads are accounted for
	 */
	mutex_enter(&pidlock);
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		if (p->p_as != &kas)
			continue;

		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (!callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			(void) strncpy(pkt->errbuf, name, SYSC_OUTPUT_LEN);
			SYSC_ERR_SET(pkt, SYSC_ERR_KTHREAD);
			return (EBUSY);
		}
	}

	mutex_exit(&pidlock);
	return (DDI_SUCCESS);
}

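/*
 * Let user threads run again: clear TP_CHKPT on every non-kernel thread
 * and put the ones that were checkpoint-stopped back on a run queue.
 */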
static void
sysctrl_start_user_threads(void)
{
	kthread_id_t tp;

	mutex_enter(&pidlock);

	/* walk all threads and release them */
	for (tp = curthread->t_next; tp != curthread; tp = tp->t_next) {
		proc_t *p = ttoproc(tp);

		/* skip kernel threads */
		if (ttoproc(tp)->p_as == &kas)
			continue;

		mutex_enter(&p->p_lock);
		tp->t_proc_flag &= ~TP_CHKPT;
		mutex_exit(&p->p_lock);

		thread_lock(tp);
		if (CPR_ISTOPPED(tp)) {
			/* back on the runq */
			tp->t_schedflag |= TS_RESUME;
			setrun_locked(tp);
		}
		thread_unlock(tp);
	}

	mutex_exit(&pidlock);
}

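/*
 * Post a signal to every user process except init, zombies and the
 * current process, then pause briefly to give them a chance to notice.
 */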
static void
sysctrl_signal_user(int sig)
{
	struct proc *p;

	mutex_enter(&pidlock);

	for (p = practive; p != NULL; p = p->p_next) {
		/* only user threads */
		if (p->p_exec == NULL || p->p_stat == SZOMB ||
		    p == proc_init || p == ttoproc(curthread))
			continue;

		mutex_enter(&p->p_lock);
		sigtoproc(p, NULL, sig);
		mutex_exit(&p->p_lock);
	}

	mutex_exit(&pidlock);

	/* add a bit of delay */
	delay(hz);
}

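/*
 * Resume the system from whatever stage the suspend reached, as recorded
 * in suspend_state: release the cpus and reset the tod status, re-arm
 * the hardware watchdog if it was disabled, run the resume callbacks and
 * re-enable interrupts, resume drivers and the lock manager, restart
 * kernel daemons and user threads, and finally notify user processes
 * with SIGTHAW and restore sysctrl_enable_detach_suspend.
 */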
void
sysctrl_resume(sysc_cfga_pkt_t *pkt)
{
#ifndef Bug_4154263
	DEBUGP(errp("resume system...\n"));
#endif
	switch (suspend_state) {
	case SYSC_STATE_FULL:
		/*
		 * release all the other cpus
		 */
#ifndef Bug_4154263
		DEBUGP(errp("release cpus..."));
#endif
		/*
		 * Prevent false alarm in tod_validate() due to tod
		 * value change between suspend and resume
		 */
		mutex_enter(&tod_lock);
		tod_status_set(TOD_DR_RESUME_DONE);
		mutex_exit(&tod_lock);

		sysctrl_release_cpus();
		DEBUGP(errp("cpus resumed...\n"));

		/*
		 * If we suspended hw watchdog at suspend,
		 * re-enable it now.
		 */
		if (sysc_watchdog_suspended) {
			mutex_enter(&tod_lock);
			tod_ops.tod_set_watchdog_timer(
			    watchdog_timeout_seconds);
			mutex_exit(&tod_lock);
		}

		/*
		 * resume callout
		 */
		(void) callb_execute_class(CB_CL_CPR_RPC, CB_CODE_CPR_RESUME);
		(void) callb_execute_class(CB_CL_CPR_CALLOUT,
		    CB_CODE_CPR_RESUME);
		sysctrl_enable_intr();
		/* FALLTHROUGH */

	case SYSC_STATE_DRIVER:
		/*
		 * resume drivers
		 */
		DEBUGP(errp("resume drivers..."));
		sysctrl_resume_devices(ddi_root_node(), pkt);
		DEBUGP(errp("done\n"));

		/*
		 * resume the lock manager
		 */
		lm_cprresume();

		/* FALLTHROUGH */

	case SYSC_STATE_DAEMON:
		/*
		 * resume kernel daemons
		 */
		if (!sysctrl_skip_kernel_threads) {
			DEBUGP(errp("starting kernel daemons..."));
			(void) callb_execute_class(CB_CL_CPR_DAEMON,
			    CB_CODE_CPR_RESUME);
			callb_unlock_table();
		}
		DEBUGP(errp("done\n"));

		/* FALLTHROUGH */

	case SYSC_STATE_USER:
		/*
		 * finally, resume user threads
		 */
		if (!sysctrl_skip_user_threads) {
			DEBUGP(errp("starting user threads..."));
			sysctrl_start_user_threads();
			DEBUGP(errp("done\n"));
		}
		/* FALLTHROUGH */

	case SYSC_STATE_BEGIN:
	default:
		/*
		 * let those who care know that we've just resumed
		 */
		DEBUGP(errp("sending SIGTHAW..."));
		sysctrl_signal_user(SIGTHAW);
		DEBUGP(errp("done\n"));
		break;
	}

	/* Restore sysctrl detach/suspend to its original value */
	sysctrl_enable_detach_suspend = sysc_lastval;

	DEBUGP(errp("system state restored\n"));
}

void
sysctrl_suspend_prepare(void)
{
	/*
	 * We use a function, lm_cprsuspend(), in the suspend flow that
	 * is redirected to a module through the modstubs mechanism.
	 * If the module is not currently loaded, modstubs attempts the
	 * modload.  The context in which that would otherwise happen
	 * during suspend causes the module load to block forever, so
	 * this function must be called in normal system call context
	 * ahead of time.
	 */
	(void) modload("misc", "klmmod");
}

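/*
 * Quiesce the system for a sunfire dynamic reconfiguration operation.
 * In order: stop user threads, stop kernel daemon threads, sync file
 * systems, suspend the lock manager, suspend all drivers, checkpoint
 * callouts and stop interrupts, disable the hardware watchdog if armed,
 * and finally park all other cpus.  suspend_state records how far we
 * got so that sysctrl_resume() can unwind on failure.
 */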
int
sysctrl_suspend(sysc_cfga_pkt_t *pkt)
{
	int rc = DDI_SUCCESS;

	/* enable sysctrl detach/suspend function */
	sysc_lastval = sysctrl_enable_detach_suspend;
	sysctrl_enable_detach_suspend = 1;

	/*
	 * first, stop all user threads
	 */
	DEBUGP(errp("\nstopping user threads..."));
	suspend_state = SYSC_STATE_USER;
	if (((rc = sysctrl_stop_user_threads(pkt)) != DDI_SUCCESS) &&
	    sysctrl_check_user_stop_result) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * now stop daemon activities
	 */
	DEBUGP(errp("stopping kernel daemons..."));
	suspend_state = SYSC_STATE_DAEMON;
	if (rc = sysctrl_stop_kernel_threads(pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * This sync swaps out all user pages
	 */
	vfs_sync(SYNC_ALL);

	/*
	 * special treatment for lock manager
	 */
	lm_cprsuspend();

	/*
	 * sync the file system in case we never make it back
	 */
	sync();

	/*
	 * now suspend drivers
	 */
	DEBUGP(errp("suspending drivers..."));
	suspend_state = SYSC_STATE_DRIVER;
	if (rc = sysctrl_suspend_devices(ddi_root_node(), pkt)) {
		sysctrl_resume(pkt);
		return (rc);
	}
	DEBUGP(errp("done\n"));

	/*
	 * handle the callout table
	 */
	sysctrl_stop_intr();

	(void) callb_execute_class(CB_CL_CPR_CALLOUT, CB_CODE_CPR_CHKPT);

	/*
	 * if watchdog was activated, disable it
	 */
	if (watchdog_activated) {
		mutex_enter(&tod_lock);
		tod_ops.tod_clear_watchdog_timer();
		mutex_exit(&tod_lock);
		sysc_watchdog_suspended = 1;
	} else {
		sysc_watchdog_suspended = 0;
	}

	/*
	 * finally, grab all cpus
	 */
	DEBUGP(errp("freezing all cpus...\n"));
	suspend_state = SYSC_STATE_FULL;
	sysctrl_grab_cpus();
#ifndef Bug_4154263
	DEBUGP(errp("done\n"));

	DEBUGP(errp("system is quiesced\n"));
#endif

	return (rc);
}