xref: /titanic_50/usr/src/uts/common/cpr/cpr_uthread.c (revision 985be8f145003c39bf82ad09a81ad394e4d7d4b6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/thread.h>
30 #include <sys/conf.h>
31 #include <sys/cpuvar.h>
32 #include <sys/cpr.h>
33 #include <sys/user.h>
34 #include <sys/cmn_err.h>
35 #include <sys/callb.h>
36 
37 extern void utstop_init(void);
38 extern void add_one_utstop(void);
39 extern void utstop_timedwait(long ticks);
40 
41 static void cpr_stop_user(int);
42 static int cpr_check_user_threads(void);
43 
44 /*
45  * CPR user thread related support routines
46  */
47 void
48 cpr_signal_user(int sig)
49 {
50 /*
51  * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet
52  * since openwin is catching every signal and default action is to exit.
53  * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads.
54  */
55 	struct proc *p;
56 
57 	mutex_enter(&pidlock);
58 
59 	for (p = practive; p; p = p->p_next) {
60 		/* only user threads */
61 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
62 			p == proc_init || p == ttoproc(curthread))
63 			continue;
64 
65 		mutex_enter(&p->p_lock);
66 		sigtoproc(p, NULL, sig);
67 		mutex_exit(&p->p_lock);
68 	}
69 	mutex_exit(&pidlock);
70 
71 	DELAY(MICROSEC);
72 }
73 
74 /* max wait time for user thread stop */
75 #define	CPR_UTSTOP_WAIT		hz
76 #define	CPR_UTSTOP_RETRY	4
77 static int count;
78 
79 int
80 cpr_stop_user_threads()
81 {
82 	utstop_init();
83 
84 	count = 0;
85 	do {
86 		if (++count > CPR_UTSTOP_RETRY)
87 			return (ESRCH);
88 		cpr_stop_user(count * count * CPR_UTSTOP_WAIT);
89 	} while (cpr_check_user_threads() &&
90 		(count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE));
91 
92 	return (0);
93 }
94 
95 /*
96  * This routine tries to stop all user threads before we get rid of all
97  * its pages.It goes through allthreads list and set the TP_CHKPT flag
98  * for all user threads and make them runnable. If all of the threads
99  * can be stopped within the max wait time, CPR will proceed. Otherwise
100  * CPR is aborted after a few of similiar retries.
101  */
102 static void
103 cpr_stop_user(int wait)
104 {
105 	kthread_id_t tp;
106 	proc_t *p;
107 
108 	/* The whole loop below needs to be atomic */
109 	mutex_enter(&pidlock);
110 
111 	/* faster this way */
112 	tp = curthread->t_next;
113 	do {
114 		/* kernel threads will be handled later */
115 		p = ttoproc(tp);
116 		if (p->p_as == &kas || p->p_stat == SZOMB)
117 			continue;
118 
119 		/*
120 		 * If the thread is stopped (by CPR) already, do nothing;
121 		 * if running, mark TP_CHKPT;
122 		 * if sleeping normally, mark TP_CHKPT and setrun;
123 		 * if sleeping non-interruptable, mark TP_CHKPT only for now;
124 		 * if sleeping with t_wchan0 != 0 etc, virtually stopped,
125 		 * do nothing.
126 		 */
127 
128 		/* p_lock is needed for modifying t_proc_flag */
129 		mutex_enter(&p->p_lock);
130 		thread_lock(tp); /* needed to check CPR_ISTOPPED */
131 
132 		if (tp->t_state == TS_STOPPED) {
133 			/*
134 			 * if already stopped by other reasons, add this new
135 			 * reason to it.
136 			 */
137 			if (tp->t_schedflag & TS_RESUME)
138 				tp->t_schedflag &= ~TS_RESUME;
139 		} else {
140 
141 			tp->t_proc_flag |= TP_CHKPT;
142 
143 			thread_unlock(tp);
144 			mutex_exit(&p->p_lock);
145 			add_one_utstop();
146 			mutex_enter(&p->p_lock);
147 			thread_lock(tp);
148 
149 			aston(tp);
150 
151 			if (tp->t_state == TS_SLEEP &&
152 			    (tp->t_flag & T_WAKEABLE)) {
153 				setrun_locked(tp);
154 			}
155 		}
156 		/*
157 		 * force the thread into the kernel if it is not already there.
158 		 */
159 		if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
160 			poke_cpu(tp->t_cpu->cpu_id);
161 		thread_unlock(tp);
162 		mutex_exit(&p->p_lock);
163 
164 	} while ((tp = tp->t_next) != curthread);
165 	mutex_exit(&pidlock);
166 
167 	utstop_timedwait(wait);
168 }
169 
170 /*
171  * Checks and makes sure all user threads are stopped
172  */
173 static int
174 cpr_check_user_threads()
175 {
176 	kthread_id_t tp;
177 	int rc = 0;
178 
179 	mutex_enter(&pidlock);
180 	tp = curthread->t_next;
181 	do {
182 		if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
183 			continue;
184 
185 		thread_lock(tp);
186 		/*
187 		 * make sure that we are off all the queues and in a stopped
188 		 * state.
189 		 */
190 		if (!CPR_ISTOPPED(tp)) {
191 			thread_unlock(tp);
192 			mutex_exit(&pidlock);
193 
194 			if (count == CPR_UTSTOP_RETRY) {
195 			CPR_DEBUG(CPR_DEBUG1, "Suspend failed: "
196 			    "cannot stop uthread\n");
197 			cpr_err(CE_WARN, "Suspend cannot stop "
198 				"process %s (%p:%x).",
199 				ttoproc(tp)->p_user.u_psargs, (void *)tp,
200 				tp->t_state);
201 			cpr_err(CE_WARN, "Process may be waiting for"
202 				" network request, please try again.");
203 			}
204 
205 			CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x "
206 			    "sched=%x\n", tp, tp->t_state, tp->t_proc_flag,
207 			    tp->t_schedflag);
208 			CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n",
209 			    ttoproc(tp), ttoproc(tp)->p_stat,
210 			    ttoproc(tp)->p_pidp->pid_id);
211 			return (1);
212 		}
213 		thread_unlock(tp);
214 
215 	} while ((tp = tp->t_next) != curthread && rc == 0);
216 
217 	mutex_exit(&pidlock);
218 	return (0);
219 }
220 
221 
222 /*
223  * start all threads that were stopped for checkpoint.
224  */
225 void
226 cpr_start_user_threads()
227 {
228 	kthread_id_t tp;
229 	proc_t *p;
230 
231 	mutex_enter(&pidlock);
232 	tp = curthread->t_next;
233 	do {
234 		p = ttoproc(tp);
235 		/*
236 		 * kernel threads are callback'ed rather than setrun.
237 		 */
238 		if (ttoproc(tp)->p_as == &kas) continue;
239 		/*
240 		 * t_proc_flag should have been cleared. Just to make sure here
241 		 */
242 		mutex_enter(&p->p_lock);
243 		tp->t_proc_flag &= ~TP_CHKPT;
244 		mutex_exit(&p->p_lock);
245 
246 		thread_lock(tp);
247 		if (CPR_ISTOPPED(tp)) {
248 
249 			/*
250 			 * put it back on the runq
251 			 */
252 			tp->t_schedflag |= TS_RESUME;
253 			setrun_locked(tp);
254 		}
255 		thread_unlock(tp);
256 		/*
257 		 * DEBUG - Keep track of current and next thread pointer.
258 		 */
259 	} while ((tp = tp->t_next) != curthread);
260 
261 	mutex_exit(&pidlock);
262 }
263 
264 
265 /*
266  * re/start kernel threads
267  */
268 void
269 cpr_start_kernel_threads(void)
270 {
271 	CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons...");
272 	(void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME);
273 	CPR_DEBUG(CPR_DEBUG1, "done\n");
274 
275 	/* see table lock below */
276 	callb_unlock_table();
277 }
278 
279 
280 /*
281  * Stop kernel threads by using the callback mechanism.  If any thread
282  * cannot be stopped, return failure.
283  */
284 int
285 cpr_stop_kernel_threads(void)
286 {
287 	caddr_t	name;
288 	kthread_id_t tp;
289 	proc_t *p;
290 
291 	callb_lock_table();	/* Note: we unlock the table in resume. */
292 
293 	CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons...");
294 	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
295 	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
296 		cpr_err(CE_WARN,
297 		    "Could not stop \"%s\" kernel thread.  "
298 		    "Please try again later.", name);
299 		return (EBUSY);
300 	}
301 
302 	/*
303 	 * We think we stopped all the kernel threads.  Just in case
304 	 * someone is not playing by the rules, take a spin through
305 	 * the threadlist and see if we can account for everybody.
306 	 */
307 	mutex_enter(&pidlock);
308 	tp = curthread->t_next;
309 	do {
310 		p = ttoproc(tp);
311 		if (p->p_as != &kas)
312 			continue;
313 
314 		if (tp->t_flag & T_INTR_THREAD)
315 			continue;
316 
317 		if (! callb_is_stopped(tp, &name)) {
318 			mutex_exit(&pidlock);
319 			cpr_err(CE_WARN,
320 			    "\"%s\" kernel thread not stopped.", name);
321 			return (EBUSY);
322 		}
323 	} while ((tp = tp->t_next) != curthread);
324 	mutex_exit(&pidlock);
325 
326 	CPR_DEBUG(CPR_DEBUG1, "done\n");
327 	return (0);
328 }
329