xref: /titanic_50/usr/src/uts/common/cpr/cpr_uthread.c (revision 088e9d477eee66081e407fbc5a33c4da25f66f6a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/thread.h>
31 #include <sys/conf.h>
32 #include <sys/cpuvar.h>
33 #include <sys/cpr.h>
34 #include <sys/user.h>
35 #include <sys/cmn_err.h>
36 #include <sys/callb.h>
37 
38 extern void utstop_init(void);
39 extern void add_one_utstop(void);
40 extern void utstop_timedwait(long ticks);
41 
42 static void cpr_stop_user(int);
43 static int cpr_check_user_threads(void);
44 
45 /*
46  * CPR user thread related support routines
47  */
48 void
49 cpr_signal_user(int sig)
50 {
51 /*
52  * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet
53  * since openwin is catching every signal and default action is to exit.
54  * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads.
55  */
56 	struct proc *p;
57 
58 	mutex_enter(&pidlock);
59 
60 	for (p = practive; p; p = p->p_next) {
61 		/* only user threads */
62 		if (p->p_exec == NULL || p->p_stat == SZOMB ||
63 			p == proc_init || p == ttoproc(curthread))
64 			continue;
65 
66 		mutex_enter(&p->p_lock);
67 		sigtoproc(p, NULL, sig);
68 		mutex_exit(&p->p_lock);
69 	}
70 	mutex_exit(&pidlock);
71 
72 	DELAY(MICROSEC);
73 }
74 
75 /* max wait time for user thread stop */
76 #define	CPR_UTSTOP_WAIT		hz
77 #define	CPR_UTSTOP_RETRY	4
78 static int count;
79 
80 int
81 cpr_stop_user_threads()
82 {
83 	utstop_init();
84 
85 	count = 0;
86 	do {
87 		if (++count > CPR_UTSTOP_RETRY)
88 			return (ESRCH);
89 		cpr_stop_user(count * count * CPR_UTSTOP_WAIT);
90 	} while (cpr_check_user_threads() &&
91 		(count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE));
92 
93 	return (0);
94 }
95 
96 /*
97  * This routine tries to stop all user threads before we get rid of all
98  * its pages.It goes through allthreads list and set the TP_CHKPT flag
99  * for all user threads and make them runnable. If all of the threads
100  * can be stopped within the max wait time, CPR will proceed. Otherwise
101  * CPR is aborted after a few of similiar retries.
102  */
103 static void
104 cpr_stop_user(int wait)
105 {
106 	kthread_id_t tp;
107 	proc_t *p;
108 
109 	/* The whole loop below needs to be atomic */
110 	mutex_enter(&pidlock);
111 
112 	/* faster this way */
113 	tp = curthread->t_next;
114 	do {
115 		/* kernel threads will be handled later */
116 		p = ttoproc(tp);
117 		if (p->p_as == &kas || p->p_stat == SZOMB)
118 			continue;
119 
120 		/*
121 		 * If the thread is stopped (by CPR) already, do nothing;
122 		 * if running, mark TP_CHKPT;
123 		 * if sleeping normally, mark TP_CHKPT and setrun;
124 		 * if sleeping non-interruptable, mark TP_CHKPT only for now;
125 		 * if sleeping with t_wchan0 != 0 etc, virtually stopped,
126 		 * do nothing.
127 		 */
128 
129 		/* p_lock is needed for modifying t_proc_flag */
130 		mutex_enter(&p->p_lock);
131 		thread_lock(tp); /* needed to check CPR_ISTOPPED */
132 
133 		if (tp->t_state == TS_STOPPED) {
134 			/*
135 			 * if already stopped by other reasons, add this new
136 			 * reason to it.
137 			 */
138 			if (tp->t_schedflag & TS_RESUME)
139 				tp->t_schedflag &= ~TS_RESUME;
140 		} else {
141 
142 			tp->t_proc_flag |= TP_CHKPT;
143 
144 			thread_unlock(tp);
145 			mutex_exit(&p->p_lock);
146 			add_one_utstop();
147 			mutex_enter(&p->p_lock);
148 			thread_lock(tp);
149 
150 			aston(tp);
151 
152 			if (tp->t_state == TS_SLEEP &&
153 			    (tp->t_flag & T_WAKEABLE)) {
154 				setrun_locked(tp);
155 			}
156 		}
157 		/*
158 		 * force the thread into the kernel if it is not already there.
159 		 */
160 		if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
161 			poke_cpu(tp->t_cpu->cpu_id);
162 		thread_unlock(tp);
163 		mutex_exit(&p->p_lock);
164 
165 	} while ((tp = tp->t_next) != curthread);
166 	mutex_exit(&pidlock);
167 
168 	utstop_timedwait(wait);
169 }
170 
171 /*
172  * Checks and makes sure all user threads are stopped
173  */
174 static int
175 cpr_check_user_threads()
176 {
177 	kthread_id_t tp;
178 	int rc = 0;
179 
180 	mutex_enter(&pidlock);
181 	tp = curthread->t_next;
182 	do {
183 		if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
184 			continue;
185 
186 		thread_lock(tp);
187 		/*
188 		 * make sure that we are off all the queues and in a stopped
189 		 * state.
190 		 */
191 		if (!CPR_ISTOPPED(tp)) {
192 			thread_unlock(tp);
193 			mutex_exit(&pidlock);
194 
195 			if (count == CPR_UTSTOP_RETRY) {
196 			DEBUG1(errp("Suspend failed: cannt stop "
197 				"uthread\n"));
198 			cpr_err(CE_WARN, "Suspend cannot stop "
199 				"process %s (%p:%x).",
200 				ttoproc(tp)->p_user.u_psargs, (void *)tp,
201 				tp->t_state);
202 			cpr_err(CE_WARN, "Process may be waiting for"
203 				" network request, please try again.");
204 			}
205 
206 			DEBUG2(errp("cant stop t=%p state=%x pfg=%x sched=%x\n",
207 			tp, tp->t_state, tp->t_proc_flag, tp->t_schedflag));
208 			DEBUG2(errp("proc %p state=%x pid=%d\n",
209 				ttoproc(tp), ttoproc(tp)->p_stat,
210 				ttoproc(tp)->p_pidp->pid_id));
211 			return (1);
212 		}
213 		thread_unlock(tp);
214 
215 	} while ((tp = tp->t_next) != curthread && rc == 0);
216 
217 	mutex_exit(&pidlock);
218 	return (0);
219 }
220 
221 
222 /*
223  * start all threads that were stopped for checkpoint.
224  */
225 void
226 cpr_start_user_threads()
227 {
228 	kthread_id_t tp;
229 	proc_t *p;
230 
231 	mutex_enter(&pidlock);
232 	tp = curthread->t_next;
233 	do {
234 		p = ttoproc(tp);
235 		/*
236 		 * kernel threads are callback'ed rather than setrun.
237 		 */
238 		if (ttoproc(tp)->p_as == &kas) continue;
239 		/*
240 		 * t_proc_flag should have been cleared. Just to make sure here
241 		 */
242 		mutex_enter(&p->p_lock);
243 		tp->t_proc_flag &= ~TP_CHKPT;
244 		mutex_exit(&p->p_lock);
245 
246 		thread_lock(tp);
247 		if (CPR_ISTOPPED(tp)) {
248 
249 			/*
250 			 * put it back on the runq
251 			 */
252 			tp->t_schedflag |= TS_RESUME;
253 			setrun_locked(tp);
254 		}
255 		thread_unlock(tp);
256 		/*
257 		 * DEBUG - Keep track of current and next thread pointer.
258 		 */
259 	} while ((tp = tp->t_next) != curthread);
260 
261 	mutex_exit(&pidlock);
262 }
263 
264 
265 /*
266  * re/start kernel threads
267  */
268 void
269 cpr_start_kernel_threads(void)
270 {
271 	DEBUG1(errp("starting kernel daemons..."));
272 	(void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME);
273 	DEBUG1(errp("done\n"));
274 
275 	/* see table lock below */
276 	callb_unlock_table();
277 }
278 
279 
280 /*
281  * Stop kernel threads by using the callback mechanism.  If any thread
282  * cannot be stopped, return failure.
283  */
284 int
285 cpr_stop_kernel_threads(void)
286 {
287 	caddr_t	name;
288 	kthread_id_t tp;
289 	proc_t *p;
290 
291 	callb_lock_table();	/* Note: we unlock the table in resume. */
292 
293 	DEBUG1(errp("stopping kernel daemons..."));
294 	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
295 	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
296 		cpr_err(CE_WARN,
297 		    "Could not stop \"%s\" kernel thread.  "
298 		    "Please try again later.", name);
299 		return (EBUSY);
300 	}
301 
302 	/*
303 	 * We think we stopped all the kernel threads.  Just in case
304 	 * someone is not playing by the rules, take a spin through
305 	 * the threadlist and see if we can account for everybody.
306 	 */
307 	mutex_enter(&pidlock);
308 	tp = curthread->t_next;
309 	do {
310 		p = ttoproc(tp);
311 		if (p->p_as != &kas)
312 			continue;
313 
314 		if (tp->t_flag & T_INTR_THREAD)
315 			continue;
316 
317 		if (! callb_is_stopped(tp, &name)) {
318 			mutex_exit(&pidlock);
319 			cpr_err(CE_WARN,
320 			    "\"%s\" kernel thread not stopped.", name);
321 			return (EBUSY);
322 		}
323 	} while ((tp = tp->t_next) != curthread);
324 	mutex_exit(&pidlock);
325 
326 	DEBUG1(errp("done\n"));
327 	return (0);
328 }
329