1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/thread.h> 31 #include <sys/conf.h> 32 #include <sys/cpuvar.h> 33 #include <sys/cpr.h> 34 #include <sys/user.h> 35 #include <sys/cmn_err.h> 36 #include <sys/callb.h> 37 38 extern void utstop_init(void); 39 extern void add_one_utstop(void); 40 extern void utstop_timedwait(long ticks); 41 42 static void cpr_stop_user(int); 43 static int cpr_check_user_threads(void); 44 45 /* 46 * CPR user thread related support routines 47 */ 48 void 49 cpr_signal_user(int sig) 50 { 51 /* 52 * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet 53 * since openwin is catching every signal and default action is to exit. 54 * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads. 55 */ 56 struct proc *p; 57 58 mutex_enter(&pidlock); 59 60 for (p = practive; p; p = p->p_next) { 61 /* only user threads */ 62 if (p->p_exec == NULL || p->p_stat == SZOMB || 63 p == proc_init || p == ttoproc(curthread)) 64 continue; 65 66 mutex_enter(&p->p_lock); 67 sigtoproc(p, NULL, sig); 68 mutex_exit(&p->p_lock); 69 } 70 mutex_exit(&pidlock); 71 72 DELAY(MICROSEC); 73 } 74 75 /* max wait time for user thread stop */ 76 #define CPR_UTSTOP_WAIT hz 77 #define CPR_UTSTOP_RETRY 4 78 static int count; 79 80 int 81 cpr_stop_user_threads() 82 { 83 utstop_init(); 84 85 count = 0; 86 do { 87 if (++count > CPR_UTSTOP_RETRY) 88 return (ESRCH); 89 cpr_stop_user(count * count * CPR_UTSTOP_WAIT); 90 } while (cpr_check_user_threads() && 91 (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE)); 92 93 return (0); 94 } 95 96 /* 97 * This routine tries to stop all user threads before we get rid of all 98 * its pages.It goes through allthreads list and set the TP_CHKPT flag 99 * for all user threads and make them runnable. If all of the threads 100 * can be stopped within the max wait time, CPR will proceed. Otherwise 101 * CPR is aborted after a few of similiar retries. 102 */ 103 static void 104 cpr_stop_user(int wait) 105 { 106 kthread_id_t tp; 107 proc_t *p; 108 109 /* The whole loop below needs to be atomic */ 110 mutex_enter(&pidlock); 111 112 /* faster this way */ 113 tp = curthread->t_next; 114 do { 115 /* kernel threads will be handled later */ 116 p = ttoproc(tp); 117 if (p->p_as == &kas || p->p_stat == SZOMB) 118 continue; 119 120 /* 121 * If the thread is stopped (by CPR) already, do nothing; 122 * if running, mark TP_CHKPT; 123 * if sleeping normally, mark TP_CHKPT and setrun; 124 * if sleeping non-interruptable, mark TP_CHKPT only for now; 125 * if sleeping with t_wchan0 != 0 etc, virtually stopped, 126 * do nothing. 127 */ 128 129 /* p_lock is needed for modifying t_proc_flag */ 130 mutex_enter(&p->p_lock); 131 thread_lock(tp); /* needed to check CPR_ISTOPPED */ 132 133 if (tp->t_state == TS_STOPPED) { 134 /* 135 * if already stopped by other reasons, add this new 136 * reason to it. 137 */ 138 if (tp->t_schedflag & TS_RESUME) 139 tp->t_schedflag &= ~TS_RESUME; 140 } else { 141 142 tp->t_proc_flag |= TP_CHKPT; 143 144 thread_unlock(tp); 145 mutex_exit(&p->p_lock); 146 add_one_utstop(); 147 mutex_enter(&p->p_lock); 148 thread_lock(tp); 149 150 aston(tp); 151 152 if (tp->t_state == TS_SLEEP && 153 (tp->t_flag & T_WAKEABLE)) { 154 setrun_locked(tp); 155 } 156 } 157 /* 158 * force the thread into the kernel if it is not already there. 159 */ 160 if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU) 161 poke_cpu(tp->t_cpu->cpu_id); 162 thread_unlock(tp); 163 mutex_exit(&p->p_lock); 164 165 } while ((tp = tp->t_next) != curthread); 166 mutex_exit(&pidlock); 167 168 utstop_timedwait(wait); 169 } 170 171 /* 172 * Checks and makes sure all user threads are stopped 173 */ 174 static int 175 cpr_check_user_threads() 176 { 177 kthread_id_t tp; 178 int rc = 0; 179 180 mutex_enter(&pidlock); 181 tp = curthread->t_next; 182 do { 183 if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB) 184 continue; 185 186 thread_lock(tp); 187 /* 188 * make sure that we are off all the queues and in a stopped 189 * state. 190 */ 191 if (!CPR_ISTOPPED(tp)) { 192 thread_unlock(tp); 193 mutex_exit(&pidlock); 194 195 if (count == CPR_UTSTOP_RETRY) { 196 DEBUG1(errp("Suspend failed: cannt stop " 197 "uthread\n")); 198 cpr_err(CE_WARN, "Suspend cannot stop " 199 "process %s (%p:%x).", 200 ttoproc(tp)->p_user.u_psargs, (void *)tp, 201 tp->t_state); 202 cpr_err(CE_WARN, "Process may be waiting for" 203 " network request, please try again."); 204 } 205 206 DEBUG2(errp("cant stop t=%p state=%x pfg=%x sched=%x\n", 207 tp, tp->t_state, tp->t_proc_flag, tp->t_schedflag)); 208 DEBUG2(errp("proc %p state=%x pid=%d\n", 209 ttoproc(tp), ttoproc(tp)->p_stat, 210 ttoproc(tp)->p_pidp->pid_id)); 211 return (1); 212 } 213 thread_unlock(tp); 214 215 } while ((tp = tp->t_next) != curthread && rc == 0); 216 217 mutex_exit(&pidlock); 218 return (0); 219 } 220 221 222 /* 223 * start all threads that were stopped for checkpoint. 224 */ 225 void 226 cpr_start_user_threads() 227 { 228 kthread_id_t tp; 229 proc_t *p; 230 231 mutex_enter(&pidlock); 232 tp = curthread->t_next; 233 do { 234 p = ttoproc(tp); 235 /* 236 * kernel threads are callback'ed rather than setrun. 237 */ 238 if (ttoproc(tp)->p_as == &kas) continue; 239 /* 240 * t_proc_flag should have been cleared. Just to make sure here 241 */ 242 mutex_enter(&p->p_lock); 243 tp->t_proc_flag &= ~TP_CHKPT; 244 mutex_exit(&p->p_lock); 245 246 thread_lock(tp); 247 if (CPR_ISTOPPED(tp)) { 248 249 /* 250 * put it back on the runq 251 */ 252 tp->t_schedflag |= TS_RESUME; 253 setrun_locked(tp); 254 } 255 thread_unlock(tp); 256 /* 257 * DEBUG - Keep track of current and next thread pointer. 258 */ 259 } while ((tp = tp->t_next) != curthread); 260 261 mutex_exit(&pidlock); 262 } 263 264 265 /* 266 * re/start kernel threads 267 */ 268 void 269 cpr_start_kernel_threads(void) 270 { 271 DEBUG1(errp("starting kernel daemons...")); 272 (void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME); 273 DEBUG1(errp("done\n")); 274 275 /* see table lock below */ 276 callb_unlock_table(); 277 } 278 279 280 /* 281 * Stop kernel threads by using the callback mechanism. If any thread 282 * cannot be stopped, return failure. 283 */ 284 int 285 cpr_stop_kernel_threads(void) 286 { 287 caddr_t name; 288 kthread_id_t tp; 289 proc_t *p; 290 291 callb_lock_table(); /* Note: we unlock the table in resume. */ 292 293 DEBUG1(errp("stopping kernel daemons...")); 294 if ((name = callb_execute_class(CB_CL_CPR_DAEMON, 295 CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) { 296 cpr_err(CE_WARN, 297 "Could not stop \"%s\" kernel thread. " 298 "Please try again later.", name); 299 return (EBUSY); 300 } 301 302 /* 303 * We think we stopped all the kernel threads. Just in case 304 * someone is not playing by the rules, take a spin through 305 * the threadlist and see if we can account for everybody. 306 */ 307 mutex_enter(&pidlock); 308 tp = curthread->t_next; 309 do { 310 p = ttoproc(tp); 311 if (p->p_as != &kas) 312 continue; 313 314 if (tp->t_flag & T_INTR_THREAD) 315 continue; 316 317 if (! callb_is_stopped(tp, &name)) { 318 mutex_exit(&pidlock); 319 cpr_err(CE_WARN, 320 "\"%s\" kernel thread not stopped.", name); 321 return (EBUSY); 322 } 323 } while ((tp = tp->t_next) != curthread); 324 mutex_exit(&pidlock); 325 326 DEBUG1(errp("done\n")); 327 return (0); 328 } 329