1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/thread.h> 30 #include <sys/conf.h> 31 #include <sys/cpuvar.h> 32 #include <sys/cpr.h> 33 #include <sys/user.h> 34 #include <sys/cmn_err.h> 35 #include <sys/callb.h> 36 37 extern void utstop_init(void); 38 extern void add_one_utstop(void); 39 extern void utstop_timedwait(long ticks); 40 41 static void cpr_stop_user(int); 42 static int cpr_check_user_threads(void); 43 44 /* 45 * CPR user thread related support routines 46 */ 47 void 48 cpr_signal_user(int sig) 49 { 50 /* 51 * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet 52 * since openwin is catching every signal and default action is to exit. 53 * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads. 54 */ 55 struct proc *p; 56 57 mutex_enter(&pidlock); 58 59 for (p = practive; p; p = p->p_next) { 60 /* only user threads */ 61 if (p->p_exec == NULL || p->p_stat == SZOMB || 62 p == proc_init || p == ttoproc(curthread)) 63 continue; 64 65 mutex_enter(&p->p_lock); 66 sigtoproc(p, NULL, sig); 67 mutex_exit(&p->p_lock); 68 } 69 mutex_exit(&pidlock); 70 71 DELAY(MICROSEC); 72 } 73 74 /* max wait time for user thread stop */ 75 #define CPR_UTSTOP_WAIT hz 76 #define CPR_UTSTOP_RETRY 4 77 static int count; 78 79 int 80 cpr_stop_user_threads() 81 { 82 utstop_init(); 83 84 count = 0; 85 do { 86 if (++count > CPR_UTSTOP_RETRY) 87 return (ESRCH); 88 cpr_stop_user(count * count * CPR_UTSTOP_WAIT); 89 } while (cpr_check_user_threads() && 90 (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE)); 91 92 return (0); 93 } 94 95 /* 96 * This routine tries to stop all user threads before we get rid of all 97 * its pages.It goes through allthreads list and set the TP_CHKPT flag 98 * for all user threads and make them runnable. If all of the threads 99 * can be stopped within the max wait time, CPR will proceed. Otherwise 100 * CPR is aborted after a few of similiar retries. 101 */ 102 static void 103 cpr_stop_user(int wait) 104 { 105 kthread_id_t tp; 106 proc_t *p; 107 108 /* The whole loop below needs to be atomic */ 109 mutex_enter(&pidlock); 110 111 /* faster this way */ 112 tp = curthread->t_next; 113 do { 114 /* kernel threads will be handled later */ 115 p = ttoproc(tp); 116 if (p->p_as == &kas || p->p_stat == SZOMB) 117 continue; 118 119 /* 120 * If the thread is stopped (by CPR) already, do nothing; 121 * if running, mark TP_CHKPT; 122 * if sleeping normally, mark TP_CHKPT and setrun; 123 * if sleeping non-interruptable, mark TP_CHKPT only for now; 124 * if sleeping with t_wchan0 != 0 etc, virtually stopped, 125 * do nothing. 126 */ 127 128 /* p_lock is needed for modifying t_proc_flag */ 129 mutex_enter(&p->p_lock); 130 thread_lock(tp); /* needed to check CPR_ISTOPPED */ 131 132 if (tp->t_state == TS_STOPPED) { 133 /* 134 * if already stopped by other reasons, add this new 135 * reason to it. 136 */ 137 if (tp->t_schedflag & TS_RESUME) 138 tp->t_schedflag &= ~TS_RESUME; 139 } else { 140 141 tp->t_proc_flag |= TP_CHKPT; 142 143 thread_unlock(tp); 144 mutex_exit(&p->p_lock); 145 add_one_utstop(); 146 mutex_enter(&p->p_lock); 147 thread_lock(tp); 148 149 aston(tp); 150 151 if (tp->t_state == TS_SLEEP && 152 (tp->t_flag & T_WAKEABLE)) { 153 setrun_locked(tp); 154 } 155 } 156 /* 157 * force the thread into the kernel if it is not already there. 158 */ 159 if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU) 160 poke_cpu(tp->t_cpu->cpu_id); 161 thread_unlock(tp); 162 mutex_exit(&p->p_lock); 163 164 } while ((tp = tp->t_next) != curthread); 165 mutex_exit(&pidlock); 166 167 utstop_timedwait(wait); 168 } 169 170 /* 171 * Checks and makes sure all user threads are stopped 172 */ 173 static int 174 cpr_check_user_threads() 175 { 176 kthread_id_t tp; 177 int rc = 0; 178 179 mutex_enter(&pidlock); 180 tp = curthread->t_next; 181 do { 182 if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB) 183 continue; 184 185 thread_lock(tp); 186 /* 187 * make sure that we are off all the queues and in a stopped 188 * state. 189 */ 190 if (!CPR_ISTOPPED(tp)) { 191 thread_unlock(tp); 192 mutex_exit(&pidlock); 193 194 if (count == CPR_UTSTOP_RETRY) { 195 CPR_DEBUG(CPR_DEBUG1, "Suspend failed: " 196 "cannot stop uthread\n"); 197 cpr_err(CE_WARN, "Suspend cannot stop " 198 "process %s (%p:%x).", 199 ttoproc(tp)->p_user.u_psargs, (void *)tp, 200 tp->t_state); 201 cpr_err(CE_WARN, "Process may be waiting for" 202 " network request, please try again."); 203 } 204 205 CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x " 206 "sched=%x\n", tp, tp->t_state, tp->t_proc_flag, 207 tp->t_schedflag); 208 CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n", 209 ttoproc(tp), ttoproc(tp)->p_stat, 210 ttoproc(tp)->p_pidp->pid_id); 211 return (1); 212 } 213 thread_unlock(tp); 214 215 } while ((tp = tp->t_next) != curthread && rc == 0); 216 217 mutex_exit(&pidlock); 218 return (0); 219 } 220 221 222 /* 223 * start all threads that were stopped for checkpoint. 224 */ 225 void 226 cpr_start_user_threads() 227 { 228 kthread_id_t tp; 229 proc_t *p; 230 231 mutex_enter(&pidlock); 232 tp = curthread->t_next; 233 do { 234 p = ttoproc(tp); 235 /* 236 * kernel threads are callback'ed rather than setrun. 237 */ 238 if (ttoproc(tp)->p_as == &kas) continue; 239 /* 240 * t_proc_flag should have been cleared. Just to make sure here 241 */ 242 mutex_enter(&p->p_lock); 243 tp->t_proc_flag &= ~TP_CHKPT; 244 mutex_exit(&p->p_lock); 245 246 thread_lock(tp); 247 if (CPR_ISTOPPED(tp)) { 248 249 /* 250 * put it back on the runq 251 */ 252 tp->t_schedflag |= TS_RESUME; 253 setrun_locked(tp); 254 } 255 thread_unlock(tp); 256 /* 257 * DEBUG - Keep track of current and next thread pointer. 258 */ 259 } while ((tp = tp->t_next) != curthread); 260 261 mutex_exit(&pidlock); 262 } 263 264 265 /* 266 * re/start kernel threads 267 */ 268 void 269 cpr_start_kernel_threads(void) 270 { 271 CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons..."); 272 (void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME); 273 CPR_DEBUG(CPR_DEBUG1, "done\n"); 274 275 /* see table lock below */ 276 callb_unlock_table(); 277 } 278 279 280 /* 281 * Stop kernel threads by using the callback mechanism. If any thread 282 * cannot be stopped, return failure. 283 */ 284 int 285 cpr_stop_kernel_threads(void) 286 { 287 caddr_t name; 288 kthread_id_t tp; 289 proc_t *p; 290 291 callb_lock_table(); /* Note: we unlock the table in resume. */ 292 293 CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons..."); 294 if ((name = callb_execute_class(CB_CL_CPR_DAEMON, 295 CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) { 296 cpr_err(CE_WARN, 297 "Could not stop \"%s\" kernel thread. " 298 "Please try again later.", name); 299 return (EBUSY); 300 } 301 302 /* 303 * We think we stopped all the kernel threads. Just in case 304 * someone is not playing by the rules, take a spin through 305 * the threadlist and see if we can account for everybody. 306 */ 307 mutex_enter(&pidlock); 308 tp = curthread->t_next; 309 do { 310 p = ttoproc(tp); 311 if (p->p_as != &kas) 312 continue; 313 314 if (tp->t_flag & T_INTR_THREAD) 315 continue; 316 317 if (! callb_is_stopped(tp, &name)) { 318 mutex_exit(&pidlock); 319 cpr_err(CE_WARN, 320 "\"%s\" kernel thread not stopped.", name); 321 return (EBUSY); 322 } 323 } while ((tp = tp->t_next) != curthread); 324 mutex_exit(&pidlock); 325 326 CPR_DEBUG(CPR_DEBUG1, "done\n"); 327 return (0); 328 } 329