1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/thread.h> 30 #include <sys/conf.h> 31 #include <sys/cpuvar.h> 32 #include <sys/cpr.h> 33 #include <sys/user.h> 34 #include <sys/cmn_err.h> 35 #include <sys/callb.h> 36 37 extern void utstop_init(void); 38 extern void add_one_utstop(void); 39 extern void utstop_timedwait(long ticks); 40 41 static void cpr_stop_user(int); 42 static int cpr_check_user_threads(void); 43 44 /* 45 * CPR user thread related support routines 46 */ 47 void 48 cpr_signal_user(int sig) 49 { 50 /* 51 * The signal SIGTHAW and SIGFREEZE cannot be sent to every thread yet 52 * since openwin is catching every signal and default action is to exit. 53 * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads. 54 */ 55 struct proc *p; 56 57 mutex_enter(&pidlock); 58 59 for (p = practive; p; p = p->p_next) { 60 /* only user threads */ 61 if (p->p_exec == NULL || p->p_stat == SZOMB || 62 p == proc_init || p == ttoproc(curthread)) 63 continue; 64 65 mutex_enter(&p->p_lock); 66 sigtoproc(p, NULL, sig); 67 mutex_exit(&p->p_lock); 68 } 69 mutex_exit(&pidlock); 70 71 DELAY(MICROSEC); 72 } 73 74 /* max wait time for user thread stop */ 75 #define CPR_UTSTOP_WAIT hz 76 #define CPR_UTSTOP_RETRY 4 77 static int count; 78 79 int 80 cpr_stop_user_threads() 81 { 82 utstop_init(); 83 84 count = 0; 85 do { 86 if (++count > CPR_UTSTOP_RETRY) 87 return (ESRCH); 88 cpr_stop_user(count * count * CPR_UTSTOP_WAIT); 89 } while (cpr_check_user_threads() && 90 (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE)); 91 92 return (0); 93 } 94 95 /* 96 * This routine tries to stop all user threads before we get rid of all 97 * its pages.It goes through allthreads list and set the TP_CHKPT flag 98 * for all user threads and make them runnable. If all of the threads 99 * can be stopped within the max wait time, CPR will proceed. Otherwise 100 * CPR is aborted after a few of similiar retries. 101 */ 102 static void 103 cpr_stop_user(int wait) 104 { 105 kthread_id_t tp; 106 proc_t *p; 107 108 /* The whole loop below needs to be atomic */ 109 mutex_enter(&pidlock); 110 111 /* faster this way */ 112 tp = curthread->t_next; 113 do { 114 /* kernel threads will be handled later */ 115 p = ttoproc(tp); 116 if (p->p_as == &kas || p->p_stat == SZOMB) 117 continue; 118 119 /* 120 * If the thread is stopped (by CPR) already, do nothing; 121 * if running, mark TP_CHKPT; 122 * if sleeping normally, mark TP_CHKPT and setrun; 123 * if sleeping non-interruptable, mark TP_CHKPT only for now; 124 * if sleeping with t_wchan0 != 0 etc, virtually stopped, 125 * do nothing. 126 */ 127 128 /* p_lock is needed for modifying t_proc_flag */ 129 mutex_enter(&p->p_lock); 130 thread_lock(tp); /* needed to check CPR_ISTOPPED */ 131 132 if (tp->t_state == TS_STOPPED) { 133 /* 134 * if already stopped by other reasons, add this new 135 * reason to it. 136 */ 137 if (tp->t_schedflag & TS_RESUME) 138 tp->t_schedflag &= ~TS_RESUME; 139 } else { 140 141 tp->t_proc_flag |= TP_CHKPT; 142 143 thread_unlock(tp); 144 mutex_exit(&p->p_lock); 145 add_one_utstop(); 146 mutex_enter(&p->p_lock); 147 thread_lock(tp); 148 149 aston(tp); 150 151 if (ISWAKEABLE(tp) || ISWAITING(tp)) { 152 setrun_locked(tp); 153 } 154 } 155 /* 156 * force the thread into the kernel if it is not already there. 157 */ 158 if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU) 159 poke_cpu(tp->t_cpu->cpu_id); 160 thread_unlock(tp); 161 mutex_exit(&p->p_lock); 162 163 } while ((tp = tp->t_next) != curthread); 164 mutex_exit(&pidlock); 165 166 utstop_timedwait(wait); 167 } 168 169 /* 170 * Checks and makes sure all user threads are stopped 171 */ 172 static int 173 cpr_check_user_threads() 174 { 175 kthread_id_t tp; 176 int rc = 0; 177 178 mutex_enter(&pidlock); 179 tp = curthread->t_next; 180 do { 181 if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB) 182 continue; 183 184 thread_lock(tp); 185 /* 186 * make sure that we are off all the queues and in a stopped 187 * state. 188 */ 189 if (!CPR_ISTOPPED(tp)) { 190 thread_unlock(tp); 191 mutex_exit(&pidlock); 192 193 if (count == CPR_UTSTOP_RETRY) { 194 CPR_DEBUG(CPR_DEBUG1, "Suspend failed: " 195 "cannot stop uthread\n"); 196 cpr_err(CE_WARN, "Suspend cannot stop " 197 "process %s (%p:%x).", 198 ttoproc(tp)->p_user.u_psargs, (void *)tp, 199 tp->t_state); 200 cpr_err(CE_WARN, "Process may be waiting for" 201 " network request, please try again."); 202 } 203 204 CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x " 205 "sched=%x\n", tp, tp->t_state, tp->t_proc_flag, 206 tp->t_schedflag); 207 CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n", 208 ttoproc(tp), ttoproc(tp)->p_stat, 209 ttoproc(tp)->p_pidp->pid_id); 210 return (1); 211 } 212 thread_unlock(tp); 213 214 } while ((tp = tp->t_next) != curthread && rc == 0); 215 216 mutex_exit(&pidlock); 217 return (0); 218 } 219 220 221 /* 222 * start all threads that were stopped for checkpoint. 223 */ 224 void 225 cpr_start_user_threads() 226 { 227 kthread_id_t tp; 228 proc_t *p; 229 230 mutex_enter(&pidlock); 231 tp = curthread->t_next; 232 do { 233 p = ttoproc(tp); 234 /* 235 * kernel threads are callback'ed rather than setrun. 236 */ 237 if (ttoproc(tp)->p_as == &kas) continue; 238 /* 239 * t_proc_flag should have been cleared. Just to make sure here 240 */ 241 mutex_enter(&p->p_lock); 242 tp->t_proc_flag &= ~TP_CHKPT; 243 mutex_exit(&p->p_lock); 244 245 thread_lock(tp); 246 if (CPR_ISTOPPED(tp)) { 247 248 /* 249 * put it back on the runq 250 */ 251 tp->t_schedflag |= TS_RESUME; 252 setrun_locked(tp); 253 } 254 thread_unlock(tp); 255 /* 256 * DEBUG - Keep track of current and next thread pointer. 257 */ 258 } while ((tp = tp->t_next) != curthread); 259 260 mutex_exit(&pidlock); 261 } 262 263 264 /* 265 * re/start kernel threads 266 */ 267 void 268 cpr_start_kernel_threads(void) 269 { 270 CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons..."); 271 (void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME); 272 CPR_DEBUG(CPR_DEBUG1, "done\n"); 273 274 /* see table lock below */ 275 callb_unlock_table(); 276 } 277 278 279 /* 280 * Stop kernel threads by using the callback mechanism. If any thread 281 * cannot be stopped, return failure. 282 */ 283 int 284 cpr_stop_kernel_threads(void) 285 { 286 caddr_t name; 287 kthread_id_t tp; 288 proc_t *p; 289 290 callb_lock_table(); /* Note: we unlock the table in resume. */ 291 292 CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons..."); 293 if ((name = callb_execute_class(CB_CL_CPR_DAEMON, 294 CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) { 295 cpr_err(CE_WARN, 296 "Could not stop \"%s\" kernel thread. " 297 "Please try again later.", name); 298 return (EBUSY); 299 } 300 301 /* 302 * We think we stopped all the kernel threads. Just in case 303 * someone is not playing by the rules, take a spin through 304 * the threadlist and see if we can account for everybody. 305 */ 306 mutex_enter(&pidlock); 307 tp = curthread->t_next; 308 do { 309 p = ttoproc(tp); 310 if (p->p_as != &kas) 311 continue; 312 313 if (tp->t_flag & T_INTR_THREAD) 314 continue; 315 316 if (! callb_is_stopped(tp, &name)) { 317 mutex_exit(&pidlock); 318 cpr_err(CE_WARN, 319 "\"%s\" kernel thread not stopped.", name); 320 return (EBUSY); 321 } 322 } while ((tp = tp->t_next) != curthread); 323 mutex_exit(&pidlock); 324 325 CPR_DEBUG(CPR_DEBUG1, "done\n"); 326 return (0); 327 } 328