/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/cpr.h>
#include <sys/user.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>

/* utstop bookkeeping: counts threads asked to stop and waits for them */
extern void utstop_init(void);
extern void add_one_utstop(void);
extern void utstop_timedwait(long ticks);

static void cpr_stop_user(int);
static int cpr_check_user_threads(void);

/*
 * CPR (checkpoint/resume) user thread related support routines.
 */

/*
 * Post signal 'sig' to every user process except init (proc_init),
 * zombies, kernel processes (p_exec == NULL) and the caller's own
 * process.  The signal is directed at the process as a whole
 * (sigtoproc with a NULL thread).  After posting, spin-delay for one
 * second (DELAY(MICROSEC)) — presumably to give the signalled
 * processes a chance to act on it before suspend proceeds; TODO
 * confirm intent.
 */
void
cpr_signal_user(int sig)
{
/*
 * The signals SIGTHAW and SIGFREEZE cannot be sent to every thread yet,
 * since openwin catches every signal and the default action is to exit.
 * We also need to implement the true SIGFREEZE and SIGTHAW to stop threads.
 */
	struct proc *p;

	mutex_enter(&pidlock);

	for (p = practive; p; p = p->p_next) {
		/* only user threads */
		if (p->p_exec == NULL || p->p_stat == SZOMB ||
		    p == proc_init || p == ttoproc(curthread))
			continue;

		mutex_enter(&p->p_lock);
		sigtoproc(p, NULL, sig);
		mutex_exit(&p->p_lock);
	}
	mutex_exit(&pidlock);

	DELAY(MICROSEC);
}

/* max wait time for user thread stop */
#define	CPR_UTSTOP_WAIT hz
#define	CPR_UTSTOP_RETRY 4

/* current retry number; shared with cpr_check_user_threads() below */
static int count;

/*
 * Try to stop all user threads, retrying up to CPR_UTSTOP_RETRY times
 * with a quadratically growing wait (count^2 * CPR_UTSTOP_WAIT ticks).
 * Returns ESRCH if the threads could not all be stopped within the
 * retry budget, 0 otherwise.  When a forced suspend was requested
 * (CPR->c_fcn == AD_CPR_FORCE), the loop exits successfully after the
 * final retry even if some threads are still running.
 */
int
cpr_stop_user_threads()
{
	utstop_init();

	count = 0;
	do {
		if (++count > CPR_UTSTOP_RETRY)
			return (ESRCH);
		cpr_stop_user(count * count * CPR_UTSTOP_WAIT);
	} while (cpr_check_user_threads() &&
	    (count < CPR_UTSTOP_RETRY || CPR->c_fcn != AD_CPR_FORCE));

	return (0);
}

/*
 * This routine tries to stop all user threads before we get rid of all
 * their pages.  It goes through the allthreads list and sets the
 * TP_CHKPT flag for all user threads, making them runnable so they can
 * stop themselves in the kernel.  If all of the threads can be stopped
 * within the max wait time, CPR will proceed.  Otherwise CPR is aborted
 * after a few similar retries.
 *
 * 'wait' is the number of ticks passed to utstop_timedwait() to wait
 * for the marked threads to report themselves stopped.
 */
static void
cpr_stop_user(int wait)
{
	kthread_id_t tp;
	proc_t *p;

	/* The whole loop below needs to be atomic */
	mutex_enter(&pidlock);

	/* faster this way */
	tp = curthread->t_next;
	do {
		/* kernel threads will be handled later */
		p = ttoproc(tp);
		if (p->p_as == &kas || p->p_stat == SZOMB)
			continue;

		/*
		 * If the thread is stopped (by CPR) already, do nothing;
		 * if running, mark TP_CHKPT;
		 * if sleeping normally, mark TP_CHKPT and setrun;
		 * if sleeping non-interruptible, mark TP_CHKPT only for now;
		 * if sleeping with t_wchan0 != 0 etc, virtually stopped,
		 * do nothing.
		 */

		/* p_lock is needed for modifying t_proc_flag */
		mutex_enter(&p->p_lock);
		thread_lock(tp);	/* needed to check CPR_ISTOPPED */

		if (tp->t_state == TS_STOPPED) {
			/*
			 * if already stopped by other reasons, add this new
			 * reason to it.
			 */
			if (tp->t_schedflag & TS_RESUME)
				tp->t_schedflag &= ~TS_RESUME;
		} else {

			tp->t_proc_flag |= TP_CHKPT;

			/*
			 * Drop the locks around add_one_utstop(); it counts
			 * this thread into the set utstop_timedwait() will
			 * wait for.  NOTE(review): tp could change state in
			 * this window — the relock below rechecks nothing,
			 * but aston/setrun below are harmless either way;
			 * confirm against utstop implementation.
			 */
			thread_unlock(tp);
			mutex_exit(&p->p_lock);
			add_one_utstop();
			mutex_enter(&p->p_lock);
			thread_lock(tp);

			/* post an AST so the thread notices TP_CHKPT */
			aston(tp);

			if (ISWAKEABLE(tp) || ISWAITING(tp)) {
				setrun_locked(tp);
			}
		}
		/*
		 * force the thread into the kernel if it is not already there.
		 */
		if (tp->t_state == TS_ONPROC && tp->t_cpu != CPU)
			poke_cpu(tp->t_cpu->cpu_id);
		thread_unlock(tp);
		mutex_exit(&p->p_lock);

	} while ((tp = tp->t_next) != curthread);
	mutex_exit(&pidlock);

	/* block until all counted threads stop, or 'wait' ticks elapse */
	utstop_timedwait(wait);
}

/*
 * Checks and makes sure all user threads are stopped.
 * Returns 0 when every user thread is CPR-stopped, 1 as soon as one
 * thread is found still running.  Warnings are only printed on the
 * final retry (count == CPR_UTSTOP_RETRY).
 */
static int
cpr_check_user_threads()
{
	kthread_id_t tp;
	/*
	 * NOTE(review): rc is never set to anything but 0, so the
	 * "rc == 0" clause in the loop condition below is vestigial.
	 */
	int rc = 0;

	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		/* skip kernel processes and zombies */
		if (ttoproc(tp)->p_as == &kas || ttoproc(tp)->p_stat == SZOMB)
			continue;

		thread_lock(tp);
		/*
		 * make sure that we are off all the queues and in a stopped
		 * state.
		 */
		if (!CPR_ISTOPPED(tp)) {
			thread_unlock(tp);
			mutex_exit(&pidlock);

			/*
			 * NOTE(review): tp and its proc are dereferenced
			 * below after pidlock has been dropped — presumably
			 * acceptable for diagnostic output during suspend;
			 * confirm this cannot race a thread exit.
			 */
			if (count == CPR_UTSTOP_RETRY) {
				CPR_DEBUG(CPR_DEBUG1, "Suspend failed: "
				    "cannot stop uthread\n");
				cpr_err(CE_WARN, "Suspend cannot stop "
				    "process %s (%p:%x).",
				    ttoproc(tp)->p_user.u_psargs, (void *)tp,
				    tp->t_state);
				cpr_err(CE_WARN, "Process may be waiting for"
				    " network request, please try again.");
			}

			CPR_DEBUG(CPR_DEBUG2, "cant stop t=%p state=%x pfg=%x "
			    "sched=%x\n", (void *)tp, tp->t_state,
			    tp->t_proc_flag, tp->t_schedflag);
			CPR_DEBUG(CPR_DEBUG2, "proc %p state=%x pid=%d\n",
			    (void *)ttoproc(tp), ttoproc(tp)->p_stat,
			    ttoproc(tp)->p_pidp->pid_id);
			return (1);
		}
		thread_unlock(tp);

	} while ((tp = tp->t_next) != curthread && rc == 0);

	mutex_exit(&pidlock);
	return (0);
}


/*
 * Start all user threads that were stopped for checkpoint: clear
 * TP_CHKPT, and for threads that are CPR-stopped set TS_RESUME and
 * put them back on the run queue.
 */
void
cpr_start_user_threads()
{
	kthread_id_t tp;
	proc_t *p;

	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		p = ttoproc(tp);
		/*
		 * kernel threads are callback'ed rather than setrun.
		 */
		if (ttoproc(tp)->p_as == &kas)
			continue;
		/*
		 * t_proc_flag should have been cleared. Just to make sure here
		 */
		mutex_enter(&p->p_lock);
		tp->t_proc_flag &= ~TP_CHKPT;
		mutex_exit(&p->p_lock);

		thread_lock(tp);
		if (CPR_ISTOPPED(tp)) {

			/*
			 * put it back on the runq
			 */
			tp->t_schedflag |= TS_RESUME;
			setrun_locked(tp);
		}
		thread_unlock(tp);
		/*
		 * DEBUG - Keep track of current and next thread pointer.
		 */
	} while ((tp = tp->t_next) != curthread);

	mutex_exit(&pidlock);
}


/*
 * re/start kernel threads: resume all CB_CL_CPR_DAEMON callbacks and
 * release the callback table lock taken by cpr_stop_kernel_threads().
 */
void
cpr_start_kernel_threads(void)
{
	CPR_DEBUG(CPR_DEBUG1, "starting kernel daemons...");
	(void) callb_execute_class(CB_CL_CPR_DAEMON, CB_CODE_CPR_RESUME);
	CPR_DEBUG(CPR_DEBUG1, "done\n");

	/* see table lock below */
	callb_unlock_table();
}


/*
 * Stop kernel threads by using the callback mechanism.  If any thread
 * cannot be stopped, return failure (EBUSY) and name the offending
 * thread in a warning.  The callback table stays locked on both the
 * success and failure paths; it is unlocked in resume
 * (cpr_start_kernel_threads).
 */
int
cpr_stop_kernel_threads(void)
{
	caddr_t name;

	callb_lock_table();	/* Note: we unlock the table in resume. */

	CPR_DEBUG(CPR_DEBUG1, "stopping kernel daemons...");
	if ((name = callb_execute_class(CB_CL_CPR_DAEMON,
	    CB_CODE_CPR_CHKPT)) != (caddr_t)NULL) {
		cpr_err(CE_WARN,
		    "Could not stop \"%s\" kernel thread. "
		    "Please try again later.", name);
		return (EBUSY);
	}

	CPR_DEBUG(CPR_DEBUG1, ("done\n"));
	return (0);
}

/*
 * Check to see that kernel threads are stopped.
 * This should be called while CPU's are paused, and the caller is
 * effectively running single user, or else we are virtually guaranteed
 * to fail.  The routine should not ASSERT on the paused state or spl
 * level, as there may be a use for this to verify that things are running
 * again.
 * Returns 0 if every non-interrupt kernel thread is accounted for by a
 * stopped callback, EBUSY otherwise.
 */
int
cpr_threads_are_stopped(void)
{
	caddr_t name;
	kthread_id_t tp;
	proc_t *p;

	/*
	 * We think we stopped all the kernel threads.  Just in case
	 * someone is not playing by the rules, take a spin through
	 * the threadlist and see if we can account for everybody.
	 */
	mutex_enter(&pidlock);
	tp = curthread->t_next;
	do {
		p = ttoproc(tp);
		/* only kernel threads (kas address space) are checked here */
		if (p->p_as != &kas)
			continue;

		/* interrupt threads are not expected to be stopped */
		if (tp->t_flag & T_INTR_THREAD)
			continue;

		if (!callb_is_stopped(tp, &name)) {
			mutex_exit(&pidlock);
			cpr_err(CE_WARN,
			    "\"%s\" kernel thread not stopped.", name);
			return (EBUSY);
		}
	} while ((tp = tp->t_next) != curthread);

	mutex_exit(&pidlock);
	return (0);
}