1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/systm.h> 31 #include <sys/errno.h> 32 #include <sys/vfs.h> 33 #include <sys/vnode.h> 34 #include <sys/swap.h> 35 #include <sys/file.h> 36 #include <sys/proc.h> 37 #include <sys/var.h> 38 #include <sys/uadmin.h> 39 #include <sys/signal.h> 40 #include <sys/time.h> 41 #include <vm/seg_kmem.h> 42 #include <sys/modctl.h> 43 #include <sys/callb.h> 44 #include <sys/dumphdr.h> 45 #include <sys/debug.h> 46 #include <sys/ftrace.h> 47 #include <sys/cmn_err.h> 48 #include <sys/panic.h> 49 #include <sys/ddi.h> 50 #include <sys/sunddi.h> 51 #include <sys/policy.h> 52 #include <sys/zone.h> 53 #include <sys/condvar.h> 54 #include <sys/thread.h> 55 #include <sys/sdt.h> 56 57 /* 58 * Administrivia system call. We provide this in two flavors: one for calling 59 * from the system call path (uadmin), and the other for calling from elsewhere 60 * within the kernel (kadmin). Callers must beware that certain uadmin cmd 61 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin. 62 */ 63 64 extern ksema_t fsflush_sema; 65 kmutex_t ualock; 66 kcondvar_t uacond; 67 kthread_t *ua_shutdown_thread = NULL; 68 69 int sys_shutdown = 0; 70 volatile int fastreboot_dryrun = 0; 71 72 /* 73 * Kill all user processes in said zone. A special argument of ALL_ZONES is 74 * passed in when the system as a whole is shutting down. The lack of per-zone 75 * process lists is likely to make the following a performance bottleneck on a 76 * system with many zones. 77 */ 78 void 79 killall(zoneid_t zoneid) 80 { 81 proc_t *p; 82 83 ASSERT(zoneid != GLOBAL_ZONEID); 84 /* 85 * Kill all processes except kernel daemons and ourself. 86 * Make a first pass to stop all processes so they won't 87 * be trying to restart children as we kill them. 88 */ 89 mutex_enter(&pidlock); 90 for (p = practive; p != NULL; p = p->p_next) { 91 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) && 92 p->p_exec != NULLVP && /* kernel daemons */ 93 p->p_as != &kas && 94 p->p_stat != SZOMB) { 95 mutex_enter(&p->p_lock); 96 p->p_flag |= SNOWAIT; 97 sigtoproc(p, NULL, SIGSTOP); 98 mutex_exit(&p->p_lock); 99 } 100 } 101 p = practive; 102 while (p != NULL) { 103 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) && 104 p->p_exec != NULLVP && /* kernel daemons */ 105 p->p_as != &kas && 106 p->p_stat != SIDL && 107 p->p_stat != SZOMB) { 108 mutex_enter(&p->p_lock); 109 if (sigismember(&p->p_sig, SIGKILL)) { 110 mutex_exit(&p->p_lock); 111 p = p->p_next; 112 } else { 113 sigtoproc(p, NULL, SIGKILL); 114 mutex_exit(&p->p_lock); 115 (void) cv_reltimedwait(&p->p_srwchan_cv, 116 &pidlock, hz, TR_CLOCK_TICK); 117 p = practive; 118 } 119 } else { 120 p = p->p_next; 121 } 122 } 123 mutex_exit(&pidlock); 124 } 125 126 int 127 kadmin(int cmd, int fcn, void *mdep, cred_t *credp) 128 { 129 int error = 0; 130 char *buf; 131 size_t buflen = 0; 132 boolean_t invoke_cb = B_FALSE; 133 134 /* 135 * We might be called directly by the kernel's fault-handling code, so 136 * we can't assert that the caller is in the global zone. 137 */ 138 139 /* 140 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes 141 * and that we have appropriate privileges for this action. 142 */ 143 switch (cmd) { 144 case A_FTRACE: 145 case A_SHUTDOWN: 146 case A_REBOOT: 147 case A_REMOUNT: 148 case A_FREEZE: 149 case A_DUMP: 150 case A_SDTTEST: 151 case A_CONFIG: 152 if (secpolicy_sys_config(credp, B_FALSE) != 0) 153 return (EPERM); 154 break; 155 156 default: 157 return (EINVAL); 158 } 159 160 /* 161 * Serialize these operations on ualock. If it is held, the 162 * system should shutdown, reboot, or remount shortly, unless there is 163 * an error. We need a cv rather than just a mutex because proper 164 * functioning of A_REBOOT relies on being able to interrupt blocked 165 * userland callers. 166 * 167 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG. 168 * Other commands should never return. 169 */ 170 if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT || 171 cmd == A_CONFIG) { 172 mutex_enter(&ualock); 173 while (ua_shutdown_thread != NULL) { 174 if (cv_wait_sig(&uacond, &ualock) == 0) { 175 /* 176 * If we were interrupted, leave, and handle 177 * the signal (or exit, depending on what 178 * happened) 179 */ 180 mutex_exit(&ualock); 181 return (EINTR); 182 } 183 } 184 ua_shutdown_thread = curthread; 185 mutex_exit(&ualock); 186 } 187 188 switch (cmd) { 189 case A_SHUTDOWN: 190 { 191 proc_t *p = ttoproc(curthread); 192 193 /* 194 * Release (almost) all of our own resources if we are called 195 * from a user context, however if we are calling kadmin() from 196 * a kernel context then we do not release these resources. 197 */ 198 if (p != &p0) { 199 proc_is_exiting(p); 200 if ((error = exitlwps(0)) != 0) { 201 /* 202 * Another thread in this process also called 203 * exitlwps(). 204 */ 205 mutex_enter(&ualock); 206 ua_shutdown_thread = NULL; 207 cv_signal(&uacond); 208 mutex_exit(&ualock); 209 return (error); 210 } 211 mutex_enter(&p->p_lock); 212 p->p_flag |= SNOWAIT; 213 sigfillset(&p->p_ignore); 214 curthread->t_lwp->lwp_cursig = 0; 215 curthread->t_lwp->lwp_extsig = 0; 216 if (p->p_exec) { 217 vnode_t *exec_vp = p->p_exec; 218 p->p_exec = NULLVP; 219 mutex_exit(&p->p_lock); 220 VN_RELE(exec_vp); 221 } else { 222 mutex_exit(&p->p_lock); 223 } 224 225 pollcleanup(); 226 closeall(P_FINFO(curproc)); 227 relvm(); 228 229 } else { 230 /* 231 * Reset t_cred if not set because much of the 232 * filesystem code depends on CRED() being valid. 233 */ 234 if (curthread->t_cred == NULL) 235 curthread->t_cred = kcred; 236 } 237 238 /* indicate shutdown in progress */ 239 sys_shutdown = 1; 240 241 /* 242 * Communcate that init shouldn't be restarted. 243 */ 244 zone_shutdown_global(); 245 246 killall(ALL_ZONES); 247 /* 248 * If we are calling kadmin() from a kernel context then we 249 * do not release these resources. 250 */ 251 if (ttoproc(curthread) != &p0) { 252 VN_RELE(PTOU(curproc)->u_cdir); 253 if (PTOU(curproc)->u_rdir) 254 VN_RELE(PTOU(curproc)->u_rdir); 255 if (PTOU(curproc)->u_cwd) 256 refstr_rele(PTOU(curproc)->u_cwd); 257 258 PTOU(curproc)->u_cdir = rootdir; 259 PTOU(curproc)->u_rdir = NULL; 260 PTOU(curproc)->u_cwd = NULL; 261 } 262 263 /* 264 * Allow the reboot/halt/poweroff code a chance to do 265 * anything it needs to whilst we still have filesystems 266 * mounted, like loading any modules necessary for later 267 * performing the actual poweroff. 268 */ 269 if ((mdep != NULL) && (*(char *)mdep == '/')) { 270 buf = i_convert_boot_device_name(mdep, NULL, &buflen); 271 mdpreboot(cmd, fcn, buf); 272 } else 273 mdpreboot(cmd, fcn, mdep); 274 275 /* 276 * Allow fsflush to finish running and then prevent it 277 * from ever running again so that vfs_unmountall() and 278 * vfs_syncall() can acquire the vfs locks they need. 279 */ 280 sema_p(&fsflush_sema); 281 (void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL); 282 283 vfs_unmountall(); 284 (void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT); 285 vfs_syncall(); 286 287 dump_ereports(); 288 dump_messages(); 289 290 invoke_cb = B_TRUE; 291 292 /* FALLTHROUGH */ 293 } 294 295 case A_REBOOT: 296 if ((mdep != NULL) && (*(char *)mdep == '/')) { 297 buf = i_convert_boot_device_name(mdep, NULL, &buflen); 298 mdboot(cmd, fcn, buf, invoke_cb); 299 } else 300 mdboot(cmd, fcn, mdep, invoke_cb); 301 /* no return expected */ 302 break; 303 304 case A_CONFIG: 305 switch (fcn) { 306 case AD_UPDATE_BOOT_CONFIG: 307 #ifndef __sparc 308 { 309 extern void fastboot_update_config(const char *); 310 311 fastboot_update_config(mdep); 312 } 313 #endif 314 315 break; 316 } 317 /* Let other threads enter the shutdown path now */ 318 mutex_enter(&ualock); 319 ua_shutdown_thread = NULL; 320 cv_signal(&uacond); 321 mutex_exit(&ualock); 322 break; 323 324 case A_REMOUNT: 325 (void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT); 326 /* Let other threads enter the shutdown path now */ 327 mutex_enter(&ualock); 328 ua_shutdown_thread = NULL; 329 cv_signal(&uacond); 330 mutex_exit(&ualock); 331 break; 332 333 case A_FREEZE: 334 { 335 /* 336 * This is the entrypoint for all suspend/resume actions. 337 */ 338 extern int cpr(int, void *); 339 340 if (modload("misc", "cpr") == -1) 341 return (ENOTSUP); 342 /* Let the CPR module decide what to do with mdep */ 343 error = cpr(fcn, mdep); 344 break; 345 } 346 347 case A_FTRACE: 348 { 349 switch (fcn) { 350 case AD_FTRACE_START: 351 (void) FTRACE_START(); 352 break; 353 case AD_FTRACE_STOP: 354 (void) FTRACE_STOP(); 355 break; 356 default: 357 error = EINVAL; 358 } 359 break; 360 } 361 362 case A_DUMP: 363 { 364 if (fcn == AD_NOSYNC) { 365 in_sync = 1; 366 break; 367 } 368 369 panic_bootfcn = fcn; 370 panic_forced = 1; 371 372 if ((mdep != NULL) && (*(char *)mdep == '/')) { 373 panic_bootstr = i_convert_boot_device_name(mdep, 374 NULL, &buflen); 375 } else 376 panic_bootstr = mdep; 377 378 #ifndef __sparc 379 extern void fastboot_update_and_load(int, char *); 380 381 fastboot_update_and_load(fcn, mdep); 382 #endif 383 384 panic("forced crash dump initiated at user request"); 385 /*NOTREACHED*/ 386 } 387 388 case A_SDTTEST: 389 { 390 DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5, 391 int, 6, int, 7); 392 break; 393 } 394 395 default: 396 error = EINVAL; 397 } 398 399 return (error); 400 } 401 402 int 403 uadmin(int cmd, int fcn, uintptr_t mdep) 404 { 405 int error = 0, rv = 0; 406 size_t nbytes = 0; 407 cred_t *credp = CRED(); 408 char *bootargs = NULL; 409 int reset_status = 0; 410 411 if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) { 412 ddi_walk_devs(ddi_root_node(), check_driver_quiesce, 413 &reset_status); 414 if (reset_status != 0) 415 return (EIO); 416 else 417 return (0); 418 } 419 420 /* 421 * The swapctl system call doesn't have its own entry point: it uses 422 * uadmin as a wrapper so we just call it directly from here. 423 */ 424 if (cmd == A_SWAPCTL) { 425 if (get_udatamodel() == DATAMODEL_NATIVE) 426 error = swapctl(fcn, (void *)mdep, &rv); 427 #if defined(_SYSCALL32_IMPL) 428 else 429 error = swapctl32(fcn, (void *)mdep, &rv); 430 #endif /* _SYSCALL32_IMPL */ 431 return (error ? set_errno(error) : rv); 432 } 433 434 /* 435 * Certain subcommands intepret a non-NULL mdep value as a pointer to 436 * a boot string. We pull that in as bootargs, if applicable. 437 */ 438 if (mdep != NULL && 439 (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP || 440 cmd == A_FREEZE || cmd == A_CONFIG)) { 441 bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); 442 if ((error = copyinstr((const char *)mdep, bootargs, 443 BOOTARGS_MAX, &nbytes)) != 0) { 444 kmem_free(bootargs, BOOTARGS_MAX); 445 return (set_errno(error)); 446 } 447 } 448 449 /* 450 * Invoke the appropriate kadmin() routine. 451 */ 452 if (getzoneid() != GLOBAL_ZONEID) 453 error = zone_kadmin(cmd, fcn, bootargs, credp); 454 else 455 error = kadmin(cmd, fcn, bootargs, credp); 456 457 if (bootargs != NULL) 458 kmem_free(bootargs, BOOTARGS_MAX); 459 return (error ? set_errno(error) : 0); 460 } 461