1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 28 #include <sys/param.h> 29 #include <sys/types.h> 30 #include <sys/sysmacros.h> 31 #include <sys/systm.h> 32 #include <sys/errno.h> 33 #include <sys/vfs.h> 34 #include <sys/vnode.h> 35 #include <sys/swap.h> 36 #include <sys/file.h> 37 #include <sys/proc.h> 38 #include <sys/var.h> 39 #include <sys/uadmin.h> 40 #include <sys/signal.h> 41 #include <sys/time.h> 42 #include <vm/seg_kmem.h> 43 #include <sys/modctl.h> 44 #include <sys/callb.h> 45 #include <sys/dumphdr.h> 46 #include <sys/debug.h> 47 #include <sys/ftrace.h> 48 #include <sys/cmn_err.h> 49 #include <sys/panic.h> 50 #include <sys/ddi.h> 51 #include <sys/sunddi.h> 52 #include <sys/policy.h> 53 #include <sys/zone.h> 54 #include <sys/condvar.h> 55 #include <sys/thread.h> 56 #include <sys/sdt.h> 57 58 /* 59 * Administrivia system call. We provide this in two flavors: one for calling 60 * from the system call path (uadmin), and the other for calling from elsewhere 61 * within the kernel (kadmin). Callers must beware that certain uadmin cmd 62 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin. 63 */ 64 65 extern ksema_t fsflush_sema; 66 kmutex_t ualock; 67 kcondvar_t uacond; 68 kthread_t *ua_shutdown_thread = NULL; 69 70 int sys_shutdown = 0; 71 volatile int fastreboot_dryrun = 0; 72 73 /* 74 * Kill all user processes in said zone. A special argument of ALL_ZONES is 75 * passed in when the system as a whole is shutting down. The lack of per-zone 76 * process lists is likely to make the following a performance bottleneck on a 77 * system with many zones. 78 */ 79 void 80 killall(zoneid_t zoneid) 81 { 82 proc_t *p; 83 84 ASSERT(zoneid != GLOBAL_ZONEID); 85 /* 86 * Kill all processes except kernel daemons and ourself. 87 * Make a first pass to stop all processes so they won't 88 * be trying to restart children as we kill them. 89 */ 90 mutex_enter(&pidlock); 91 for (p = practive; p != NULL; p = p->p_next) { 92 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) && 93 p->p_exec != NULLVP && /* kernel daemons */ 94 p->p_as != &kas && 95 p->p_stat != SZOMB) { 96 mutex_enter(&p->p_lock); 97 p->p_flag |= SNOWAIT; 98 sigtoproc(p, NULL, SIGSTOP); 99 mutex_exit(&p->p_lock); 100 } 101 } 102 p = practive; 103 while (p != NULL) { 104 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) && 105 p->p_exec != NULLVP && /* kernel daemons */ 106 p->p_as != &kas && 107 p->p_stat != SIDL && 108 p->p_stat != SZOMB) { 109 mutex_enter(&p->p_lock); 110 if (sigismember(&p->p_sig, SIGKILL)) { 111 mutex_exit(&p->p_lock); 112 p = p->p_next; 113 } else { 114 sigtoproc(p, NULL, SIGKILL); 115 mutex_exit(&p->p_lock); 116 (void) cv_timedwait(&p->p_srwchan_cv, &pidlock, 117 lbolt + hz); 118 p = practive; 119 } 120 } else { 121 p = p->p_next; 122 } 123 } 124 mutex_exit(&pidlock); 125 } 126 127 int 128 kadmin(int cmd, int fcn, void *mdep, cred_t *credp) 129 { 130 int error = 0; 131 char *buf; 132 size_t buflen = 0; 133 boolean_t invoke_cb = B_FALSE; 134 135 /* 136 * We might be called directly by the kernel's fault-handling code, so 137 * we can't assert that the caller is in the global zone. 138 */ 139 140 /* 141 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes 142 * and that we have appropriate privileges for this action. 143 */ 144 switch (cmd) { 145 case A_FTRACE: 146 case A_SHUTDOWN: 147 case A_REBOOT: 148 case A_REMOUNT: 149 case A_FREEZE: 150 case A_DUMP: 151 case A_SDTTEST: 152 case A_CONFIG: 153 if (secpolicy_sys_config(credp, B_FALSE) != 0) 154 return (EPERM); 155 break; 156 157 default: 158 return (EINVAL); 159 } 160 161 /* 162 * Serialize these operations on ualock. If it is held, the 163 * system should shutdown, reboot, or remount shortly, unless there is 164 * an error. We need a cv rather than just a mutex because proper 165 * functioning of A_REBOOT relies on being able to interrupt blocked 166 * userland callers. 167 * 168 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG. 169 * Other commands should never return. 170 */ 171 if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT || 172 cmd == A_CONFIG) { 173 mutex_enter(&ualock); 174 while (ua_shutdown_thread != NULL) { 175 if (cv_wait_sig(&uacond, &ualock) == 0) { 176 /* 177 * If we were interrupted, leave, and handle 178 * the signal (or exit, depending on what 179 * happened) 180 */ 181 mutex_exit(&ualock); 182 return (EINTR); 183 } 184 } 185 ua_shutdown_thread = curthread; 186 mutex_exit(&ualock); 187 } 188 189 switch (cmd) { 190 case A_SHUTDOWN: 191 { 192 proc_t *p = ttoproc(curthread); 193 194 /* 195 * Release (almost) all of our own resources if we are called 196 * from a user context, however if we are calling kadmin() from 197 * a kernel context then we do not release these resources. 198 */ 199 if (p != &p0) { 200 proc_is_exiting(p); 201 if ((error = exitlwps(0)) != 0) { 202 /* 203 * Another thread in this process also called 204 * exitlwps(). 205 */ 206 mutex_enter(&ualock); 207 ua_shutdown_thread = NULL; 208 cv_signal(&uacond); 209 mutex_exit(&ualock); 210 return (error); 211 } 212 mutex_enter(&p->p_lock); 213 p->p_flag |= SNOWAIT; 214 sigfillset(&p->p_ignore); 215 curthread->t_lwp->lwp_cursig = 0; 216 curthread->t_lwp->lwp_extsig = 0; 217 if (p->p_exec) { 218 vnode_t *exec_vp = p->p_exec; 219 p->p_exec = NULLVP; 220 mutex_exit(&p->p_lock); 221 VN_RELE(exec_vp); 222 } else { 223 mutex_exit(&p->p_lock); 224 } 225 226 pollcleanup(); 227 closeall(P_FINFO(curproc)); 228 relvm(); 229 230 } else { 231 /* 232 * Reset t_cred if not set because much of the 233 * filesystem code depends on CRED() being valid. 234 */ 235 if (curthread->t_cred == NULL) 236 curthread->t_cred = kcred; 237 } 238 239 /* indicate shutdown in progress */ 240 sys_shutdown = 1; 241 242 /* 243 * Communcate that init shouldn't be restarted. 244 */ 245 zone_shutdown_global(); 246 247 killall(ALL_ZONES); 248 /* 249 * If we are calling kadmin() from a kernel context then we 250 * do not release these resources. 251 */ 252 if (ttoproc(curthread) != &p0) { 253 VN_RELE(PTOU(curproc)->u_cdir); 254 if (PTOU(curproc)->u_rdir) 255 VN_RELE(PTOU(curproc)->u_rdir); 256 if (PTOU(curproc)->u_cwd) 257 refstr_rele(PTOU(curproc)->u_cwd); 258 259 PTOU(curproc)->u_cdir = rootdir; 260 PTOU(curproc)->u_rdir = NULL; 261 PTOU(curproc)->u_cwd = NULL; 262 } 263 264 /* 265 * Allow the reboot/halt/poweroff code a chance to do 266 * anything it needs to whilst we still have filesystems 267 * mounted, like loading any modules necessary for later 268 * performing the actual poweroff. 269 */ 270 if ((mdep != NULL) && (*(char *)mdep == '/')) { 271 buf = i_convert_boot_device_name(mdep, NULL, &buflen); 272 mdpreboot(cmd, fcn, buf); 273 } else 274 mdpreboot(cmd, fcn, mdep); 275 276 /* 277 * Allow fsflush to finish running and then prevent it 278 * from ever running again so that vfs_unmountall() and 279 * vfs_syncall() can acquire the vfs locks they need. 280 */ 281 sema_p(&fsflush_sema); 282 (void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL); 283 284 vfs_unmountall(); 285 (void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT); 286 vfs_syncall(); 287 288 dump_ereports(); 289 dump_messages(); 290 291 invoke_cb = B_TRUE; 292 293 /* FALLTHROUGH */ 294 } 295 296 case A_REBOOT: 297 if ((mdep != NULL) && (*(char *)mdep == '/')) { 298 buf = i_convert_boot_device_name(mdep, NULL, &buflen); 299 mdboot(cmd, fcn, buf, invoke_cb); 300 } else 301 mdboot(cmd, fcn, mdep, invoke_cb); 302 /* no return expected */ 303 break; 304 305 case A_CONFIG: 306 switch (fcn) { 307 case AD_UPDATE_BOOT_CONFIG: 308 #ifndef __sparc 309 { 310 extern int fastreboot_capable; 311 extern void fastboot_update_config(const char *); 312 313 if (fastreboot_capable) 314 fastboot_update_config(mdep); 315 } 316 #endif 317 318 break; 319 } 320 /* Let other threads enter the shutdown path now */ 321 mutex_enter(&ualock); 322 ua_shutdown_thread = NULL; 323 cv_signal(&uacond); 324 mutex_exit(&ualock); 325 break; 326 327 case A_REMOUNT: 328 (void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT); 329 /* Let other threads enter the shutdown path now */ 330 mutex_enter(&ualock); 331 ua_shutdown_thread = NULL; 332 cv_signal(&uacond); 333 mutex_exit(&ualock); 334 break; 335 336 case A_FREEZE: 337 { 338 /* 339 * This is the entrypoint for all suspend/resume actions. 340 */ 341 extern int cpr(int, void *); 342 343 if (modload("misc", "cpr") == -1) 344 return (ENOTSUP); 345 /* Let the CPR module decide what to do with mdep */ 346 error = cpr(fcn, mdep); 347 break; 348 } 349 350 case A_FTRACE: 351 { 352 switch (fcn) { 353 case AD_FTRACE_START: 354 (void) FTRACE_START(); 355 break; 356 case AD_FTRACE_STOP: 357 (void) FTRACE_STOP(); 358 break; 359 default: 360 error = EINVAL; 361 } 362 break; 363 } 364 365 case A_DUMP: 366 { 367 if (fcn == AD_NOSYNC) { 368 in_sync = 1; 369 break; 370 } 371 372 panic_bootfcn = fcn; 373 panic_forced = 1; 374 375 if ((mdep != NULL) && (*(char *)mdep == '/')) { 376 panic_bootstr = i_convert_boot_device_name(mdep, 377 NULL, &buflen); 378 } else 379 panic_bootstr = mdep; 380 381 #ifndef __sparc 382 extern int fastreboot_onpanic; 383 if (fcn != AD_FASTREBOOT) { 384 extern void fastboot_update_config(const char *); 385 /* 386 * If user has explicitly requested reboot to prom, 387 * or uadmin(1M) was invoked with other functions, 388 * don't try to fast reboot after dumping. 389 */ 390 fastreboot_onpanic = 0; 391 fastboot_update_config((char *)&fastreboot_onpanic); 392 } 393 394 if (fastreboot_onpanic) { 395 extern void fastboot_load_kernel(char *); 396 fastboot_load_kernel(mdep); 397 } 398 #endif 399 400 panic("forced crash dump initiated at user request"); 401 /*NOTREACHED*/ 402 } 403 404 case A_SDTTEST: 405 { 406 DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5, 407 int, 6, int, 7); 408 break; 409 } 410 411 default: 412 error = EINVAL; 413 } 414 415 return (error); 416 } 417 418 int 419 uadmin(int cmd, int fcn, uintptr_t mdep) 420 { 421 int error = 0, rv = 0; 422 size_t nbytes = 0; 423 cred_t *credp = CRED(); 424 char *bootargs = NULL; 425 int reset_status = 0; 426 427 if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) { 428 ddi_walk_devs(ddi_root_node(), check_driver_quiesce, 429 &reset_status); 430 if (reset_status != 0) 431 return (EIO); 432 else 433 return (0); 434 } 435 436 /* 437 * The swapctl system call doesn't have its own entry point: it uses 438 * uadmin as a wrapper so we just call it directly from here. 439 */ 440 if (cmd == A_SWAPCTL) { 441 if (get_udatamodel() == DATAMODEL_NATIVE) 442 error = swapctl(fcn, (void *)mdep, &rv); 443 #if defined(_SYSCALL32_IMPL) 444 else 445 error = swapctl32(fcn, (void *)mdep, &rv); 446 #endif /* _SYSCALL32_IMPL */ 447 return (error ? set_errno(error) : rv); 448 } 449 450 /* 451 * Certain subcommands intepret a non-NULL mdep value as a pointer to 452 * a boot string. We pull that in as bootargs, if applicable. 453 */ 454 if (mdep != NULL && 455 (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP || 456 cmd == A_FREEZE || cmd == A_CONFIG)) { 457 bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); 458 if ((error = copyinstr((const char *)mdep, bootargs, 459 BOOTARGS_MAX, &nbytes)) != 0) { 460 kmem_free(bootargs, BOOTARGS_MAX); 461 return (set_errno(error)); 462 } 463 } 464 465 /* 466 * Invoke the appropriate kadmin() routine. 467 */ 468 if (getzoneid() != GLOBAL_ZONEID) 469 error = zone_kadmin(cmd, fcn, bootargs, credp); 470 else 471 error = kadmin(cmd, fcn, bootargs, credp); 472 473 if (bootargs != NULL) 474 kmem_free(bootargs, BOOTARGS_MAX); 475 return (error ? set_errno(error) : 0); 476 } 477