/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2013 Joyent, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/swap.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/var.h>
#include <sys/uadmin.h>
#include <sys/signal.h>
#include <sys/time.h>
#include <vm/seg_kmem.h>
#include <sys/modctl.h>
#include <sys/callb.h>
#include <sys/dumphdr.h>
#include <sys/debug.h>
#include <sys/ftrace.h>
#include <sys/cmn_err.h>
#include <sys/panic.h>
#include <sys/ddi.h>
#include <sys/ddi_periodic.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/condvar.h>
#include <sys/thread.h>
#include <sys/sdt.h>

/*
 * Administrivia system call. We provide this in two flavors: one for calling
 * from the system call path (uadmin), and the other for calling from elsewhere
 * within the kernel (kadmin). Callers must beware that certain uadmin cmd
 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
 */

extern ksema_t fsflush_sema;

/*
 * ualock and uacond serialize the A_SHUTDOWN, A_REBOOT, A_REMOUNT and
 * A_CONFIG commands in kadmin(). ua_shutdown_thread is non-NULL while one
 * of those commands is in progress and is only read or written with ualock
 * held.
 */
kmutex_t ualock;
kcondvar_t uacond;
kthread_t *ua_shutdown_thread = NULL;

/* Set to 1 by kadmin(A_SHUTDOWN) once shutdown processing has begun. */
int sys_shutdown = 0;
volatile int fastreboot_dryrun = 0;

/*
 * Kill all user processes in said zone. A special argument of ALL_ZONES is
 * passed in when the system as a whole is shutting down. The lack of per-zone
 * process lists is likely to make the following a performance bottleneck on a
 * system with many zones.
 *
 * zoneid must not be GLOBAL_ZONEID (asserted below). pidlock is acquired on
 * entry and released before returning.
 */
void
killall(zoneid_t zoneid)
{
	proc_t *p;

	ASSERT(zoneid != GLOBAL_ZONEID);
	/*
	 * Kill all processes except kernel daemons and ourself.
	 * Make a first pass to stop all processes so they won't
	 * be trying to restart children as we kill them.
	 */
	mutex_enter(&pidlock);
	for (p = practive; p != NULL; p = p->p_next) {
		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
		    p->p_exec != NULLVP &&	/* kernel daemons */
		    p->p_as != &kas &&
		    p->p_stat != SZOMB) {
			mutex_enter(&p->p_lock);
			p->p_flag |= SNOWAIT;
			sigtoproc(p, NULL, SIGSTOP);
			mutex_exit(&p->p_lock);
		}
	}

	/*
	 * Second pass: post SIGKILL to every matching process. Unlike the
	 * first pass, processes in state SIDL are skipped here as well.
	 */
	p = practive;
	while (p != NULL) {
		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
		    p->p_exec != NULLVP &&	/* kernel daemons */
		    p->p_as != &kas &&
		    p->p_stat != SIDL &&
		    p->p_stat != SZOMB) {
			mutex_enter(&p->p_lock);
			if (sigismember(&p->p_sig, SIGKILL)) {
				/*
				 * SIGKILL is already pending for this
				 * process; move on to the next one.
				 */
				mutex_exit(&p->p_lock);
				p = p->p_next;
			} else {
				sigtoproc(p, NULL, SIGKILL);
				mutex_exit(&p->p_lock);
				/*
				 * Wait up to hz ticks on the process's
				 * p_srwchan_cv. pidlock is the mutex handed
				 * to the wait, so the process list may have
				 * changed by the time we return; rescan from
				 * the head of the list.
				 */
				(void) cv_reltimedwait(&p->p_srwchan_cv,
				    &pidlock, hz, TR_CLOCK_TICK);
				p = practive;
			}
		} else {
			p = p->p_next;
		}
	}
	mutex_exit(&pidlock);
}

/*
 * In-kernel entry point for the administrative commands.
 *
 *	cmd	one of the A_* command codes accepted by the first switch
 *		below; anything else (including A_SWAPCTL) yields EINVAL.
 *	fcn	command-specific function code.
 *	mdep	command-specific argument; for several commands a non-NULL
 *		value whose first character is '/' is passed through
 *		i_convert_boot_device_name() before use.
 *	credp	credentials checked with secpolicy_sys_config().
 *
 * Returns 0 on success, otherwise an errno value: EPERM if the privilege
 * check fails, EINVAL for an unknown cmd (or an unknown A_FTRACE fcn), EINTR
 * if the wait for another in-progress command is interrupted, ENOTSUP if the
 * cpr module cannot be loaded for A_FREEZE, or whatever exitlwps() or cpr()
 * returned. A_SHUTDOWN, A_REBOOT and (other than AD_NOSYNC) A_DUMP are not
 * expected to return.
 */
int
kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
{
	int error = 0;
	char *buf;
	size_t buflen = 0;
	boolean_t invoke_cb = B_FALSE;

	/*
	 * We might be called directly by the kernel's fault-handling code, so
	 * we can't assert that the caller is in the global zone.
	 */

	/*
	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
	 * and that we have appropriate privileges for this action.
	 */
	switch (cmd) {
	case A_FTRACE:
	case A_SHUTDOWN:
	case A_REBOOT:
	case A_REMOUNT:
	case A_FREEZE:
	case A_DUMP:
	case A_SDTTEST:
	case A_CONFIG:
		if (secpolicy_sys_config(credp, B_FALSE) != 0)
			return (EPERM);
		break;

	default:
		return (EINVAL);
	}

	/*
	 * Serialize these operations on ualock. If it is held, the
	 * system should shutdown, reboot, or remount shortly, unless there is
	 * an error. We need a cv rather than just a mutex because proper
	 * functioning of A_REBOOT relies on being able to interrupt blocked
	 * userland callers.
	 *
	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
	 * Other commands should never return.
	 */
	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
	    cmd == A_CONFIG) {
		mutex_enter(&ualock);
		while (ua_shutdown_thread != NULL) {
			if (cv_wait_sig(&uacond, &ualock) == 0) {
				/*
				 * If we were interrupted, leave, and handle
				 * the signal (or exit, depending on what
				 * happened)
				 */
				mutex_exit(&ualock);
				return (EINTR);
			}
		}
		ua_shutdown_thread = curthread;
		mutex_exit(&ualock);
	}

	switch (cmd) {
	case A_SHUTDOWN:
	{
		proc_t *p = ttoproc(curthread);

		/*
		 * Release (almost) all of our own resources if we are called
		 * from a user context, however if we are calling kadmin() from
		 * a kernel context then we do not release these resources.
		 */
		if (p != &p0) {
			proc_is_exiting(p);
			if ((error = exitlwps(0)) != 0) {
				/*
				 * Another thread in this process also called
				 * exitlwps().
				 */
				mutex_enter(&ualock);
				ua_shutdown_thread = NULL;
				cv_signal(&uacond);
				mutex_exit(&ualock);
				return (error);
			}
			mutex_enter(&p->p_lock);
			p->p_flag |= SNOWAIT;
			sigfillset(&p->p_ignore);
			curthread->t_lwp->lwp_cursig = 0;
			curthread->t_lwp->lwp_extsig = 0;
			/*
			 * Clear p_exec under p_lock, but drop the lock before
			 * releasing the vnode hold. With p_exec == NULLVP this
			 * process no longer matches the filter in killall().
			 */
			if (p->p_exec) {
				vnode_t *exec_vp = p->p_exec;
				p->p_exec = NULLVP;
				mutex_exit(&p->p_lock);
				VN_RELE(exec_vp);
			} else {
				mutex_exit(&p->p_lock);
			}

			pollcleanup();
			closeall(P_FINFO(curproc));
			relvm();

		} else {
			/*
			 * Reset t_cred if not set because much of the
			 * filesystem code depends on CRED() being valid.
			 */
			if (curthread->t_cred == NULL)
				curthread->t_cred = kcred;
		}

		/* indicate shutdown in progress */
		sys_shutdown = 1;

		/*
		 * Communicate that init shouldn't be restarted.
		 */
		zone_shutdown_global();

		killall(ALL_ZONES);
		/*
		 * If we are calling kadmin() from a kernel context then we
		 * do not release these resources.
		 */
		if (ttoproc(curthread) != &p0) {
			VN_RELE(PTOU(curproc)->u_cdir);
			if (PTOU(curproc)->u_rdir)
				VN_RELE(PTOU(curproc)->u_rdir);
			if (PTOU(curproc)->u_cwd)
				refstr_rele(PTOU(curproc)->u_cwd);

			PTOU(curproc)->u_cdir = rootdir;
			PTOU(curproc)->u_rdir = NULL;
			PTOU(curproc)->u_cwd = NULL;
		}

		/*
		 * Allow the reboot/halt/poweroff code a chance to do
		 * anything it needs to whilst we still have filesystems
		 * mounted, like loading any modules necessary for later
		 * performing the actual poweroff.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdpreboot(cmd, fcn, buf);
		} else
			mdpreboot(cmd, fcn, mdep);

		/*
		 * Allow fsflush to finish running and then prevent it
		 * from ever running again so that vfs_unmountall() and
		 * vfs_syncall() can acquire the vfs locks they need.
		 */
		sema_p(&fsflush_sema);
		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, 0);

		vfs_unmountall();
		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
		vfs_syncall();

		/*
		 * Check for (and unregister) any DDI periodic handlers that
		 * still exist, as they most likely constitute resource leaks:
		 */
		ddi_periodic_fini();

		dump_ereports();
		dump_messages();

		/* Passed to mdboot() by the A_REBOOT code we fall into. */
		invoke_cb = B_TRUE;
	}
	/* FALLTHROUGH */

	case A_REBOOT:
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdboot(cmd, fcn, buf, invoke_cb);
		} else
			mdboot(cmd, fcn, mdep, invoke_cb);
		/* no return expected */
		break;

	case A_CONFIG:
		/*
		 * AD_UPDATE_BOOT_CONFIG is the only fcn acted upon, and only
		 * on non-sparc builds; every other fcn value is a no-op that
		 * still returns success.
		 */
		switch (fcn) {
		case AD_UPDATE_BOOT_CONFIG:
#ifndef __sparc
		{
			extern void fastboot_update_config(const char *);

			fastboot_update_config(mdep);
		}
#endif

			break;
		}
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_REMOUNT:
		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_FREEZE:
	{
		/*
		 * This is the entrypoint for all suspend/resume actions.
		 */
		extern int cpr(int, void *);

		if (modload("misc", "cpr") == -1)
			return (ENOTSUP);
		/* Let the CPR module decide what to do with mdep */
		error = cpr(fcn, mdep);
		break;
	}

	case A_FTRACE:
	{
		switch (fcn) {
		case AD_FTRACE_START:
			(void) FTRACE_START();
			break;
		case AD_FTRACE_STOP:
			(void) FTRACE_STOP();
			break;
		default:
			error = EINVAL;
		}
		break;
	}

	case A_DUMP:
	{
		/*
		 * AD_NOSYNC only sets in_sync and returns; every other fcn
		 * records the boot function and boot string for the panic
		 * code and then forces a panic.
		 */
		if (fcn == AD_NOSYNC) {
			in_sync = 1;
			break;
		}

		panic_bootfcn = fcn;
		panic_forced = 1;

		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			panic_bootstr = i_convert_boot_device_name(mdep,
			    NULL, &buflen);
		} else
			panic_bootstr = mdep;

#ifndef __sparc
		extern void fastboot_update_and_load(int, char *);

		fastboot_update_and_load(fcn, mdep);
#endif

		panic("forced crash dump initiated at user request");
		/*NOTREACHED*/
	}

	case A_SDTTEST:
	{
		/* Fires a single seven-argument DTrace probe named "test". */
		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
		    int, 6, int, 7);
		break;
	}

	default:
		error = EINVAL;
	}

	return (error);
}

/*
 * System call entry point for the administrative commands.
 *
 *	cmd	A_* command code; A_SWAPCTL and the A_SHUTDOWN /
 *		AD_FASTREBOOT_DRYRUN combination are handled here, everything
 *		else is forwarded to kadmin() or zone_kadmin().
 *	fcn	command-specific function code.
 *	mdep	user-supplied argument; for the commands listed below a
 *		non-NULL value is treated as the user address of a boot
 *		string and copied in (at most BOOTARGS_MAX bytes).
 *
 * Errors are reported through set_errno(); on success the return value is 0,
 * or for A_SWAPCTL the value swapctl()/swapctl32() stored in rv.
 */
int
uadmin(int cmd, int fcn, uintptr_t mdep)
{
	int error = 0, rv = 0;
	size_t nbytes = 0;
	cred_t *credp = CRED();
	char *bootargs = NULL;
	int reset_status = 0;

	/*
	 * Dry run: walk the device tree with check_driver_quiesce() and
	 * report EIO if the walk left reset_status non-zero.
	 *
	 * NOTE(review): this path returns EIO directly, whereas every other
	 * error return in this function goes through set_errno() -- confirm
	 * that this is intentional.
	 */
	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
		    &reset_status);
		if (reset_status != 0)
			return (EIO);
		else
			return (0);
	}

	/*
	 * The swapctl system call doesn't have its own entry point: it uses
	 * uadmin as a wrapper so we just call it directly from here.
	 */
	if (cmd == A_SWAPCTL) {
		if (get_udatamodel() == DATAMODEL_NATIVE)
			error = swapctl(fcn, (void *)mdep, &rv);
#if defined(_SYSCALL32_IMPL)
		else
			error = swapctl32(fcn, (void *)mdep, &rv);
#endif /* _SYSCALL32_IMPL */
		return (error ? set_errno(error) : rv);
	}

	/*
	 * Certain subcommands interpret a non-NULL mdep value as a pointer to
	 * a boot string. We pull that in as bootargs, if applicable.
	 */
	if (mdep != (uintptr_t)NULL &&
	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
	    cmd == A_FREEZE || cmd == A_CONFIG)) {
		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
		if ((error = copyinstr((const char *)mdep, bootargs,
		    BOOTARGS_MAX, &nbytes)) != 0) {
			kmem_free(bootargs, BOOTARGS_MAX);
			return (set_errno(error));
		}
	}

	/*
	 * Invoke the appropriate kadmin() routine.
	 */
	if (getzoneid() != GLOBAL_ZONEID)
		error = zone_kadmin(cmd, fcn, bootargs, credp);
	else
		error = kadmin(cmd, fcn, bootargs, credp);

	/* Only reached if the command returned; release the copied string. */
	if (bootargs != NULL)
		kmem_free(bootargs, BOOTARGS_MAX);
	return (error ? set_errno(error) : 0);
}