1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/param.h> 30 #include <sys/types.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/errno.h> 34 #include <sys/vfs.h> 35 #include <sys/vnode.h> 36 #include <sys/swap.h> 37 #include <sys/file.h> 38 #include <sys/proc.h> 39 #include <sys/var.h> 40 #include <sys/uadmin.h> 41 #include <sys/signal.h> 42 #include <sys/time.h> 43 #include <vm/seg_kmem.h> 44 #include <sys/modctl.h> 45 #include <sys/callb.h> 46 #include <sys/dumphdr.h> 47 #include <sys/debug.h> 48 #include <sys/ftrace.h> 49 #include <sys/cmn_err.h> 50 #include <sys/panic.h> 51 #include <sys/ddi.h> 52 #include <sys/sunddi.h> 53 #include <sys/policy.h> 54 #include <sys/zone.h> 55 56 /* 57 * Administrivia system call. We provide this in two flavors: one for calling 58 * from the system call path (uadmin), and the other for calling from elsewhere 59 * within the kernel (kadmin). Callers must beware that certain uadmin cmd 60 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin. 61 */ 62 63 extern ksema_t fsflush_sema; 64 kmutex_t ualock; 65 66 int sys_shutdown = 0; 67 68 /* 69 * Kill all user processes in said zone. A special argument of ALL_ZONES is 70 * passed in when the system as a whole is shutting down. The lack of per-zone 71 * process lists is likely to make the following a performance bottleneck on a 72 * system with many zones. 73 */ 74 void 75 killall(zoneid_t zoneid) 76 { 77 proc_t *p; 78 79 ASSERT(zoneid != GLOBAL_ZONEID); 80 /* 81 * Kill all processes except kernel daemons and ourself. 82 * Make a first pass to stop all processes so they won't 83 * be trying to restart children as we kill them. 84 */ 85 mutex_enter(&pidlock); 86 for (p = practive; p != NULL; p = p->p_next) { 87 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) && 88 p->p_exec != NULLVP && /* kernel daemons */ 89 p->p_as != &kas && 90 p->p_stat != SZOMB) { 91 mutex_enter(&p->p_lock); 92 p->p_flag |= SNOWAIT; 93 sigtoproc(p, NULL, SIGSTOP); 94 mutex_exit(&p->p_lock); 95 } 96 } 97 p = practive; 98 while (p != NULL) { 99 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) && 100 p->p_exec != NULLVP && /* kernel daemons */ 101 p->p_as != &kas && 102 p->p_stat != SIDL && 103 p->p_stat != SZOMB) { 104 mutex_enter(&p->p_lock); 105 if (sigismember(&p->p_sig, SIGKILL)) { 106 mutex_exit(&p->p_lock); 107 p = p->p_next; 108 } else { 109 sigtoproc(p, NULL, SIGKILL); 110 mutex_exit(&p->p_lock); 111 (void) cv_timedwait(&p->p_srwchan_cv, &pidlock, 112 lbolt + hz); 113 p = practive; 114 } 115 } else { 116 p = p->p_next; 117 } 118 } 119 mutex_exit(&pidlock); 120 } 121 122 int 123 kadmin(int cmd, int fcn, void *mdep, cred_t *credp) 124 { 125 int error = 0; 126 int locked = 0; 127 char *buf; 128 size_t buflen = 0; 129 boolean_t invoke_cb = B_FALSE; 130 131 /* 132 * We might be called directly by the kernel's fault-handling code, so 133 * we can't assert that the caller is in the global zone. 134 */ 135 136 /* 137 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes 138 * and that we have appropriate privileges for this action. 139 */ 140 switch (cmd) { 141 case A_FTRACE: 142 case A_SHUTDOWN: 143 case A_REBOOT: 144 case A_REMOUNT: 145 case A_FREEZE: 146 case A_DUMP: 147 if (secpolicy_sys_config(credp, B_FALSE) != 0) 148 return (EPERM); 149 break; 150 151 default: 152 return (EINVAL); 153 } 154 155 /* 156 * Serialize these operations on ualock. If it is held, just return 157 * as if successful since the system will soon reset or remount. 158 */ 159 if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT) { 160 if (!mutex_tryenter(&ualock)) 161 return (0); 162 locked = 1; 163 } 164 165 switch (cmd) { 166 case A_SHUTDOWN: 167 { 168 proc_t *p = ttoproc(curthread); 169 170 /* 171 * Release (almost) all of our own resources if we are called 172 * from a user context, however if we are calling kadmin() from 173 * a kernel context then we do not release these resources. 174 */ 175 if (p != &p0) { 176 proc_is_exiting(p); 177 if ((error = exitlwps(0)) != 0) { 178 ASSERT(locked); 179 mutex_exit(&ualock); 180 return (error); 181 } 182 mutex_enter(&p->p_lock); 183 p->p_flag |= SNOWAIT; 184 sigfillset(&p->p_ignore); 185 curthread->t_lwp->lwp_cursig = 0; 186 curthread->t_lwp->lwp_extsig = 0; 187 if (p->p_exec) { 188 vnode_t *exec_vp = p->p_exec; 189 p->p_exec = NULLVP; 190 mutex_exit(&p->p_lock); 191 VN_RELE(exec_vp); 192 } else { 193 mutex_exit(&p->p_lock); 194 } 195 196 pollcleanup(); 197 closeall(P_FINFO(curproc)); 198 relvm(); 199 200 } else { 201 /* 202 * Reset t_cred if not set because much of the 203 * filesystem code depends on CRED() being valid. 204 */ 205 if (curthread->t_cred == NULL) 206 curthread->t_cred = kcred; 207 } 208 209 /* indicate shutdown in progress */ 210 sys_shutdown = 1; 211 212 /* 213 * Communcate that init shouldn't be restarted. 214 */ 215 zone_shutdown_global(); 216 217 killall(ALL_ZONES); 218 /* 219 * If we are calling kadmin() from a kernel context then we 220 * do not release these resources. 221 */ 222 if (ttoproc(curthread) != &p0) { 223 VN_RELE(PTOU(curproc)->u_cdir); 224 if (PTOU(curproc)->u_rdir) 225 VN_RELE(PTOU(curproc)->u_rdir); 226 if (PTOU(curproc)->u_cwd) 227 refstr_rele(PTOU(curproc)->u_cwd); 228 229 PTOU(curproc)->u_cdir = rootdir; 230 PTOU(curproc)->u_rdir = NULL; 231 PTOU(curproc)->u_cwd = NULL; 232 } 233 234 /* 235 * Allow the reboot/halt/poweroff code a chance to do 236 * anything it needs to whilst we still have filesystems 237 * mounted, like loading any modules necessary for later 238 * performing the actual poweroff. 239 */ 240 if ((mdep != NULL) && (*(char *)mdep == '/')) { 241 buf = i_convert_boot_device_name(mdep, NULL, &buflen); 242 mdpreboot(cmd, fcn, buf); 243 } else 244 mdpreboot(cmd, fcn, mdep); 245 246 /* 247 * Allow fsflush to finish running and then prevent it 248 * from ever running again so that vfs_unmountall() and 249 * vfs_syncall() can acquire the vfs locks they need. 250 */ 251 sema_p(&fsflush_sema); 252 (void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL); 253 254 vfs_unmountall(); 255 (void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT); 256 vfs_syncall(); 257 258 dump_ereports(); 259 dump_messages(); 260 261 invoke_cb = B_TRUE; 262 263 /* FALLTHROUGH */ 264 } 265 266 case A_REBOOT: 267 if ((mdep != NULL) && (*(char *)mdep == '/')) { 268 buf = i_convert_boot_device_name(mdep, NULL, &buflen); 269 mdboot(cmd, fcn, buf, invoke_cb); 270 } else 271 mdboot(cmd, fcn, mdep, invoke_cb); 272 /* no return expected */ 273 break; 274 275 case A_REMOUNT: 276 (void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT); 277 break; 278 279 case A_FREEZE: 280 { 281 /* XXX: declare in some header file */ 282 extern int cpr(int); 283 284 if (modload("misc", "cpr") == -1) 285 return (ENOTSUP); 286 error = cpr(fcn); 287 break; 288 } 289 290 case A_FTRACE: 291 { 292 switch (fcn) { 293 case AD_FTRACE_START: 294 (void) FTRACE_START(); 295 break; 296 case AD_FTRACE_STOP: 297 (void) FTRACE_STOP(); 298 break; 299 default: 300 error = EINVAL; 301 } 302 break; 303 } 304 305 case A_DUMP: 306 { 307 if (fcn == AD_NOSYNC) { 308 in_sync = 1; 309 break; 310 } 311 312 panic_bootfcn = fcn; 313 panic_forced = 1; 314 315 if ((mdep != NULL) && (*(char *)mdep == '/')) { 316 panic_bootstr = i_convert_boot_device_name(mdep, 317 NULL, &buflen); 318 } else 319 panic_bootstr = mdep; 320 321 panic("forced crash dump initiated at user request"); 322 /*NOTREACHED*/ 323 } 324 325 default: 326 error = EINVAL; 327 } 328 329 if (locked) 330 mutex_exit(&ualock); 331 332 return (error); 333 } 334 335 int 336 uadmin(int cmd, int fcn, uintptr_t mdep) 337 { 338 int error = 0, rv = 0; 339 size_t nbytes = 0; 340 cred_t *credp = CRED(); 341 char *bootargs = NULL; 342 343 /* 344 * The swapctl system call doesn't have its own entry point: it uses 345 * uadmin as a wrapper so we just call it directly from here. 346 */ 347 if (cmd == A_SWAPCTL) { 348 if (get_udatamodel() == DATAMODEL_NATIVE) 349 error = swapctl(fcn, (void *)mdep, &rv); 350 #if defined(_SYSCALL32_IMPL) 351 else 352 error = swapctl32(fcn, (void *)mdep, &rv); 353 #endif /* _SYSCALL32_IMPL */ 354 return (error ? set_errno(error) : rv); 355 } 356 357 /* 358 * Certain subcommands intepret a non-NULL mdep value as a pointer to 359 * a boot string. We pull that in as bootargs, if applicable. 360 */ 361 if (mdep != NULL && 362 (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP)) { 363 bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP); 364 if ((error = copyinstr((const char *)mdep, bootargs, 365 BOOTARGS_MAX, &nbytes)) != 0) { 366 kmem_free(bootargs, BOOTARGS_MAX); 367 return (set_errno(error)); 368 } 369 } 370 371 /* 372 * Invoke the appropriate kadmin() routine. 373 */ 374 if (getzoneid() != GLOBAL_ZONEID) 375 error = zone_kadmin(cmd, fcn, bootargs, credp); 376 else 377 error = kadmin(cmd, fcn, bootargs, credp); 378 379 if (bootargs != NULL) 380 kmem_free(bootargs, BOOTARGS_MAX); 381 return (error ? set_errno(error) : 0); 382 } 383