1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2013 Joyent, Inc. All rights reserved.
26 */
27
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/swap.h>
36 #include <sys/file.h>
37 #include <sys/proc.h>
38 #include <sys/var.h>
39 #include <sys/uadmin.h>
40 #include <sys/signal.h>
41 #include <sys/time.h>
42 #include <vm/seg_kmem.h>
43 #include <sys/modctl.h>
44 #include <sys/callb.h>
45 #include <sys/dumphdr.h>
46 #include <sys/debug.h>
47 #include <sys/ftrace.h>
48 #include <sys/cmn_err.h>
49 #include <sys/panic.h>
50 #include <sys/ddi.h>
51 #include <sys/ddi_periodic.h>
52 #include <sys/sunddi.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 #include <sys/condvar.h>
56 #include <sys/thread.h>
57 #include <sys/sdt.h>
58
59 /*
60 * Administrivia system call. We provide this in two flavors: one for calling
61 * from the system call path (uadmin), and the other for calling from elsewhere
62 * within the kernel (kadmin). Callers must beware that certain uadmin cmd
63 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
64 */
65
/*
 * Semaphore defined elsewhere.  kadmin() takes it with sema_p() during
 * A_SHUTDOWN, before unmounting file systems, and never releases it here.
 */
extern ksema_t fsflush_sema;

/*
 * ualock, uacond and ua_shutdown_thread together serialize the A_SHUTDOWN,
 * A_REBOOT, A_REMOUNT and A_CONFIG commands in kadmin().  ua_shutdown_thread
 * is non-NULL while one of those commands is in progress; other callers wait
 * on uacond (under ualock) until it is cleared.
 */
kmutex_t ualock;
kcondvar_t uacond;
kthread_t *ua_shutdown_thread = NULL;

/* Set to 1 by kadmin() once an A_SHUTDOWN is under way. */
int sys_shutdown = 0;
/*
 * NOTE(review): not referenced by any code visible in this file section;
 * presumably consumed by the fast reboot code -- confirm against its users.
 */
volatile int fastreboot_dryrun = 0;
73
74 /*
75 * Kill all user processes in said zone. A special argument of ALL_ZONES is
76 * passed in when the system as a whole is shutting down. The lack of per-zone
77 * process lists is likely to make the following a performance bottleneck on a
78 * system with many zones.
79 */
80 void
killall(zoneid_t zoneid)81 killall(zoneid_t zoneid)
82 {
83 proc_t *p;
84
85 ASSERT(zoneid != GLOBAL_ZONEID);
86 /*
87 * Kill all processes except kernel daemons and ourself.
88 * Make a first pass to stop all processes so they won't
89 * be trying to restart children as we kill them.
90 */
91 mutex_enter(&pidlock);
92 for (p = practive; p != NULL; p = p->p_next) {
93 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
94 p->p_exec != NULLVP && /* kernel daemons */
95 p->p_as != &kas &&
96 p->p_stat != SZOMB) {
97 mutex_enter(&p->p_lock);
98 p->p_flag |= SNOWAIT;
99 sigtoproc(p, NULL, SIGSTOP);
100 mutex_exit(&p->p_lock);
101 }
102 }
103 p = practive;
104 while (p != NULL) {
105 if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
106 p->p_exec != NULLVP && /* kernel daemons */
107 p->p_as != &kas &&
108 p->p_stat != SIDL &&
109 p->p_stat != SZOMB) {
110 mutex_enter(&p->p_lock);
111 if (sigismember(&p->p_sig, SIGKILL)) {
112 mutex_exit(&p->p_lock);
113 p = p->p_next;
114 } else {
115 sigtoproc(p, NULL, SIGKILL);
116 mutex_exit(&p->p_lock);
117 (void) cv_reltimedwait(&p->p_srwchan_cv,
118 &pidlock, hz, TR_CLOCK_TICK);
119 p = practive;
120 }
121 } else {
122 p = p->p_next;
123 }
124 }
125 mutex_exit(&pidlock);
126 }
127
/*
 * In-kernel implementation of the administrative commands.
 *
 *	cmd	one of the A_* command codes accepted by the first switch
 *		below; anything else yields EINVAL.
 *	fcn	command-specific function code (AD_*).
 *	mdep	command-specific argument.  For A_SHUTDOWN, A_REBOOT and
 *		A_DUMP, a non-NULL value whose first character is '/' is run
 *		through i_convert_boot_device_name() before being handed on.
 *	credp	credentials checked with secpolicy_sys_config().
 *
 * Returns 0 or an errno value: EPERM when the policy check fails, EINVAL for
 * an unknown cmd (or unknown A_FTRACE fcn), EINTR when the wait for a
 * concurrent shutdown/reboot/remount/config operation is interrupted by a
 * signal, ENOTSUP when the cpr module cannot be loaded for A_FREEZE, or
 * whatever exitlwps() / cpr() report.  A_REBOOT and A_DUMP are not expected
 * to return, and neither is A_SHUTDOWN once it gets past exitlwps().
 */
int
kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
{
	int error = 0;
	char *buf;
	size_t buflen = 0;
	boolean_t invoke_cb = B_FALSE;

	/*
	 * We might be called directly by the kernel's fault-handling code, so
	 * we can't assert that the caller is in the global zone.
	 */

	/*
	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
	 * and that we have appropriate privileges for this action.
	 */
	switch (cmd) {
	case A_FTRACE:
	case A_SHUTDOWN:
	case A_REBOOT:
	case A_REMOUNT:
	case A_FREEZE:
	case A_DUMP:
	case A_SDTTEST:
	case A_CONFIG:
		if (secpolicy_sys_config(credp, B_FALSE) != 0)
			return (EPERM);
		break;

	default:
		return (EINVAL);
	}

	/*
	 * Serialize these operations on ualock.  If it is held, the
	 * system should shutdown, reboot, or remount shortly, unless there is
	 * an error.  We need a cv rather than just a mutex because proper
	 * functioning of A_REBOOT relies on being able to interrupt blocked
	 * userland callers.
	 *
	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG, or
	 * when A_SHUTDOWN bails out because exitlwps() failed.
	 * Other commands should never return.
	 */
	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
	    cmd == A_CONFIG) {
		mutex_enter(&ualock);
		while (ua_shutdown_thread != NULL) {
			if (cv_wait_sig(&uacond, &ualock) == 0) {
				/*
				 * If we were interrupted, leave, and handle
				 * the signal (or exit, depending on what
				 * happened)
				 */
				mutex_exit(&ualock);
				return (EINTR);
			}
		}
		/* We now own the shutdown path until we clear this again. */
		ua_shutdown_thread = curthread;
		mutex_exit(&ualock);
	}

	switch (cmd) {
	case A_SHUTDOWN:
	{
		proc_t *p = ttoproc(curthread);

		/*
		 * Release (almost) all of our own resources if we are called
		 * from a user context, however if we are calling kadmin() from
		 * a kernel context then we do not release these resources.
		 */
		if (p != &p0) {
			proc_is_exiting(p);
			if ((error = exitlwps(0)) != 0) {
				/*
				 * Another thread in this process also called
				 * exitlwps().  Give up the shutdown path and
				 * wake one waiter before returning the error.
				 */
				mutex_enter(&ualock);
				ua_shutdown_thread = NULL;
				cv_signal(&uacond);
				mutex_exit(&ualock);
				return (error);
			}
			mutex_enter(&p->p_lock);
			p->p_flag |= SNOWAIT;
			sigfillset(&p->p_ignore);
			curthread->t_lwp->lwp_cursig = 0;
			curthread->t_lwp->lwp_extsig = 0;
			/*
			 * Clearing p_exec also makes killall() skip this
			 * process, since it ignores anything without an
			 * exec vnode.  The hold is dropped only after p_lock
			 * has been released.
			 */
			if (p->p_exec) {
				vnode_t *exec_vp = p->p_exec;
				p->p_exec = NULLVP;
				mutex_exit(&p->p_lock);
				VN_RELE(exec_vp);
			} else {
				mutex_exit(&p->p_lock);
			}

			pollcleanup();
			closeall(P_FINFO(curproc));
			relvm();

		} else {
			/*
			 * Reset t_cred if not set because much of the
			 * filesystem code depends on CRED() being valid.
			 */
			if (curthread->t_cred == NULL)
				curthread->t_cred = kcred;
		}

		/* indicate shutdown in progress */
		sys_shutdown = 1;

		/*
		 * Communicate that init shouldn't be restarted.
		 */
		zone_shutdown_global();

		killall(ALL_ZONES);
		/*
		 * If we are calling kadmin() from a kernel context then we
		 * do not release these resources.  Otherwise drop our holds
		 * on the current and root directories and the cwd string,
		 * and point the current directory at rootdir.
		 */
		if (ttoproc(curthread) != &p0) {
			VN_RELE(PTOU(curproc)->u_cdir);
			if (PTOU(curproc)->u_rdir)
				VN_RELE(PTOU(curproc)->u_rdir);
			if (PTOU(curproc)->u_cwd)
				refstr_rele(PTOU(curproc)->u_cwd);

			PTOU(curproc)->u_cdir = rootdir;
			PTOU(curproc)->u_rdir = NULL;
			PTOU(curproc)->u_cwd = NULL;
		}

		/*
		 * Allow the reboot/halt/poweroff code a chance to do
		 * anything it needs to whilst we still have filesystems
		 * mounted, like loading any modules necessary for later
		 * performing the actual poweroff.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdpreboot(cmd, fcn, buf);
		} else
			mdpreboot(cmd, fcn, mdep);

		/*
		 * Allow fsflush to finish running and then prevent it
		 * from ever running again so that vfs_unmountall() and
		 * vfs_syncall() can acquire the vfs locks they need.
		 */
		sema_p(&fsflush_sema);
		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);

		vfs_unmountall();
		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
		vfs_syncall();

		/*
		 * Check for (and unregister) any DDI periodic handlers that
		 * still exist, as they most likely constitute resource leaks:
		 */
		ddi_periodic_fini();

		dump_ereports();
		dump_messages();

		/* Tell mdboot() below that we arrived via A_SHUTDOWN. */
		invoke_cb = B_TRUE;

		/* FALLTHROUGH */
	}

	case A_REBOOT:
		/*
		 * invoke_cb is B_TRUE only when we fell through from
		 * A_SHUTDOWN; cmd still carries the caller's original value.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdboot(cmd, fcn, buf, invoke_cb);
		} else
			mdboot(cmd, fcn, mdep, invoke_cb);
		/* no return expected */
		break;

	case A_CONFIG:
		/*
		 * Only AD_UPDATE_BOOT_CONFIG does any work, and only on
		 * non-SPARC builds; every other fcn is silently accepted.
		 */
		switch (fcn) {
		case AD_UPDATE_BOOT_CONFIG:
#ifndef __sparc
		{
			extern void fastboot_update_config(const char *);

			fastboot_update_config(mdep);
		}
#endif

			break;
		}
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_REMOUNT:
		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_FREEZE:
	{
		/*
		 * This is the entrypoint for all suspend/resume actions.
		 */
		extern int cpr(int, void *);

		if (modload("misc", "cpr") == -1)
			return (ENOTSUP);
		/* Let the CPR module decide what to do with mdep */
		error = cpr(fcn, mdep);
		break;
	}

	case A_FTRACE:
	{
		switch (fcn) {
		case AD_FTRACE_START:
			(void) FTRACE_START();
			break;
		case AD_FTRACE_STOP:
			(void) FTRACE_STOP();
			break;
		default:
			error = EINVAL;
		}
		break;
	}

	case A_DUMP:
	{
		/* AD_NOSYNC only sets in_sync; no panic is triggered. */
		if (fcn == AD_NOSYNC) {
			in_sync = 1;
			break;
		}

		/* Record what the panic code should do once the dump is taken. */
		panic_bootfcn = fcn;
		panic_forced = 1;

		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			panic_bootstr = i_convert_boot_device_name(mdep,
			    NULL, &buflen);
		} else
			panic_bootstr = mdep;

#ifndef __sparc
		extern void fastboot_update_and_load(int, char *);

		/* Note: this receives the unconverted mdep. */
		fastboot_update_and_load(fcn, mdep);
#endif

		panic("forced crash dump initiated at user request");
		/*NOTREACHED*/
	}

	case A_SDTTEST:
	{
		/* Fires a seven-argument DTrace probe with fixed values. */
		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
		    int, 6, int, 7);
		break;
	}

	default:
		/*
		 * Defensive only: the validation switch at the top has
		 * already rejected every cmd not handled above.
		 */
		error = EINVAL;
	}

	return (error);
}
409
410 int
uadmin(int cmd,int fcn,uintptr_t mdep)411 uadmin(int cmd, int fcn, uintptr_t mdep)
412 {
413 int error = 0, rv = 0;
414 size_t nbytes = 0;
415 cred_t *credp = CRED();
416 char *bootargs = NULL;
417 int reset_status = 0;
418
419 if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
420 ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
421 &reset_status);
422 if (reset_status != 0)
423 return (EIO);
424 else
425 return (0);
426 }
427
428 /*
429 * The swapctl system call doesn't have its own entry point: it uses
430 * uadmin as a wrapper so we just call it directly from here.
431 */
432 if (cmd == A_SWAPCTL) {
433 if (get_udatamodel() == DATAMODEL_NATIVE)
434 error = swapctl(fcn, (void *)mdep, &rv);
435 #if defined(_SYSCALL32_IMPL)
436 else
437 error = swapctl32(fcn, (void *)mdep, &rv);
438 #endif /* _SYSCALL32_IMPL */
439 return (error ? set_errno(error) : rv);
440 }
441
442 /*
443 * Certain subcommands intepret a non-NULL mdep value as a pointer to
444 * a boot string. We pull that in as bootargs, if applicable.
445 */
446 if (mdep != NULL &&
447 (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
448 cmd == A_FREEZE || cmd == A_CONFIG)) {
449 bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
450 if ((error = copyinstr((const char *)mdep, bootargs,
451 BOOTARGS_MAX, &nbytes)) != 0) {
452 kmem_free(bootargs, BOOTARGS_MAX);
453 return (set_errno(error));
454 }
455 }
456
457 /*
458 * Invoke the appropriate kadmin() routine.
459 */
460 if (getzoneid() != GLOBAL_ZONEID)
461 error = zone_kadmin(cmd, fcn, bootargs, credp);
462 else
463 error = kadmin(cmd, fcn, bootargs, credp);
464
465 if (bootargs != NULL)
466 kmem_free(bootargs, BOOTARGS_MAX);
467 return (error ? set_errno(error) : 0);
468 }
469