xref: /illumos-gate/usr/src/uts/common/syscall/uadmin.c (revision 7a088f03b431bdffa96c3b2175964d4d38420caa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/swap.h>
36 #include <sys/file.h>
37 #include <sys/proc.h>
38 #include <sys/var.h>
39 #include <sys/uadmin.h>
40 #include <sys/signal.h>
41 #include <sys/time.h>
42 #include <vm/seg_kmem.h>
43 #include <sys/modctl.h>
44 #include <sys/callb.h>
45 #include <sys/dumphdr.h>
46 #include <sys/debug.h>
47 #include <sys/ftrace.h>
48 #include <sys/cmn_err.h>
49 #include <sys/panic.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/policy.h>
53 #include <sys/zone.h>
54 #include <sys/condvar.h>
55 #include <sys/thread.h>
56 #include <sys/sdt.h>
57 
58 /*
59  * Administrivia system call.  We provide this in two flavors: one for calling
60  * from the system call path (uadmin), and the other for calling from elsewhere
61  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
62  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
63  */
64 
65 extern ksema_t fsflush_sema;
66 kmutex_t ualock;
67 kcondvar_t uacond;
68 kthread_t *ua_shutdown_thread = NULL;
69 
70 int sys_shutdown = 0;
71 volatile int fastreboot_dryrun = 0;
72 
73 /*
74  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
75  * passed in when the system as a whole is shutting down.  The lack of per-zone
76  * process lists is likely to make the following a performance bottleneck on a
77  * system with many zones.
78  */
79 void
80 killall(zoneid_t zoneid)
81 {
82 	proc_t *p;
83 
84 	ASSERT(zoneid != GLOBAL_ZONEID);
85 	/*
86 	 * Kill all processes except kernel daemons and ourself.
87 	 * Make a first pass to stop all processes so they won't
88 	 * be trying to restart children as we kill them.
89 	 */
90 	mutex_enter(&pidlock);
91 	for (p = practive; p != NULL; p = p->p_next) {
92 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
93 		    p->p_exec != NULLVP &&	/* kernel daemons */
94 		    p->p_as != &kas &&
95 		    p->p_stat != SZOMB) {
96 			mutex_enter(&p->p_lock);
97 			p->p_flag |= SNOWAIT;
98 			sigtoproc(p, NULL, SIGSTOP);
99 			mutex_exit(&p->p_lock);
100 		}
101 	}
102 	p = practive;
103 	while (p != NULL) {
104 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
105 		    p->p_exec != NULLVP &&	/* kernel daemons */
106 		    p->p_as != &kas &&
107 		    p->p_stat != SIDL &&
108 		    p->p_stat != SZOMB) {
109 			mutex_enter(&p->p_lock);
110 			if (sigismember(&p->p_sig, SIGKILL)) {
111 				mutex_exit(&p->p_lock);
112 				p = p->p_next;
113 			} else {
114 				sigtoproc(p, NULL, SIGKILL);
115 				mutex_exit(&p->p_lock);
116 				(void) cv_timedwait(&p->p_srwchan_cv, &pidlock,
117 				    lbolt + hz);
118 				p = practive;
119 			}
120 		} else {
121 			p = p->p_next;
122 		}
123 	}
124 	mutex_exit(&pidlock);
125 }
126 
127 int
128 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
129 {
130 	int error = 0;
131 	char *buf;
132 	size_t buflen = 0;
133 	boolean_t invoke_cb = B_FALSE;
134 
135 	/*
136 	 * We might be called directly by the kernel's fault-handling code, so
137 	 * we can't assert that the caller is in the global zone.
138 	 */
139 
140 	/*
141 	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
142 	 * and that we have appropriate privileges for this action.
143 	 */
144 	switch (cmd) {
145 	case A_FTRACE:
146 	case A_SHUTDOWN:
147 	case A_REBOOT:
148 	case A_REMOUNT:
149 	case A_FREEZE:
150 	case A_DUMP:
151 	case A_SDTTEST:
152 	case A_CONFIG:
153 		if (secpolicy_sys_config(credp, B_FALSE) != 0)
154 			return (EPERM);
155 		break;
156 
157 	default:
158 		return (EINVAL);
159 	}
160 
161 	/*
162 	 * Serialize these operations on ualock.  If it is held, the
163 	 * system should shutdown, reboot, or remount shortly, unless there is
164 	 * an error.  We need a cv rather than just a mutex because proper
165 	 * functioning of A_REBOOT relies on being able to interrupt blocked
166 	 * userland callers.
167 	 *
168 	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
169 	 * Other commands should never return.
170 	 */
171 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
172 	    cmd == A_CONFIG) {
173 		mutex_enter(&ualock);
174 		while (ua_shutdown_thread != NULL) {
175 			if (cv_wait_sig(&uacond, &ualock) == 0) {
176 				/*
177 				 * If we were interrupted, leave, and handle
178 				 * the signal (or exit, depending on what
179 				 * happened)
180 				 */
181 				mutex_exit(&ualock);
182 				return (EINTR);
183 			}
184 		}
185 		ua_shutdown_thread = curthread;
186 		mutex_exit(&ualock);
187 	}
188 
189 	switch (cmd) {
190 	case A_SHUTDOWN:
191 	{
192 		proc_t *p = ttoproc(curthread);
193 
194 		/*
195 		 * Release (almost) all of our own resources if we are called
196 		 * from a user context, however if we are calling kadmin() from
197 		 * a kernel context then we do not release these resources.
198 		 */
199 		if (p != &p0) {
200 			proc_is_exiting(p);
201 			if ((error = exitlwps(0)) != 0) {
202 				/*
203 				 * Another thread in this process also called
204 				 * exitlwps().
205 				 */
206 				mutex_enter(&ualock);
207 				ua_shutdown_thread = NULL;
208 				cv_signal(&uacond);
209 				mutex_exit(&ualock);
210 				return (error);
211 			}
212 			mutex_enter(&p->p_lock);
213 			p->p_flag |= SNOWAIT;
214 			sigfillset(&p->p_ignore);
215 			curthread->t_lwp->lwp_cursig = 0;
216 			curthread->t_lwp->lwp_extsig = 0;
217 			if (p->p_exec) {
218 				vnode_t *exec_vp = p->p_exec;
219 				p->p_exec = NULLVP;
220 				mutex_exit(&p->p_lock);
221 				VN_RELE(exec_vp);
222 			} else {
223 				mutex_exit(&p->p_lock);
224 			}
225 
226 			pollcleanup();
227 			closeall(P_FINFO(curproc));
228 			relvm();
229 
230 		} else {
231 			/*
232 			 * Reset t_cred if not set because much of the
233 			 * filesystem code depends on CRED() being valid.
234 			 */
235 			if (curthread->t_cred == NULL)
236 				curthread->t_cred = kcred;
237 		}
238 
239 		/* indicate shutdown in progress */
240 		sys_shutdown = 1;
241 
242 		/*
243 		 * Communcate that init shouldn't be restarted.
244 		 */
245 		zone_shutdown_global();
246 
247 		killall(ALL_ZONES);
248 		/*
249 		 * If we are calling kadmin() from a kernel context then we
250 		 * do not release these resources.
251 		 */
252 		if (ttoproc(curthread) != &p0) {
253 			VN_RELE(PTOU(curproc)->u_cdir);
254 			if (PTOU(curproc)->u_rdir)
255 				VN_RELE(PTOU(curproc)->u_rdir);
256 			if (PTOU(curproc)->u_cwd)
257 				refstr_rele(PTOU(curproc)->u_cwd);
258 
259 			PTOU(curproc)->u_cdir = rootdir;
260 			PTOU(curproc)->u_rdir = NULL;
261 			PTOU(curproc)->u_cwd = NULL;
262 		}
263 
264 		/*
265 		 * Allow the reboot/halt/poweroff code a chance to do
266 		 * anything it needs to whilst we still have filesystems
267 		 * mounted, like loading any modules necessary for later
268 		 * performing the actual poweroff.
269 		 */
270 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
271 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
272 			mdpreboot(cmd, fcn, buf);
273 		} else
274 			mdpreboot(cmd, fcn, mdep);
275 
276 		/*
277 		 * Allow fsflush to finish running and then prevent it
278 		 * from ever running again so that vfs_unmountall() and
279 		 * vfs_syncall() can acquire the vfs locks they need.
280 		 */
281 		sema_p(&fsflush_sema);
282 		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
283 
284 		vfs_unmountall();
285 		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
286 		vfs_syncall();
287 
288 		dump_ereports();
289 		dump_messages();
290 
291 		invoke_cb = B_TRUE;
292 
293 		/* FALLTHROUGH */
294 	}
295 
296 	case A_REBOOT:
297 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
298 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
299 			mdboot(cmd, fcn, buf, invoke_cb);
300 		} else
301 			mdboot(cmd, fcn, mdep, invoke_cb);
302 		/* no return expected */
303 		break;
304 
305 	case A_CONFIG:
306 		switch (fcn) {
307 		case AD_UPDATE_BOOT_CONFIG:
308 #ifndef	__sparc
309 		{
310 			extern int fastreboot_capable;
311 			extern void fastboot_update_config(const char *);
312 
313 			if (fastreboot_capable)
314 				fastboot_update_config(mdep);
315 		}
316 #endif
317 
318 			break;
319 		}
320 		/* Let other threads enter the shutdown path now */
321 		mutex_enter(&ualock);
322 		ua_shutdown_thread = NULL;
323 		cv_signal(&uacond);
324 		mutex_exit(&ualock);
325 		break;
326 
327 	case A_REMOUNT:
328 		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
329 		/* Let other threads enter the shutdown path now */
330 		mutex_enter(&ualock);
331 		ua_shutdown_thread = NULL;
332 		cv_signal(&uacond);
333 		mutex_exit(&ualock);
334 		break;
335 
336 	case A_FREEZE:
337 	{
338 		/*
339 		 * This is the entrypoint for all suspend/resume actions.
340 		 */
341 		extern int cpr(int, void *);
342 
343 		if (modload("misc", "cpr") == -1)
344 			return (ENOTSUP);
345 		/* Let the CPR module decide what to do with mdep */
346 		error = cpr(fcn, mdep);
347 		break;
348 	}
349 
350 	case A_FTRACE:
351 	{
352 		switch (fcn) {
353 		case AD_FTRACE_START:
354 			(void) FTRACE_START();
355 			break;
356 		case AD_FTRACE_STOP:
357 			(void) FTRACE_STOP();
358 			break;
359 		default:
360 			error = EINVAL;
361 		}
362 		break;
363 	}
364 
365 	case A_DUMP:
366 	{
367 		if (fcn == AD_NOSYNC) {
368 			in_sync = 1;
369 			break;
370 		}
371 
372 		panic_bootfcn = fcn;
373 		panic_forced = 1;
374 
375 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
376 			panic_bootstr = i_convert_boot_device_name(mdep,
377 			    NULL, &buflen);
378 		} else
379 			panic_bootstr = mdep;
380 
381 #ifndef	__sparc
382 		extern int fastreboot_onpanic;
383 		if (fcn != AD_FASTREBOOT) {
384 			extern void fastboot_update_config(const char *);
385 			/*
386 			 * If user has explicitly requested reboot to prom,
387 			 * or uadmin(1M) was invoked with other functions,
388 			 * don't try to fast reboot after dumping.
389 			 */
390 			fastreboot_onpanic = 0;
391 			fastboot_update_config((char *)&fastreboot_onpanic);
392 		}
393 
394 		if (fastreboot_onpanic) {
395 			extern void fastboot_load_kernel(char *);
396 			fastboot_load_kernel(mdep);
397 		}
398 #endif
399 
400 		panic("forced crash dump initiated at user request");
401 		/*NOTREACHED*/
402 	}
403 
404 	case A_SDTTEST:
405 	{
406 		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
407 		    int, 6, int, 7);
408 		break;
409 	}
410 
411 	default:
412 		error = EINVAL;
413 	}
414 
415 	return (error);
416 }
417 
418 int
419 uadmin(int cmd, int fcn, uintptr_t mdep)
420 {
421 	int error = 0, rv = 0;
422 	size_t nbytes = 0;
423 	cred_t *credp = CRED();
424 	char *bootargs = NULL;
425 	int reset_status = 0;
426 
427 	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
428 		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
429 		    &reset_status);
430 		if (reset_status != 0)
431 			return (EIO);
432 		else
433 			return (0);
434 	}
435 
436 	/*
437 	 * The swapctl system call doesn't have its own entry point: it uses
438 	 * uadmin as a wrapper so we just call it directly from here.
439 	 */
440 	if (cmd == A_SWAPCTL) {
441 		if (get_udatamodel() == DATAMODEL_NATIVE)
442 			error = swapctl(fcn, (void *)mdep, &rv);
443 #if defined(_SYSCALL32_IMPL)
444 		else
445 			error = swapctl32(fcn, (void *)mdep, &rv);
446 #endif /* _SYSCALL32_IMPL */
447 		return (error ? set_errno(error) : rv);
448 	}
449 
450 	/*
451 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
452 	 * a boot string.  We pull that in as bootargs, if applicable.
453 	 */
454 	if (mdep != NULL &&
455 	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
456 	    cmd == A_FREEZE || cmd == A_CONFIG)) {
457 		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
458 		if ((error = copyinstr((const char *)mdep, bootargs,
459 		    BOOTARGS_MAX, &nbytes)) != 0) {
460 			kmem_free(bootargs, BOOTARGS_MAX);
461 			return (set_errno(error));
462 		}
463 	}
464 
465 	/*
466 	 * Invoke the appropriate kadmin() routine.
467 	 */
468 	if (getzoneid() != GLOBAL_ZONEID)
469 		error = zone_kadmin(cmd, fcn, bootargs, credp);
470 	else
471 		error = kadmin(cmd, fcn, bootargs, credp);
472 
473 	if (bootargs != NULL)
474 		kmem_free(bootargs, BOOTARGS_MAX);
475 	return (error ? set_errno(error) : 0);
476 }
477