xref: /illumos-gate/usr/src/uts/common/syscall/uadmin.c (revision 3ba944265c4ae1fcf23ef758537c2e4f4feec16e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2013 Joyent, Inc.  All rights reserved.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/vfs.h>
34 #include <sys/vnode.h>
35 #include <sys/swap.h>
36 #include <sys/file.h>
37 #include <sys/proc.h>
38 #include <sys/var.h>
39 #include <sys/uadmin.h>
40 #include <sys/signal.h>
41 #include <sys/time.h>
42 #include <vm/seg_kmem.h>
43 #include <sys/modctl.h>
44 #include <sys/callb.h>
45 #include <sys/dumphdr.h>
46 #include <sys/debug.h>
47 #include <sys/ftrace.h>
48 #include <sys/cmn_err.h>
49 #include <sys/panic.h>
50 #include <sys/ddi.h>
51 #include <sys/ddi_periodic.h>
52 #include <sys/sunddi.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 #include <sys/condvar.h>
56 #include <sys/thread.h>
57 #include <sys/sdt.h>
58 
59 /*
60  * Administrivia system call.  We provide this in two flavors: one for calling
61  * from the system call path (uadmin), and the other for calling from elsewhere
62  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
63  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
64  */
65 
66 extern ksema_t fsflush_sema;
67 kmutex_t ualock;
68 kcondvar_t uacond;
69 kthread_t *ua_shutdown_thread = NULL;
70 
71 int sys_shutdown = 0;
72 volatile int fastreboot_dryrun = 0;
73 
74 /*
75  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
76  * passed in when the system as a whole is shutting down.  The lack of per-zone
77  * process lists is likely to make the following a performance bottleneck on a
78  * system with many zones.
79  */
80 void
81 killall(zoneid_t zoneid)
82 {
83 	proc_t *p;
84 
85 	ASSERT(zoneid != GLOBAL_ZONEID);
86 	/*
87 	 * Kill all processes except kernel daemons and ourself.
88 	 * Make a first pass to stop all processes so they won't
89 	 * be trying to restart children as we kill them.
90 	 */
91 	mutex_enter(&pidlock);
92 	for (p = practive; p != NULL; p = p->p_next) {
93 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
94 		    p->p_exec != NULLVP &&	/* kernel daemons */
95 		    p->p_as != &kas &&
96 		    p->p_stat != SZOMB) {
97 			mutex_enter(&p->p_lock);
98 			p->p_flag |= SNOWAIT;
99 			sigtoproc(p, NULL, SIGSTOP);
100 			mutex_exit(&p->p_lock);
101 		}
102 	}
103 	p = practive;
104 	while (p != NULL) {
105 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
106 		    p->p_exec != NULLVP &&	/* kernel daemons */
107 		    p->p_as != &kas &&
108 		    p->p_stat != SIDL &&
109 		    p->p_stat != SZOMB) {
110 			mutex_enter(&p->p_lock);
111 			if (sigismember(&p->p_sig, SIGKILL)) {
112 				mutex_exit(&p->p_lock);
113 				p = p->p_next;
114 			} else {
115 				sigtoproc(p, NULL, SIGKILL);
116 				mutex_exit(&p->p_lock);
117 				(void) cv_reltimedwait(&p->p_srwchan_cv,
118 				    &pidlock, hz, TR_CLOCK_TICK);
119 				p = practive;
120 			}
121 		} else {
122 			p = p->p_next;
123 		}
124 	}
125 	mutex_exit(&pidlock);
126 }
127 
128 int
129 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
130 {
131 	int error = 0;
132 	char *buf;
133 	size_t buflen = 0;
134 	boolean_t invoke_cb = B_FALSE;
135 
136 	/*
137 	 * We might be called directly by the kernel's fault-handling code, so
138 	 * we can't assert that the caller is in the global zone.
139 	 */
140 
141 	/*
142 	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
143 	 * and that we have appropriate privileges for this action.
144 	 */
145 	switch (cmd) {
146 	case A_FTRACE:
147 	case A_SHUTDOWN:
148 	case A_REBOOT:
149 	case A_REMOUNT:
150 	case A_FREEZE:
151 	case A_DUMP:
152 	case A_SDTTEST:
153 	case A_CONFIG:
154 		if (secpolicy_sys_config(credp, B_FALSE) != 0)
155 			return (EPERM);
156 		break;
157 
158 	default:
159 		return (EINVAL);
160 	}
161 
162 	/*
163 	 * Serialize these operations on ualock.  If it is held, the
164 	 * system should shutdown, reboot, or remount shortly, unless there is
165 	 * an error.  We need a cv rather than just a mutex because proper
166 	 * functioning of A_REBOOT relies on being able to interrupt blocked
167 	 * userland callers.
168 	 *
169 	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
170 	 * Other commands should never return.
171 	 */
172 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
173 	    cmd == A_CONFIG) {
174 		mutex_enter(&ualock);
175 		while (ua_shutdown_thread != NULL) {
176 			if (cv_wait_sig(&uacond, &ualock) == 0) {
177 				/*
178 				 * If we were interrupted, leave, and handle
179 				 * the signal (or exit, depending on what
180 				 * happened)
181 				 */
182 				mutex_exit(&ualock);
183 				return (EINTR);
184 			}
185 		}
186 		ua_shutdown_thread = curthread;
187 		mutex_exit(&ualock);
188 	}
189 
190 	switch (cmd) {
191 	case A_SHUTDOWN:
192 	{
193 		proc_t *p = ttoproc(curthread);
194 
195 		/*
196 		 * Release (almost) all of our own resources if we are called
197 		 * from a user context, however if we are calling kadmin() from
198 		 * a kernel context then we do not release these resources.
199 		 */
200 		if (p != &p0) {
201 			proc_is_exiting(p);
202 			if ((error = exitlwps(0)) != 0) {
203 				/*
204 				 * Another thread in this process also called
205 				 * exitlwps().
206 				 */
207 				mutex_enter(&ualock);
208 				ua_shutdown_thread = NULL;
209 				cv_signal(&uacond);
210 				mutex_exit(&ualock);
211 				return (error);
212 			}
213 			mutex_enter(&p->p_lock);
214 			p->p_flag |= SNOWAIT;
215 			sigfillset(&p->p_ignore);
216 			curthread->t_lwp->lwp_cursig = 0;
217 			curthread->t_lwp->lwp_extsig = 0;
218 			if (p->p_exec) {
219 				vnode_t *exec_vp = p->p_exec;
220 				p->p_exec = NULLVP;
221 				mutex_exit(&p->p_lock);
222 				VN_RELE(exec_vp);
223 			} else {
224 				mutex_exit(&p->p_lock);
225 			}
226 
227 			pollcleanup();
228 			closeall(P_FINFO(curproc));
229 			relvm();
230 
231 		} else {
232 			/*
233 			 * Reset t_cred if not set because much of the
234 			 * filesystem code depends on CRED() being valid.
235 			 */
236 			if (curthread->t_cred == NULL)
237 				curthread->t_cred = kcred;
238 		}
239 
240 		/* indicate shutdown in progress */
241 		sys_shutdown = 1;
242 
243 		/*
244 		 * Communcate that init shouldn't be restarted.
245 		 */
246 		zone_shutdown_global();
247 
248 		killall(ALL_ZONES);
249 		/*
250 		 * If we are calling kadmin() from a kernel context then we
251 		 * do not release these resources.
252 		 */
253 		if (ttoproc(curthread) != &p0) {
254 			VN_RELE(PTOU(curproc)->u_cdir);
255 			if (PTOU(curproc)->u_rdir)
256 				VN_RELE(PTOU(curproc)->u_rdir);
257 			if (PTOU(curproc)->u_cwd)
258 				refstr_rele(PTOU(curproc)->u_cwd);
259 
260 			PTOU(curproc)->u_cdir = rootdir;
261 			PTOU(curproc)->u_rdir = NULL;
262 			PTOU(curproc)->u_cwd = NULL;
263 		}
264 
265 		/*
266 		 * Allow the reboot/halt/poweroff code a chance to do
267 		 * anything it needs to whilst we still have filesystems
268 		 * mounted, like loading any modules necessary for later
269 		 * performing the actual poweroff.
270 		 */
271 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
272 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
273 			mdpreboot(cmd, fcn, buf);
274 		} else
275 			mdpreboot(cmd, fcn, mdep);
276 
277 		/*
278 		 * Allow fsflush to finish running and then prevent it
279 		 * from ever running again so that vfs_unmountall() and
280 		 * vfs_syncall() can acquire the vfs locks they need.
281 		 */
282 		sema_p(&fsflush_sema);
283 		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
284 
285 		vfs_unmountall();
286 		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
287 		vfs_syncall();
288 
289 		/*
290 		 * Check for (and unregister) any DDI periodic handlers that
291 		 * still exist, as they most likely constitute resource leaks:
292 		 */
293 		ddi_periodic_fini();
294 
295 		dump_ereports();
296 		dump_messages();
297 
298 		invoke_cb = B_TRUE;
299 
300 		/* FALLTHROUGH */
301 	}
302 
303 	case A_REBOOT:
304 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
305 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
306 			mdboot(cmd, fcn, buf, invoke_cb);
307 		} else
308 			mdboot(cmd, fcn, mdep, invoke_cb);
309 		/* no return expected */
310 		break;
311 
312 	case A_CONFIG:
313 		switch (fcn) {
314 		case AD_UPDATE_BOOT_CONFIG:
315 #ifndef	__sparc
316 		{
317 			extern void fastboot_update_config(const char *);
318 
319 			fastboot_update_config(mdep);
320 		}
321 #endif
322 
323 			break;
324 		}
325 		/* Let other threads enter the shutdown path now */
326 		mutex_enter(&ualock);
327 		ua_shutdown_thread = NULL;
328 		cv_signal(&uacond);
329 		mutex_exit(&ualock);
330 		break;
331 
332 	case A_REMOUNT:
333 		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
334 		/* Let other threads enter the shutdown path now */
335 		mutex_enter(&ualock);
336 		ua_shutdown_thread = NULL;
337 		cv_signal(&uacond);
338 		mutex_exit(&ualock);
339 		break;
340 
341 	case A_FREEZE:
342 	{
343 		/*
344 		 * This is the entrypoint for all suspend/resume actions.
345 		 */
346 		extern int cpr(int, void *);
347 
348 		if (modload("misc", "cpr") == -1)
349 			return (ENOTSUP);
350 		/* Let the CPR module decide what to do with mdep */
351 		error = cpr(fcn, mdep);
352 		break;
353 	}
354 
355 	case A_FTRACE:
356 	{
357 		switch (fcn) {
358 		case AD_FTRACE_START:
359 			(void) FTRACE_START();
360 			break;
361 		case AD_FTRACE_STOP:
362 			(void) FTRACE_STOP();
363 			break;
364 		default:
365 			error = EINVAL;
366 		}
367 		break;
368 	}
369 
370 	case A_DUMP:
371 	{
372 		if (fcn == AD_NOSYNC) {
373 			in_sync = 1;
374 			break;
375 		}
376 
377 		panic_bootfcn = fcn;
378 		panic_forced = 1;
379 
380 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
381 			panic_bootstr = i_convert_boot_device_name(mdep,
382 			    NULL, &buflen);
383 		} else
384 			panic_bootstr = mdep;
385 
386 #ifndef	__sparc
387 		extern void fastboot_update_and_load(int, char *);
388 
389 		fastboot_update_and_load(fcn, mdep);
390 #endif
391 
392 		panic("forced crash dump initiated at user request");
393 		/*NOTREACHED*/
394 	}
395 
396 	case A_SDTTEST:
397 	{
398 		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
399 		    int, 6, int, 7);
400 		break;
401 	}
402 
403 	default:
404 		error = EINVAL;
405 	}
406 
407 	return (error);
408 }
409 
410 int
411 uadmin(int cmd, int fcn, uintptr_t mdep)
412 {
413 	int error = 0, rv = 0;
414 	size_t nbytes = 0;
415 	cred_t *credp = CRED();
416 	char *bootargs = NULL;
417 	int reset_status = 0;
418 
419 	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
420 		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
421 		    &reset_status);
422 		if (reset_status != 0)
423 			return (EIO);
424 		else
425 			return (0);
426 	}
427 
428 	/*
429 	 * The swapctl system call doesn't have its own entry point: it uses
430 	 * uadmin as a wrapper so we just call it directly from here.
431 	 */
432 	if (cmd == A_SWAPCTL) {
433 		if (get_udatamodel() == DATAMODEL_NATIVE)
434 			error = swapctl(fcn, (void *)mdep, &rv);
435 #if defined(_SYSCALL32_IMPL)
436 		else
437 			error = swapctl32(fcn, (void *)mdep, &rv);
438 #endif /* _SYSCALL32_IMPL */
439 		return (error ? set_errno(error) : rv);
440 	}
441 
442 	/*
443 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
444 	 * a boot string.  We pull that in as bootargs, if applicable.
445 	 */
446 	if (mdep != NULL &&
447 	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
448 	    cmd == A_FREEZE || cmd == A_CONFIG)) {
449 		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
450 		if ((error = copyinstr((const char *)mdep, bootargs,
451 		    BOOTARGS_MAX, &nbytes)) != 0) {
452 			kmem_free(bootargs, BOOTARGS_MAX);
453 			return (set_errno(error));
454 		}
455 	}
456 
457 	/*
458 	 * Invoke the appropriate kadmin() routine.
459 	 */
460 	if (getzoneid() != GLOBAL_ZONEID)
461 		error = zone_kadmin(cmd, fcn, bootargs, credp);
462 	else
463 		error = kadmin(cmd, fcn, bootargs, credp);
464 
465 	if (bootargs != NULL)
466 		kmem_free(bootargs, BOOTARGS_MAX);
467 	return (error ? set_errno(error) : 0);
468 }
469