xref: /illumos-gate/usr/src/uts/common/syscall/uadmin.c (revision 814a60b13c0ad90e5d2edfd29a7a84bbf416cc1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/errno.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/swap.h>
37 #include <sys/file.h>
38 #include <sys/proc.h>
39 #include <sys/var.h>
40 #include <sys/uadmin.h>
41 #include <sys/signal.h>
42 #include <sys/time.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/modctl.h>
45 #include <sys/callb.h>
46 #include <sys/dumphdr.h>
47 #include <sys/debug.h>
48 #include <sys/ftrace.h>
49 #include <sys/cmn_err.h>
50 #include <sys/panic.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 /*
57  * Administrivia system call.  We provide this in two flavors: one for calling
58  * from the system call path (uadmin), and the other for calling from elsewhere
59  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
60  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
61  */
62 
63 extern ksema_t fsflush_sema;
64 kmutex_t ualock;
65 
66 
67 /*
68  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
69  * passed in when the system as a whole is shutting down.  The lack of per-zone
70  * process lists is likely to make the following a performance bottleneck on a
71  * system with many zones.
72  */
73 void
74 killall(zoneid_t zoneid)
75 {
76 	proc_t *p;
77 
78 	ASSERT(zoneid != GLOBAL_ZONEID);
79 	/*
80 	 * Kill all processes except kernel daemons and ourself.
81 	 * Make a first pass to stop all processes so they won't
82 	 * be trying to restart children as we kill them.
83 	 */
84 	mutex_enter(&pidlock);
85 	for (p = practive; p != NULL; p = p->p_next) {
86 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
87 		    p->p_exec != NULLVP &&	/* kernel daemons */
88 		    p->p_as != &kas &&
89 		    p->p_stat != SZOMB) {
90 			mutex_enter(&p->p_lock);
91 			p->p_flag |= SNOWAIT;
92 			sigtoproc(p, NULL, SIGSTOP);
93 			mutex_exit(&p->p_lock);
94 		}
95 	}
96 	p = practive;
97 	while (p != NULL) {
98 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
99 		    p->p_exec != NULLVP &&	/* kernel daemons */
100 		    p->p_as != &kas &&
101 		    p->p_stat != SIDL &&
102 		    p->p_stat != SZOMB) {
103 			mutex_enter(&p->p_lock);
104 			if (sigismember(&p->p_sig, SIGKILL)) {
105 				mutex_exit(&p->p_lock);
106 				p = p->p_next;
107 			} else {
108 				sigtoproc(p, NULL, SIGKILL);
109 				mutex_exit(&p->p_lock);
110 				(void) cv_timedwait(&p->p_srwchan_cv,
111 					    &pidlock, lbolt + hz);
112 				p = practive;
113 			}
114 		} else {
115 			p = p->p_next;
116 		}
117 	}
118 	mutex_exit(&pidlock);
119 }
120 
121 int
122 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
123 {
124 	int error = 0;
125 	int locked = 0;
126 	char *buf;
127 	size_t buflen = 0;
128 	boolean_t invoke_cb = B_FALSE;
129 
130 	/*
131 	 * We might be called directly by the kernel's fault-handling code, so
132 	 * we can't assert that the caller is in the global zone.
133 	 */
134 
135 	/*
136 	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
137 	 * and that we have appropriate privileges for this action.
138 	 */
139 	switch (cmd) {
140 	case A_FTRACE:
141 	case A_SHUTDOWN:
142 	case A_REBOOT:
143 	case A_REMOUNT:
144 	case A_FREEZE:
145 	case A_DUMP:
146 		if (secpolicy_sys_config(credp, B_FALSE) != 0)
147 			return (EPERM);
148 		break;
149 
150 	default:
151 		return (EINVAL);
152 	}
153 
154 	/*
155 	 * Serialize these operations on ualock.  If it is held, just return
156 	 * as if successful since the system will soon reset or remount.
157 	 */
158 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT) {
159 		if (!mutex_tryenter(&ualock))
160 			return (0);
161 		locked = 1;
162 	}
163 
164 	switch (cmd) {
165 	case A_SHUTDOWN:
166 	{
167 		proc_t *p = ttoproc(curthread);
168 
169 		/*
170 		 * Release (almost) all of our own resources if we are called
171 		 * from a user context, however if we are calling kadmin() from
172 		 * a kernel context then we do not release these resources.
173 		 */
174 		if (ttoproc(curthread) != &p0) {
175 			if ((error = exitlwps(0)) != 0)
176 				return (error);
177 			mutex_enter(&p->p_lock);
178 			p->p_flag |= SNOWAIT;
179 			sigfillset(&p->p_ignore);
180 			curthread->t_lwp->lwp_cursig = 0;
181 			curthread->t_lwp->lwp_extsig = 0;
182 			if (p->p_exec) {
183 				vnode_t *exec_vp = p->p_exec;
184 				p->p_exec = NULLVP;
185 				mutex_exit(&p->p_lock);
186 				VN_RELE(exec_vp);
187 			} else {
188 				mutex_exit(&p->p_lock);
189 			}
190 
191 			pollcleanup();
192 			closeall(P_FINFO(curproc));
193 			relvm();
194 
195 		} else {
196 			/*
197 			 * Reset t_cred if not set because much of the
198 			 * filesystem code depends on CRED() being valid.
199 			 */
200 			if (curthread->t_cred == NULL)
201 				curthread->t_cred = kcred;
202 		}
203 
204 		/*
205 		 * Communcate that init shouldn't be restarted.
206 		 */
207 		zone_shutdown_global();
208 
209 		killall(ALL_ZONES);
210 		/*
211 		 * If we are calling kadmin() from a kernel context then we
212 		 * do not release these resources.
213 		 */
214 		if (ttoproc(curthread) != &p0) {
215 			VN_RELE(u.u_cdir);
216 			if (u.u_rdir)
217 				VN_RELE(u.u_rdir);
218 			if (u.u_cwd)
219 				refstr_rele(u.u_cwd);
220 
221 			u.u_cdir = rootdir;
222 			u.u_rdir = NULL;
223 			u.u_cwd = NULL;
224 		}
225 
226 		/*
227 		 * Allow the reboot/halt/poweroff code a chance to do
228 		 * anything it needs to whilst we still have filesystems
229 		 * mounted, like loading any modules necessary for later
230 		 * performing the actual poweroff.
231 		 */
232 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
233 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
234 			mdpreboot(cmd, fcn, buf);
235 		} else
236 			mdpreboot(cmd, fcn, mdep);
237 
238 		/*
239 		 * Allow fsflush to finish running and then prevent it
240 		 * from ever running again so that vfs_unmountall() and
241 		 * vfs_syncall() can acquire the vfs locks they need.
242 		 */
243 		sema_p(&fsflush_sema);
244 		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
245 
246 		vfs_unmountall();
247 		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
248 		vfs_syncall();
249 
250 		dump_ereports();
251 		dump_messages();
252 
253 		invoke_cb = B_TRUE;
254 
255 		/* FALLTHROUGH */
256 	}
257 
258 	case A_REBOOT:
259 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
260 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
261 			mdboot(cmd, fcn, buf, invoke_cb);
262 		} else
263 			mdboot(cmd, fcn, mdep, invoke_cb);
264 		/* no return expected */
265 		break;
266 
267 	case A_REMOUNT:
268 		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
269 		break;
270 
271 	case A_FREEZE:
272 	{
273 		/* XXX: declare in some header file */
274 		extern int cpr(int);
275 
276 		if (modload("misc", "cpr") == -1)
277 			return (ENOTSUP);
278 		error = cpr(fcn);
279 		break;
280 	}
281 
282 	case A_FTRACE:
283 	{
284 		switch (fcn) {
285 		case AD_FTRACE_START:
286 			(void) FTRACE_START();
287 			break;
288 		case AD_FTRACE_STOP:
289 			(void) FTRACE_STOP();
290 			break;
291 		default:
292 			error = EINVAL;
293 		}
294 		break;
295 	}
296 
297 	case A_DUMP:
298 	{
299 		if (fcn == AD_NOSYNC) {
300 			in_sync = 1;
301 			break;
302 		}
303 
304 		panic_bootfcn = fcn;
305 		panic_forced = 1;
306 
307 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
308 			panic_bootstr = i_convert_boot_device_name(mdep,
309 					    NULL, &buflen);
310 		} else
311 			panic_bootstr = mdep;
312 
313 		panic("forced crash dump initiated at user request");
314 		/*NOTREACHED*/
315 	}
316 
317 	default:
318 		error = EINVAL;
319 	}
320 
321 	if (locked)
322 		mutex_exit(&ualock);
323 
324 	return (error);
325 }
326 
327 int
328 uadmin(int cmd, int fcn, uintptr_t mdep)
329 {
330 	int error = 0, rv = 0;
331 	size_t nbytes = 0;
332 	char buf[257];
333 	cred_t *credp = CRED();
334 
335 	/*
336 	 * The swapctl system call doesn't have its own entry point: it uses
337 	 * uadmin as a wrapper so we just call it directly from here.
338 	 */
339 	if (cmd == A_SWAPCTL) {
340 		if (get_udatamodel() == DATAMODEL_NATIVE)
341 			error = swapctl(fcn, (void *)mdep, &rv);
342 #if defined(_SYSCALL32_IMPL)
343 		else
344 			error = swapctl32(fcn, (void *)mdep, &rv);
345 #endif /* _SYSCALL32_IMPL */
346 		return (error ? set_errno(error) : rv);
347 	}
348 
349 	/*
350 	 * Handle zones.
351 	 */
352 	if (getzoneid() != GLOBAL_ZONEID) {
353 		error = zone_uadmin(cmd, fcn, credp);
354 		return (error ? set_errno(error) : 0);
355 	}
356 
357 	/*
358 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
359 	 * a boot string.  Attempt to copy it in now, or reset mdep to NULL.
360 	 */
361 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP) {
362 		if (mdep != NULL && copyinstr((const char *)mdep, buf,
363 		    sizeof (buf) - 1, &nbytes) == 0) {
364 			buf[nbytes] = '\0';
365 			mdep = (uintptr_t)buf;
366 		} else
367 			mdep = NULL;
368 	}
369 
370 	if ((error = kadmin(cmd, fcn, (void *)mdep, credp)) != 0)
371 		return (set_errno(error));
372 
373 	return (0);
374 }
375