xref: /titanic_44/usr/src/uts/common/syscall/uadmin.c (revision 1979231e1e29c981e5d1e6cee60f2db46d052b00)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/errno.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/swap.h>
37 #include <sys/file.h>
38 #include <sys/proc.h>
39 #include <sys/var.h>
40 #include <sys/uadmin.h>
41 #include <sys/signal.h>
42 #include <sys/time.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/modctl.h>
45 #include <sys/callb.h>
46 #include <sys/dumphdr.h>
47 #include <sys/debug.h>
48 #include <sys/ftrace.h>
49 #include <sys/cmn_err.h>
50 #include <sys/panic.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 /*
57  * Administrivia system call.  We provide this in two flavors: one for calling
58  * from the system call path (uadmin), and the other for calling from elsewhere
59  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
60  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
61  */
62 
/*
 * fsflush_sema is defined outside this file.  kadmin() acquires it with
 * sema_p() on the A_SHUTDOWN path so that fsflush can finish and then not
 * run again; nothing in this file releases it afterwards.
 */
63 extern ksema_t fsflush_sema;
/*
 * ualock serializes the A_SHUTDOWN, A_REBOOT and A_REMOUNT commands in
 * kadmin().  It is taken there with mutex_tryenter(); a caller that finds it
 * already held returns immediately.
 */
64 kmutex_t ualock;
65 
66 
67 /*
68  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
69  * passed in when the system as a whole is shutting down.  The lack of per-zone
70  * process lists is likely to make the following a performance bottleneck on a
71  * system with many zones.
72  */
73 void
74 killall(zoneid_t zoneid)
75 {
76 	proc_t *p;
77 
78 	ASSERT(zoneid != GLOBAL_ZONEID);
79 	/*
80 	 * Kill all processes except kernel daemons and ourself.
81 	 * Make a first pass to stop all processes so they won't
82 	 * be trying to restart children as we kill them.
83 	 */
84 	mutex_enter(&pidlock);
85 	for (p = practive; p != NULL; p = p->p_next) {
86 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
87 		    p->p_exec != NULLVP &&	/* kernel daemons */
88 		    p->p_as != &kas &&
89 		    p->p_stat != SZOMB) {
90 			mutex_enter(&p->p_lock);
91 			p->p_flag |= SNOWAIT;
92 			sigtoproc(p, NULL, SIGSTOP);
93 			mutex_exit(&p->p_lock);
94 		}
95 	}
96 	p = practive;
97 	while (p != NULL) {
98 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
99 		    p->p_exec != NULLVP &&	/* kernel daemons */
100 		    p->p_as != &kas &&
101 		    p->p_stat != SIDL &&
102 		    p->p_stat != SZOMB) {
103 			mutex_enter(&p->p_lock);
104 			if (sigismember(&p->p_sig, SIGKILL)) {
105 				mutex_exit(&p->p_lock);
106 				p = p->p_next;
107 			} else {
108 				sigtoproc(p, NULL, SIGKILL);
109 				mutex_exit(&p->p_lock);
110 				(void) cv_timedwait(&p->p_srwchan_cv, &pidlock,
111 				    lbolt + hz);
112 				p = practive;
113 			}
114 		} else {
115 			p = p->p_next;
116 		}
117 	}
118 	mutex_exit(&pidlock);
119 }
120 
121 int
122 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
123 {
124 	int error = 0;
125 	int locked = 0;
126 	char *buf;
127 	size_t buflen = 0;
128 	boolean_t invoke_cb = B_FALSE;
129 
130 	/*
131 	 * We might be called directly by the kernel's fault-handling code, so
132 	 * we can't assert that the caller is in the global zone.
133 	 */
134 
135 	/*
136 	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
137 	 * and that we have appropriate privileges for this action.
138 	 */
139 	switch (cmd) {
140 	case A_FTRACE:
141 	case A_SHUTDOWN:
142 	case A_REBOOT:
143 	case A_REMOUNT:
144 	case A_FREEZE:
145 	case A_DUMP:
146 		if (secpolicy_sys_config(credp, B_FALSE) != 0)
147 			return (EPERM);
148 		break;
149 
150 	default:
151 		return (EINVAL);
152 	}
153 
154 	/*
155 	 * Serialize these operations on ualock.  If it is held, just return
156 	 * as if successful since the system will soon reset or remount.
157 	 */
158 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT) {
159 		if (!mutex_tryenter(&ualock))
160 			return (0);
161 		locked = 1;
162 	}
163 
164 	switch (cmd) {
165 	case A_SHUTDOWN:
166 	{
167 		proc_t *p = ttoproc(curthread);
168 
169 		/*
170 		 * Release (almost) all of our own resources if we are called
171 		 * from a user context, however if we are calling kadmin() from
172 		 * a kernel context then we do not release these resources.
173 		 */
174 		if (p != &p0) {
175 			proc_is_exiting(p);
176 			if ((error = exitlwps(0)) != 0)
177 				return (error);
178 			mutex_enter(&p->p_lock);
179 			p->p_flag |= SNOWAIT;
180 			sigfillset(&p->p_ignore);
181 			curthread->t_lwp->lwp_cursig = 0;
182 			curthread->t_lwp->lwp_extsig = 0;
183 			if (p->p_exec) {
184 				vnode_t *exec_vp = p->p_exec;
185 				p->p_exec = NULLVP;
186 				mutex_exit(&p->p_lock);
187 				VN_RELE(exec_vp);
188 			} else {
189 				mutex_exit(&p->p_lock);
190 			}
191 
192 			pollcleanup();
193 			closeall(P_FINFO(curproc));
194 			relvm();
195 
196 		} else {
197 			/*
198 			 * Reset t_cred if not set because much of the
199 			 * filesystem code depends on CRED() being valid.
200 			 */
201 			if (curthread->t_cred == NULL)
202 				curthread->t_cred = kcred;
203 		}
204 
205 		/*
206 		 * Communcate that init shouldn't be restarted.
207 		 */
208 		zone_shutdown_global();
209 
210 		killall(ALL_ZONES);
211 		/*
212 		 * If we are calling kadmin() from a kernel context then we
213 		 * do not release these resources.
214 		 */
215 		if (ttoproc(curthread) != &p0) {
216 			VN_RELE(u.u_cdir);
217 			if (u.u_rdir)
218 				VN_RELE(u.u_rdir);
219 			if (u.u_cwd)
220 				refstr_rele(u.u_cwd);
221 
222 			u.u_cdir = rootdir;
223 			u.u_rdir = NULL;
224 			u.u_cwd = NULL;
225 		}
226 
227 		/*
228 		 * Allow the reboot/halt/poweroff code a chance to do
229 		 * anything it needs to whilst we still have filesystems
230 		 * mounted, like loading any modules necessary for later
231 		 * performing the actual poweroff.
232 		 */
233 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
234 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
235 			mdpreboot(cmd, fcn, buf);
236 		} else
237 			mdpreboot(cmd, fcn, mdep);
238 
239 		/*
240 		 * Allow fsflush to finish running and then prevent it
241 		 * from ever running again so that vfs_unmountall() and
242 		 * vfs_syncall() can acquire the vfs locks they need.
243 		 */
244 		sema_p(&fsflush_sema);
245 		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
246 
247 		vfs_unmountall();
248 		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
249 		vfs_syncall();
250 
251 		dump_ereports();
252 		dump_messages();
253 
254 		invoke_cb = B_TRUE;
255 
256 		/* FALLTHROUGH */
257 	}
258 
259 	case A_REBOOT:
260 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
261 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
262 			mdboot(cmd, fcn, buf, invoke_cb);
263 		} else
264 			mdboot(cmd, fcn, mdep, invoke_cb);
265 		/* no return expected */
266 		break;
267 
268 	case A_REMOUNT:
269 		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
270 		break;
271 
272 	case A_FREEZE:
273 	{
274 		/* XXX: declare in some header file */
275 		extern int cpr(int);
276 
277 		if (modload("misc", "cpr") == -1)
278 			return (ENOTSUP);
279 		error = cpr(fcn);
280 		break;
281 	}
282 
283 	case A_FTRACE:
284 	{
285 		switch (fcn) {
286 		case AD_FTRACE_START:
287 			(void) FTRACE_START();
288 			break;
289 		case AD_FTRACE_STOP:
290 			(void) FTRACE_STOP();
291 			break;
292 		default:
293 			error = EINVAL;
294 		}
295 		break;
296 	}
297 
298 	case A_DUMP:
299 	{
300 		if (fcn == AD_NOSYNC) {
301 			in_sync = 1;
302 			break;
303 		}
304 
305 		panic_bootfcn = fcn;
306 		panic_forced = 1;
307 
308 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
309 			panic_bootstr = i_convert_boot_device_name(mdep,
310 			    NULL, &buflen);
311 		} else
312 			panic_bootstr = mdep;
313 
314 		panic("forced crash dump initiated at user request");
315 		/*NOTREACHED*/
316 	}
317 
318 	default:
319 		error = EINVAL;
320 	}
321 
322 	if (locked)
323 		mutex_exit(&ualock);
324 
325 	return (error);
326 }
327 
328 int
329 uadmin(int cmd, int fcn, uintptr_t mdep)
330 {
331 	int error = 0, rv = 0;
332 	size_t nbytes = 0;
333 	cred_t *credp = CRED();
334 	char *bootargs = NULL;
335 
336 	/*
337 	 * The swapctl system call doesn't have its own entry point: it uses
338 	 * uadmin as a wrapper so we just call it directly from here.
339 	 */
340 	if (cmd == A_SWAPCTL) {
341 		if (get_udatamodel() == DATAMODEL_NATIVE)
342 			error = swapctl(fcn, (void *)mdep, &rv);
343 #if defined(_SYSCALL32_IMPL)
344 		else
345 			error = swapctl32(fcn, (void *)mdep, &rv);
346 #endif /* _SYSCALL32_IMPL */
347 		return (error ? set_errno(error) : rv);
348 	}
349 
350 	/*
351 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
352 	 * a boot string.  We pull that in as bootargs, if applicable.
353 	 */
354 	if (mdep != NULL &&
355 	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP)) {
356 		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
357 		if ((error = copyinstr((const char *)mdep, bootargs,
358 		    BOOTARGS_MAX, &nbytes)) != 0) {
359 			kmem_free(bootargs, BOOTARGS_MAX);
360 			return (set_errno(error));
361 		}
362 	}
363 
364 	/*
365 	 * Invoke the appropriate kadmin() routine.
366 	 */
367 	if (getzoneid() != GLOBAL_ZONEID)
368 		error = zone_kadmin(cmd, fcn, bootargs, credp);
369 	else
370 		error = kadmin(cmd, fcn, bootargs, credp);
371 
372 	if (bootargs != NULL)
373 		kmem_free(bootargs, BOOTARGS_MAX);
374 	return (error ? set_errno(error) : 0);
375 }
376