xref: /freebsd/sys/kern/kern_jail.c (revision 2357939bc239bd5334a169b62313806178dd8f30)
1 /*
2  * ----------------------------------------------------------------------------
3  * "THE BEER-WARE LICENSE" (Revision 42):
4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
5  * can do whatever you want with this stuff. If we meet some day, and you think
6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
7  * ----------------------------------------------------------------------------
8  */
9 
10 #include <sys/cdefs.h>
11 __FBSDID("$FreeBSD$");
12 
13 #include <sys/param.h>
14 #include <sys/types.h>
15 #include <sys/kernel.h>
16 #include <sys/systm.h>
17 #include <sys/errno.h>
18 #include <sys/sysproto.h>
19 #include <sys/malloc.h>
20 #include <sys/proc.h>
21 #include <sys/taskqueue.h>
22 #include <sys/jail.h>
23 #include <sys/lock.h>
24 #include <sys/mutex.h>
25 #include <sys/namei.h>
26 #include <sys/queue.h>
27 #include <sys/socket.h>
28 #include <sys/syscallsubr.h>
29 #include <sys/sysctl.h>
30 #include <sys/vnode.h>
31 #include <net/if.h>
32 #include <netinet/in.h>
33 
34 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
35 
36 SYSCTL_DECL(_security);
37 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
38     "Jail rules");
39 
40 mp_fixme("these variables need a lock")
41 
42 int	jail_set_hostname_allowed = 1;
43 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
44     &jail_set_hostname_allowed, 0,
45     "Processes in jail can set their hostnames");
46 
47 int	jail_socket_unixiproute_only = 1;
48 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
49     &jail_socket_unixiproute_only, 0,
50     "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
51 
52 int	jail_sysvipc_allowed = 0;
53 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
54     &jail_sysvipc_allowed, 0,
55     "Processes in jail can use System V IPC primitives");
56 
57 int	jail_getfsstatroot_only = 1;
58 SYSCTL_INT(_security_jail, OID_AUTO, getfsstate_getfsstatroot_only, CTLFLAG_RW,
59     &jail_getfsstatroot_only, 0,
60     "Processes see only their root file system in getfsstat()");
61 
62 int	jail_allow_raw_sockets = 0;
63 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
64     &jail_allow_raw_sockets, 0,
65     "Prison root can create raw sockets");
66 
67 /* allprison, lastprid, and prisoncount are protected by allprison_mtx. */
68 struct	prisonlist allprison;
69 struct	mtx allprison_mtx;
70 int	lastprid = 0;
71 int	prisoncount = 0;
72 
73 static void		 init_prison(void *);
74 static void		 prison_complete(void *context, int pending);
75 static struct prison	*prison_find(int);
76 static int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
77 
78 static void
79 init_prison(void *data __unused)
80 {
81 
82 	mtx_init(&allprison_mtx, "allprison", NULL, MTX_DEF);
83 	LIST_INIT(&allprison);
84 }
85 
86 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
87 
88 /*
89  * MPSAFE
90  *
91  * struct jail_args {
92  *	struct jail *jail;
93  * };
94  */
95 int
96 jail(struct thread *td, struct jail_args *uap)
97 {
98 	struct nameidata nd;
99 	struct prison *pr, *tpr;
100 	struct jail j;
101 	struct jail_attach_args jaa;
102 	int error, tryprid;
103 
104 	error = copyin(uap->jail, &j, sizeof(j));
105 	if (error)
106 		return (error);
107 	if (j.version != 0)
108 		return (EINVAL);
109 
110 	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
111 	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
112 	pr->pr_ref = 1;
113 	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
114 	if (error)
115 		goto e_killmtx;
116 	mtx_lock(&Giant);
117 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, pr->pr_path, td);
118 	error = namei(&nd);
119 	if (error) {
120 		mtx_unlock(&Giant);
121 		goto e_killmtx;
122 	}
123 	pr->pr_root = nd.ni_vp;
124 	VOP_UNLOCK(nd.ni_vp, 0, td);
125 	NDFREE(&nd, NDF_ONLY_PNBUF);
126 	mtx_unlock(&Giant);
127 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
128 	if (error)
129 		goto e_dropvnref;
130 	pr->pr_ip = j.ip_number;
131 	pr->pr_linux = NULL;
132 	pr->pr_securelevel = securelevel;
133 
134 	/* Determine next pr_id and add prison to allprison list. */
135 	mtx_lock(&allprison_mtx);
136 	tryprid = lastprid + 1;
137 	if (tryprid == JAIL_MAX)
138 		tryprid = 1;
139 next:
140 	LIST_FOREACH(tpr, &allprison, pr_list) {
141 		if (tpr->pr_id == tryprid) {
142 			tryprid++;
143 			if (tryprid == JAIL_MAX) {
144 				mtx_unlock(&allprison_mtx);
145 				error = EAGAIN;
146 				goto e_dropvnref;
147 			}
148 			goto next;
149 		}
150 	}
151 	pr->pr_id = jaa.jid = lastprid = tryprid;
152 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
153 	prisoncount++;
154 	mtx_unlock(&allprison_mtx);
155 
156 	error = jail_attach(td, &jaa);
157 	if (error)
158 		goto e_dropprref;
159 	mtx_lock(&pr->pr_mtx);
160 	pr->pr_ref--;
161 	mtx_unlock(&pr->pr_mtx);
162 	td->td_retval[0] = jaa.jid;
163 	return (0);
164 e_dropprref:
165 	mtx_lock(&allprison_mtx);
166 	LIST_REMOVE(pr, pr_list);
167 	prisoncount--;
168 	mtx_unlock(&allprison_mtx);
169 e_dropvnref:
170 	mtx_lock(&Giant);
171 	vrele(pr->pr_root);
172 	mtx_unlock(&Giant);
173 e_killmtx:
174 	mtx_destroy(&pr->pr_mtx);
175 	FREE(pr, M_PRISON);
176 	return (error);
177 }
178 
179 /*
180  * MPSAFE
181  *
182  * struct jail_attach_args {
183  *	int jid;
184  * };
185  */
186 int
187 jail_attach(struct thread *td, struct jail_attach_args *uap)
188 {
189 	struct proc *p;
190 	struct ucred *newcred, *oldcred;
191 	struct prison *pr;
192 	int error;
193 
194 	/*
195 	 * XXX: Note that there is a slight race here if two threads
196 	 * in the same privileged process attempt to attach to two
197 	 * different jails at the same time.  It is important for
198 	 * user processes not to do this, or they might end up with
199 	 * a process root from one prison, but attached to the jail
200 	 * of another.
201 	 */
202 	error = suser(td);
203 	if (error)
204 		return (error);
205 
206 	p = td->td_proc;
207 	mtx_lock(&allprison_mtx);
208 	pr = prison_find(uap->jid);
209 	if (pr == NULL) {
210 		mtx_unlock(&allprison_mtx);
211 		return (EINVAL);
212 	}
213 	pr->pr_ref++;
214 	mtx_unlock(&pr->pr_mtx);
215 	mtx_unlock(&allprison_mtx);
216 
217 	mtx_lock(&Giant);
218 	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
219 	if ((error = change_dir(pr->pr_root, td)) != 0)
220 		goto e_unlock;
221 #ifdef MAC
222 	if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
223 		goto e_unlock;
224 #endif
225 	VOP_UNLOCK(pr->pr_root, 0, td);
226 	change_root(pr->pr_root, td);
227 	mtx_unlock(&Giant);
228 
229 	newcred = crget();
230 	PROC_LOCK(p);
231 	oldcred = p->p_ucred;
232 	setsugid(p);
233 	crcopy(newcred, oldcred);
234 	newcred->cr_prison = pr;
235 	p->p_ucred = newcred;
236 	PROC_UNLOCK(p);
237 	crfree(oldcred);
238 	return (0);
239 e_unlock:
240 	VOP_UNLOCK(pr->pr_root, 0, td);
241 	mtx_unlock(&Giant);
242 	mtx_lock(&pr->pr_mtx);
243 	pr->pr_ref--;
244 	mtx_unlock(&pr->pr_mtx);
245 	return (error);
246 }
247 
248 /*
249  * Returns a locked prison instance, or NULL on failure.
250  */
251 static struct prison *
252 prison_find(int prid)
253 {
254 	struct prison *pr;
255 
256 	mtx_assert(&allprison_mtx, MA_OWNED);
257 	LIST_FOREACH(pr, &allprison, pr_list) {
258 		if (pr->pr_id == prid) {
259 			mtx_lock(&pr->pr_mtx);
260 			return (pr);
261 		}
262 	}
263 	return (NULL);
264 }
265 
266 void
267 prison_free(struct prison *pr)
268 {
269 
270 	mtx_lock(&allprison_mtx);
271 	mtx_lock(&pr->pr_mtx);
272 	pr->pr_ref--;
273 	if (pr->pr_ref == 0) {
274 		LIST_REMOVE(pr, pr_list);
275 		mtx_unlock(&pr->pr_mtx);
276 		prisoncount--;
277 		mtx_unlock(&allprison_mtx);
278 
279 		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
280 		taskqueue_enqueue(taskqueue_swi, &pr->pr_task);
281 		return;
282 	}
283 	mtx_unlock(&pr->pr_mtx);
284 	mtx_unlock(&allprison_mtx);
285 }
286 
287 static void
288 prison_complete(void *context, int pending)
289 {
290 	struct prison *pr;
291 
292 	pr = (struct prison *)context;
293 
294 	mtx_lock(&Giant);
295 	vrele(pr->pr_root);
296 	mtx_unlock(&Giant);
297 
298 	mtx_destroy(&pr->pr_mtx);
299 	if (pr->pr_linux != NULL)
300 		FREE(pr->pr_linux, M_PRISON);
301 	FREE(pr, M_PRISON);
302 }
303 
304 void
305 prison_hold(struct prison *pr)
306 {
307 
308 	mtx_lock(&pr->pr_mtx);
309 	pr->pr_ref++;
310 	mtx_unlock(&pr->pr_mtx);
311 }
312 
313 u_int32_t
314 prison_getip(struct ucred *cred)
315 {
316 
317 	return (cred->cr_prison->pr_ip);
318 }
319 
320 int
321 prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
322 {
323 	u_int32_t tmp;
324 
325 	if (!jailed(cred))
326 		return (0);
327 	if (flag)
328 		tmp = *ip;
329 	else
330 		tmp = ntohl(*ip);
331 	if (tmp == INADDR_ANY) {
332 		if (flag)
333 			*ip = cred->cr_prison->pr_ip;
334 		else
335 			*ip = htonl(cred->cr_prison->pr_ip);
336 		return (0);
337 	}
338 	if (tmp == INADDR_LOOPBACK) {
339 		if (flag)
340 			*ip = cred->cr_prison->pr_ip;
341 		else
342 			*ip = htonl(cred->cr_prison->pr_ip);
343 		return (0);
344 	}
345 	if (cred->cr_prison->pr_ip != tmp)
346 		return (1);
347 	return (0);
348 }
349 
350 void
351 prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
352 {
353 	u_int32_t tmp;
354 
355 	if (!jailed(cred))
356 		return;
357 	if (flag)
358 		tmp = *ip;
359 	else
360 		tmp = ntohl(*ip);
361 	if (tmp == INADDR_LOOPBACK) {
362 		if (flag)
363 			*ip = cred->cr_prison->pr_ip;
364 		else
365 			*ip = htonl(cred->cr_prison->pr_ip);
366 		return;
367 	}
368 	return;
369 }
370 
371 int
372 prison_if(struct ucred *cred, struct sockaddr *sa)
373 {
374 	struct sockaddr_in *sai;
375 	int ok;
376 
377 	sai = (struct sockaddr_in *)sa;
378 	if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
379 		ok = 1;
380 	else if (sai->sin_family != AF_INET)
381 		ok = 0;
382 	else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
383 		ok = 1;
384 	else
385 		ok = 0;
386 	return (ok);
387 }
388 
389 /*
390  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
391  */
392 int
393 prison_check(struct ucred *cred1, struct ucred *cred2)
394 {
395 
396 	if (jailed(cred1)) {
397 		if (!jailed(cred2))
398 			return (ESRCH);
399 		if (cred2->cr_prison != cred1->cr_prison)
400 			return (ESRCH);
401 	}
402 
403 	return (0);
404 }
405 
406 /*
407  * Return 1 if the passed credential is in a jail, otherwise 0.
408  */
409 int
410 jailed(struct ucred *cred)
411 {
412 
413 	return (cred->cr_prison != NULL);
414 }
415 
416 /*
417  * Return the correct hostname for the passed credential.
418  */
419 void
420 getcredhostname(struct ucred *cred, char *buf, size_t size)
421 {
422 
423 	if (jailed(cred)) {
424 		mtx_lock(&cred->cr_prison->pr_mtx);
425 		strlcpy(buf, cred->cr_prison->pr_host, size);
426 		mtx_unlock(&cred->cr_prison->pr_mtx);
427 	} else
428 		strlcpy(buf, hostname, size);
429 }
430 
431 /*
432  * Return 1 if the passed credential can "see" the passed mountpoint
433  * when performing a getfsstat(); otherwise, 0.
434  */
435 int
436 prison_check_mount(struct ucred *cred, struct mount *mp)
437 {
438 
439 	if (jail_getfsstatroot_only && cred->cr_prison != NULL) {
440 		if (cred->cr_prison->pr_root->v_mount != mp)
441 			return (0);
442 	}
443 	return (1);
444 }
445 
446 static int
447 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
448 {
449 	struct xprison *xp, *sxp;
450 	struct prison *pr;
451 	int count, error;
452 
453 	mtx_assert(&Giant, MA_OWNED);
454 	if (jailed(req->td->td_ucred))
455 		return (0);
456 retry:
457 	mtx_lock(&allprison_mtx);
458 	count = prisoncount;
459 	mtx_unlock(&allprison_mtx);
460 
461 	if (count == 0)
462 		return (0);
463 
464 	sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
465 	mtx_lock(&allprison_mtx);
466 	if (count != prisoncount) {
467 		mtx_unlock(&allprison_mtx);
468 		free(sxp, M_TEMP);
469 		goto retry;
470 	}
471 
472 	LIST_FOREACH(pr, &allprison, pr_list) {
473 		mtx_lock(&pr->pr_mtx);
474 		xp->pr_version = XPRISON_VERSION;
475 		xp->pr_id = pr->pr_id;
476 		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
477 		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
478 		xp->pr_ip = pr->pr_ip;
479 		mtx_unlock(&pr->pr_mtx);
480 		xp++;
481 	}
482 	mtx_unlock(&allprison_mtx);
483 
484 	error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
485 	free(sxp, M_TEMP);
486 	if (error)
487 		return (error);
488 	return (0);
489 }
490 
491 SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
492     NULL, 0, sysctl_jail_list, "S", "List of active jails");
493 
494 static int
495 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
496 {
497 	int error, injail;
498 
499 	injail = jailed(req->td->td_ucred);
500 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
501 
502 	return (error);
503 }
504 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
505     NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
506