xref: /freebsd/sys/kern/kern_jail.c (revision 2110d913c0e2f570315c65b6b318ebf8eb660051)
19454b2d8SWarner Losh /*-
207901f22SPoul-Henning Kamp  * ----------------------------------------------------------------------------
307901f22SPoul-Henning Kamp  * "THE BEER-WARE LICENSE" (Revision 42):
407901f22SPoul-Henning Kamp  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
507901f22SPoul-Henning Kamp  * can do whatever you want with this stuff. If we meet some day, and you think
607901f22SPoul-Henning Kamp  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
707901f22SPoul-Henning Kamp  * ----------------------------------------------------------------------------
807901f22SPoul-Henning Kamp  */
975c13541SPoul-Henning Kamp 
10677b542eSDavid E. O'Brien #include <sys/cdefs.h>
11677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
12677b542eSDavid E. O'Brien 
1346e3b1cbSPawel Jakub Dawidek #include "opt_mac.h"
1446e3b1cbSPawel Jakub Dawidek 
1575c13541SPoul-Henning Kamp #include <sys/param.h>
1675c13541SPoul-Henning Kamp #include <sys/types.h>
1775c13541SPoul-Henning Kamp #include <sys/kernel.h>
1875c13541SPoul-Henning Kamp #include <sys/systm.h>
1975c13541SPoul-Henning Kamp #include <sys/errno.h>
2075c13541SPoul-Henning Kamp #include <sys/sysproto.h>
2175c13541SPoul-Henning Kamp #include <sys/malloc.h>
22800c9408SRobert Watson #include <sys/priv.h>
2375c13541SPoul-Henning Kamp #include <sys/proc.h>
24b3059e09SRobert Watson #include <sys/taskqueue.h>
2557b4252eSKonstantin Belousov #include <sys/fcntl.h>
2675c13541SPoul-Henning Kamp #include <sys/jail.h>
2701137630SRobert Watson #include <sys/lock.h>
2801137630SRobert Watson #include <sys/mutex.h>
29dc68a633SPawel Jakub Dawidek #include <sys/sx.h>
30fd7a8150SMike Barcroft #include <sys/namei.h>
31820a0de9SPawel Jakub Dawidek #include <sys/mount.h>
32fd7a8150SMike Barcroft #include <sys/queue.h>
3375c13541SPoul-Henning Kamp #include <sys/socket.h>
34fd7a8150SMike Barcroft #include <sys/syscallsubr.h>
3583f1e257SRobert Watson #include <sys/sysctl.h>
36fd7a8150SMike Barcroft #include <sys/vnode.h>
3775c13541SPoul-Henning Kamp #include <net/if.h>
3875c13541SPoul-Henning Kamp #include <netinet/in.h>
3975c13541SPoul-Henning Kamp 
40aed55708SRobert Watson #include <security/mac/mac_framework.h>
41aed55708SRobert Watson 
4275c13541SPoul-Henning Kamp MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
4375c13541SPoul-Henning Kamp 
44d0615c64SAndrew R. Reiter SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
4583f1e257SRobert Watson     "Jail rules");
4683f1e257SRobert Watson 
4783f1e257SRobert Watson int	jail_set_hostname_allowed = 1;
48d0615c64SAndrew R. Reiter SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
4983f1e257SRobert Watson     &jail_set_hostname_allowed, 0,
5083f1e257SRobert Watson     "Processes in jail can set their hostnames");
5183f1e257SRobert Watson 
527cadc266SRobert Watson int	jail_socket_unixiproute_only = 1;
53d0615c64SAndrew R. Reiter SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
547cadc266SRobert Watson     &jail_socket_unixiproute_only, 0,
557cadc266SRobert Watson     "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
567cadc266SRobert Watson 
57cb1f0db9SRobert Watson int	jail_sysvipc_allowed = 0;
58d0615c64SAndrew R. Reiter SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
59cb1f0db9SRobert Watson     &jail_sysvipc_allowed, 0,
60cb1f0db9SRobert Watson     "Processes in jail can use System V IPC primitives");
61cb1f0db9SRobert Watson 
62820a0de9SPawel Jakub Dawidek static int jail_enforce_statfs = 2;
63820a0de9SPawel Jakub Dawidek SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
64820a0de9SPawel Jakub Dawidek     &jail_enforce_statfs, 0,
65820a0de9SPawel Jakub Dawidek     "Processes in jail cannot see all mounted file systems");
66f08df373SRobert Watson 
675a59cefcSBosko Milekic int	jail_allow_raw_sockets = 0;
685a59cefcSBosko Milekic SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
695a59cefcSBosko Milekic     &jail_allow_raw_sockets, 0,
705a59cefcSBosko Milekic     "Prison root can create raw sockets");
715a59cefcSBosko Milekic 
7279653046SColin Percival int	jail_chflags_allowed = 0;
7379653046SColin Percival SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
7479653046SColin Percival     &jail_chflags_allowed, 0,
7579653046SColin Percival     "Processes in jail can alter system file flags");
7679653046SColin Percival 
77f3a8d2f9SPawel Jakub Dawidek int	jail_mount_allowed = 0;
78f3a8d2f9SPawel Jakub Dawidek SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
79f3a8d2f9SPawel Jakub Dawidek     &jail_mount_allowed, 0,
80f3a8d2f9SPawel Jakub Dawidek     "Processes in jail can mount/unmount jail-friendly file systems");
81f3a8d2f9SPawel Jakub Dawidek 
822110d913SXin LI /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
832110d913SXin LI struct	prisonlist allprison;
84dc68a633SPawel Jakub Dawidek struct	sx allprison_lock;
852110d913SXin LI int	lastprid = 0;
86fd7a8150SMike Barcroft int	prisoncount = 0;
87fd7a8150SMike Barcroft 
88dc68a633SPawel Jakub Dawidek /*
89dc68a633SPawel Jakub Dawidek  * List of jail services. Protected by allprison_lock.
90dc68a633SPawel Jakub Dawidek  */
91dc68a633SPawel Jakub Dawidek TAILQ_HEAD(prison_services_head, prison_service);
92dc68a633SPawel Jakub Dawidek static struct prison_services_head prison_services =
93dc68a633SPawel Jakub Dawidek     TAILQ_HEAD_INITIALIZER(prison_services);
94dc68a633SPawel Jakub Dawidek static int prison_service_slots = 0;
95dc68a633SPawel Jakub Dawidek 
96dc68a633SPawel Jakub Dawidek struct prison_service {
97dc68a633SPawel Jakub Dawidek 	prison_create_t ps_create;
98dc68a633SPawel Jakub Dawidek 	prison_destroy_t ps_destroy;
99dc68a633SPawel Jakub Dawidek 	int		ps_slotno;
100dc68a633SPawel Jakub Dawidek 	TAILQ_ENTRY(prison_service) ps_next;
101dc68a633SPawel Jakub Dawidek 	char	ps_name[0];
102dc68a633SPawel Jakub Dawidek };
103dc68a633SPawel Jakub Dawidek 
104fd7a8150SMike Barcroft static void		 init_prison(void *);
105b3059e09SRobert Watson static void		 prison_complete(void *context, int pending);
106fd7a8150SMike Barcroft static int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
107fd7a8150SMike Barcroft 
108fd7a8150SMike Barcroft static void
109fd7a8150SMike Barcroft init_prison(void *data __unused)
110fd7a8150SMike Barcroft {
111fd7a8150SMike Barcroft 
1122110d913SXin LI 	sx_init(&allprison_lock, "allprison");
1132110d913SXin LI 	LIST_INIT(&allprison);
114fd7a8150SMike Barcroft }
115fd7a8150SMike Barcroft 
116fd7a8150SMike Barcroft SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
117fd7a8150SMike Barcroft 
118116734c4SMatthew Dillon /*
1199ddb7954SMike Barcroft  * struct jail_args {
1209ddb7954SMike Barcroft  *	struct jail *jail;
1219ddb7954SMike Barcroft  * };
122116734c4SMatthew Dillon  */
12375c13541SPoul-Henning Kamp int
1249ddb7954SMike Barcroft jail(struct thread *td, struct jail_args *uap)
12575c13541SPoul-Henning Kamp {
126fd7a8150SMike Barcroft 	struct nameidata nd;
1272110d913SXin LI 	struct prison *pr, *tpr;
128dc68a633SPawel Jakub Dawidek 	struct prison_service *psrv;
12975c13541SPoul-Henning Kamp 	struct jail j;
130fd7a8150SMike Barcroft 	struct jail_attach_args jaa;
1312110d913SXin LI 	int vfslocked, error, tryprid;
13275c13541SPoul-Henning Kamp 
1339ddb7954SMike Barcroft 	error = copyin(uap->jail, &j, sizeof(j));
13475c13541SPoul-Henning Kamp 	if (error)
135a2f2b3afSJohn Baldwin 		return (error);
136a2f2b3afSJohn Baldwin 	if (j.version != 0)
137a2f2b3afSJohn Baldwin 		return (EINVAL);
138a2f2b3afSJohn Baldwin 
1399ddb7954SMike Barcroft 	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
1406008862bSJohn Baldwin 	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
141fd7a8150SMike Barcroft 	pr->pr_ref = 1;
1429ddb7954SMike Barcroft 	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
143fd7a8150SMike Barcroft 	if (error)
144fd7a8150SMike Barcroft 		goto e_killmtx;
145453f7d53SChristian S.J. Peron 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
146453f7d53SChristian S.J. Peron 	    pr->pr_path, td);
147fd7a8150SMike Barcroft 	error = namei(&nd);
148453f7d53SChristian S.J. Peron 	if (error)
149fd7a8150SMike Barcroft 		goto e_killmtx;
150453f7d53SChristian S.J. Peron 	vfslocked = NDHASGIANT(&nd);
151fd7a8150SMike Barcroft 	pr->pr_root = nd.ni_vp;
15222db15c0SAttilio Rao 	VOP_UNLOCK(nd.ni_vp, 0);
153fd7a8150SMike Barcroft 	NDFREE(&nd, NDF_ONLY_PNBUF);
154453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
1559ddb7954SMike Barcroft 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
15675c13541SPoul-Henning Kamp 	if (error)
157fd7a8150SMike Barcroft 		goto e_dropvnref;
158a2f2b3afSJohn Baldwin 	pr->pr_ip = j.ip_number;
159fd7a8150SMike Barcroft 	pr->pr_linux = NULL;
160fd7a8150SMike Barcroft 	pr->pr_securelevel = securelevel;
161dc68a633SPawel Jakub Dawidek 	if (prison_service_slots == 0)
162dc68a633SPawel Jakub Dawidek 		pr->pr_slots = NULL;
163dc68a633SPawel Jakub Dawidek 	else {
164dc68a633SPawel Jakub Dawidek 		pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
165dc68a633SPawel Jakub Dawidek 		    M_PRISON, M_ZERO | M_WAITOK);
166dc68a633SPawel Jakub Dawidek 	}
167fd7a8150SMike Barcroft 
1682110d913SXin LI 	/* Determine next pr_id and add prison to allprison list. */
169dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
1702110d913SXin LI 	tryprid = lastprid + 1;
1712110d913SXin LI 	if (tryprid == JAIL_MAX)
1722110d913SXin LI 		tryprid = 1;
1732110d913SXin LI next:
1742110d913SXin LI 	LIST_FOREACH(tpr, &allprison, pr_list) {
1752110d913SXin LI 		if (tpr->pr_id == tryprid) {
1762110d913SXin LI 			tryprid++;
1772110d913SXin LI 			if (tryprid == JAIL_MAX) {
1782110d913SXin LI 				sx_xunlock(&allprison_lock);
1792110d913SXin LI 				error = EAGAIN;
1802110d913SXin LI 				goto e_dropvnref;
1812110d913SXin LI 			}
1822110d913SXin LI 			goto next;
1832110d913SXin LI 		}
1842110d913SXin LI 	}
1852110d913SXin LI 	pr->pr_id = jaa.jid = lastprid = tryprid;
186fd7a8150SMike Barcroft 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
187fd7a8150SMike Barcroft 	prisoncount++;
188dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
189dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
190dc68a633SPawel Jakub Dawidek 		psrv->ps_create(psrv, pr);
191dc68a633SPawel Jakub Dawidek 	}
192dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
193fd7a8150SMike Barcroft 
194fd7a8150SMike Barcroft 	error = jail_attach(td, &jaa);
195a2f2b3afSJohn Baldwin 	if (error)
196fd7a8150SMike Barcroft 		goto e_dropprref;
197fd7a8150SMike Barcroft 	mtx_lock(&pr->pr_mtx);
198fd7a8150SMike Barcroft 	pr->pr_ref--;
199fd7a8150SMike Barcroft 	mtx_unlock(&pr->pr_mtx);
200fd7a8150SMike Barcroft 	td->td_retval[0] = jaa.jid;
20175c13541SPoul-Henning Kamp 	return (0);
202fd7a8150SMike Barcroft e_dropprref:
203dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
204fd7a8150SMike Barcroft 	LIST_REMOVE(pr, pr_list);
205fd7a8150SMike Barcroft 	prisoncount--;
206dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
207dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
208dc68a633SPawel Jakub Dawidek 		psrv->ps_destroy(psrv, pr);
209dc68a633SPawel Jakub Dawidek 	}
210dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
211fd7a8150SMike Barcroft e_dropvnref:
212453f7d53SChristian S.J. Peron 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
213fd7a8150SMike Barcroft 	vrele(pr->pr_root);
214453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
215fd7a8150SMike Barcroft e_killmtx:
216894db7b0SMaxime Henrion 	mtx_destroy(&pr->pr_mtx);
21775c13541SPoul-Henning Kamp 	FREE(pr, M_PRISON);
21875c13541SPoul-Henning Kamp 	return (error);
21975c13541SPoul-Henning Kamp }
22075c13541SPoul-Henning Kamp 
221fd7a8150SMike Barcroft /*
2229ddb7954SMike Barcroft  * struct jail_attach_args {
2239ddb7954SMike Barcroft  *	int jid;
2249ddb7954SMike Barcroft  * };
225fd7a8150SMike Barcroft  */
226fd7a8150SMike Barcroft int
2279ddb7954SMike Barcroft jail_attach(struct thread *td, struct jail_attach_args *uap)
228fd7a8150SMike Barcroft {
229fd7a8150SMike Barcroft 	struct proc *p;
230fd7a8150SMike Barcroft 	struct ucred *newcred, *oldcred;
231fd7a8150SMike Barcroft 	struct prison *pr;
232453f7d53SChristian S.J. Peron 	int vfslocked, error;
233fd7a8150SMike Barcroft 
23457f22bd4SJacques Vidrine 	/*
23557f22bd4SJacques Vidrine 	 * XXX: Note that there is a slight race here if two threads
23657f22bd4SJacques Vidrine 	 * in the same privileged process attempt to attach to two
23757f22bd4SJacques Vidrine 	 * different jails at the same time.  It is important for
23857f22bd4SJacques Vidrine 	 * user processes not to do this, or they might end up with
23957f22bd4SJacques Vidrine 	 * a process root from one prison, but attached to the jail
24057f22bd4SJacques Vidrine 	 * of another.
24157f22bd4SJacques Vidrine 	 */
242800c9408SRobert Watson 	error = priv_check(td, PRIV_JAIL_ATTACH);
24357f22bd4SJacques Vidrine 	if (error)
24457f22bd4SJacques Vidrine 		return (error);
245fd7a8150SMike Barcroft 
24657f22bd4SJacques Vidrine 	p = td->td_proc;
247dc68a633SPawel Jakub Dawidek 	sx_slock(&allprison_lock);
248fd7a8150SMike Barcroft 	pr = prison_find(uap->jid);
249fd7a8150SMike Barcroft 	if (pr == NULL) {
250dc68a633SPawel Jakub Dawidek 		sx_sunlock(&allprison_lock);
251fd7a8150SMike Barcroft 		return (EINVAL);
252fd7a8150SMike Barcroft 	}
253fd7a8150SMike Barcroft 	pr->pr_ref++;
254fd7a8150SMike Barcroft 	mtx_unlock(&pr->pr_mtx);
255dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
256fd7a8150SMike Barcroft 
257453f7d53SChristian S.J. Peron 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
258cb05b60aSAttilio Rao 	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
259fd7a8150SMike Barcroft 	if ((error = change_dir(pr->pr_root, td)) != 0)
260fd7a8150SMike Barcroft 		goto e_unlock;
261fd7a8150SMike Barcroft #ifdef MAC
26230d239bcSRobert Watson 	if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
263fd7a8150SMike Barcroft 		goto e_unlock;
264fd7a8150SMike Barcroft #endif
26522db15c0SAttilio Rao 	VOP_UNLOCK(pr->pr_root, 0);
266fd7a8150SMike Barcroft 	change_root(pr->pr_root, td);
267453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
268fd7a8150SMike Barcroft 
269fd7a8150SMike Barcroft 	newcred = crget();
270fd7a8150SMike Barcroft 	PROC_LOCK(p);
271fd7a8150SMike Barcroft 	oldcred = p->p_ucred;
272fd7a8150SMike Barcroft 	setsugid(p);
273fd7a8150SMike Barcroft 	crcopy(newcred, oldcred);
27469c4ee54SJohn Baldwin 	newcred->cr_prison = pr;
275fd7a8150SMike Barcroft 	p->p_ucred = newcred;
276fd7a8150SMike Barcroft 	PROC_UNLOCK(p);
277fd7a8150SMike Barcroft 	crfree(oldcred);
278fd7a8150SMike Barcroft 	return (0);
279fd7a8150SMike Barcroft e_unlock:
28022db15c0SAttilio Rao 	VOP_UNLOCK(pr->pr_root, 0);
281453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
282fd7a8150SMike Barcroft 	mtx_lock(&pr->pr_mtx);
283fd7a8150SMike Barcroft 	pr->pr_ref--;
284fd7a8150SMike Barcroft 	mtx_unlock(&pr->pr_mtx);
285fd7a8150SMike Barcroft 	return (error);
286fd7a8150SMike Barcroft }
287fd7a8150SMike Barcroft 
288fd7a8150SMike Barcroft /*
289fd7a8150SMike Barcroft  * Returns a locked prison instance, or NULL on failure.
290fd7a8150SMike Barcroft  */
29154b369c1SPawel Jakub Dawidek struct prison *
292fd7a8150SMike Barcroft prison_find(int prid)
293fd7a8150SMike Barcroft {
294fd7a8150SMike Barcroft 	struct prison *pr;
295fd7a8150SMike Barcroft 
296dc68a633SPawel Jakub Dawidek 	sx_assert(&allprison_lock, SX_LOCKED);
297fd7a8150SMike Barcroft 	LIST_FOREACH(pr, &allprison, pr_list) {
298fd7a8150SMike Barcroft 		if (pr->pr_id == prid) {
299fd7a8150SMike Barcroft 			mtx_lock(&pr->pr_mtx);
300c2cda609SPawel Jakub Dawidek 			if (pr->pr_ref == 0) {
301c2cda609SPawel Jakub Dawidek 				mtx_unlock(&pr->pr_mtx);
302c2cda609SPawel Jakub Dawidek 				break;
303c2cda609SPawel Jakub Dawidek 			}
304fd7a8150SMike Barcroft 			return (pr);
305fd7a8150SMike Barcroft 		}
306fd7a8150SMike Barcroft 	}
307fd7a8150SMike Barcroft 	return (NULL);
308fd7a8150SMike Barcroft }
309fd7a8150SMike Barcroft 
31091421ba2SRobert Watson void
31191421ba2SRobert Watson prison_free(struct prison *pr)
31291421ba2SRobert Watson {
31391421ba2SRobert Watson 
31401137630SRobert Watson 	mtx_lock(&pr->pr_mtx);
31591421ba2SRobert Watson 	pr->pr_ref--;
31691421ba2SRobert Watson 	if (pr->pr_ref == 0) {
31701137630SRobert Watson 		mtx_unlock(&pr->pr_mtx);
318c2cda609SPawel Jakub Dawidek 		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
319c2cda609SPawel Jakub Dawidek 		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
320c2cda609SPawel Jakub Dawidek 		return;
321c2cda609SPawel Jakub Dawidek 	}
322c2cda609SPawel Jakub Dawidek 	mtx_unlock(&pr->pr_mtx);
323c2cda609SPawel Jakub Dawidek }
324c2cda609SPawel Jakub Dawidek 
325c2cda609SPawel Jakub Dawidek static void
326c2cda609SPawel Jakub Dawidek prison_complete(void *context, int pending)
327c2cda609SPawel Jakub Dawidek {
328c2cda609SPawel Jakub Dawidek 	struct prison_service *psrv;
329c2cda609SPawel Jakub Dawidek 	struct prison *pr;
330c2cda609SPawel Jakub Dawidek 	int vfslocked;
331c2cda609SPawel Jakub Dawidek 
332c2cda609SPawel Jakub Dawidek 	pr = (struct prison *)context;
333c2cda609SPawel Jakub Dawidek 
334c2cda609SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
335264de85eSPawel Jakub Dawidek 	LIST_REMOVE(pr, pr_list);
336fd7a8150SMike Barcroft 	prisoncount--;
337dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
338dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
339dc68a633SPawel Jakub Dawidek 		psrv->ps_destroy(psrv, pr);
340dc68a633SPawel Jakub Dawidek 	}
341dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
342b3059e09SRobert Watson 
343453f7d53SChristian S.J. Peron 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
344b3059e09SRobert Watson 	vrele(pr->pr_root);
345453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
346b3059e09SRobert Watson 
347b3059e09SRobert Watson 	mtx_destroy(&pr->pr_mtx);
348b3059e09SRobert Watson 	if (pr->pr_linux != NULL)
349b3059e09SRobert Watson 		FREE(pr->pr_linux, M_PRISON);
350b3059e09SRobert Watson 	FREE(pr, M_PRISON);
351b3059e09SRobert Watson }
352b3059e09SRobert Watson 
35391421ba2SRobert Watson void
35491421ba2SRobert Watson prison_hold(struct prison *pr)
35591421ba2SRobert Watson {
35691421ba2SRobert Watson 
35701137630SRobert Watson 	mtx_lock(&pr->pr_mtx);
358c2cda609SPawel Jakub Dawidek 	KASSERT(pr->pr_ref > 0,
359c2cda609SPawel Jakub Dawidek 	    ("Trying to hold dead prison (id=%d).", pr->pr_id));
36091421ba2SRobert Watson 	pr->pr_ref++;
36101137630SRobert Watson 	mtx_unlock(&pr->pr_mtx);
36201137630SRobert Watson }
36301137630SRobert Watson 
36401137630SRobert Watson u_int32_t
36501137630SRobert Watson prison_getip(struct ucred *cred)
36601137630SRobert Watson {
36701137630SRobert Watson 
36801137630SRobert Watson 	return (cred->cr_prison->pr_ip);
36991421ba2SRobert Watson }
37091421ba2SRobert Watson 
37175c13541SPoul-Henning Kamp int
37291421ba2SRobert Watson prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
37375c13541SPoul-Henning Kamp {
37475c13541SPoul-Henning Kamp 	u_int32_t tmp;
37575c13541SPoul-Henning Kamp 
37691421ba2SRobert Watson 	if (!jailed(cred))
37775c13541SPoul-Henning Kamp 		return (0);
37875c13541SPoul-Henning Kamp 	if (flag)
37975c13541SPoul-Henning Kamp 		tmp = *ip;
38075c13541SPoul-Henning Kamp 	else
38175c13541SPoul-Henning Kamp 		tmp = ntohl(*ip);
38275c13541SPoul-Henning Kamp 	if (tmp == INADDR_ANY) {
38375c13541SPoul-Henning Kamp 		if (flag)
38491421ba2SRobert Watson 			*ip = cred->cr_prison->pr_ip;
38575c13541SPoul-Henning Kamp 		else
38691421ba2SRobert Watson 			*ip = htonl(cred->cr_prison->pr_ip);
38775c13541SPoul-Henning Kamp 		return (0);
38875c13541SPoul-Henning Kamp 	}
389fd6aaf7fSRobert Watson 	if (tmp == INADDR_LOOPBACK) {
390fd6aaf7fSRobert Watson 		if (flag)
391fd6aaf7fSRobert Watson 			*ip = cred->cr_prison->pr_ip;
392fd6aaf7fSRobert Watson 		else
393fd6aaf7fSRobert Watson 			*ip = htonl(cred->cr_prison->pr_ip);
394fd6aaf7fSRobert Watson 		return (0);
395fd6aaf7fSRobert Watson 	}
39691421ba2SRobert Watson 	if (cred->cr_prison->pr_ip != tmp)
39775c13541SPoul-Henning Kamp 		return (1);
39875c13541SPoul-Henning Kamp 	return (0);
39975c13541SPoul-Henning Kamp }
40075c13541SPoul-Henning Kamp 
40175c13541SPoul-Henning Kamp void
40291421ba2SRobert Watson prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
40375c13541SPoul-Henning Kamp {
40475c13541SPoul-Henning Kamp 	u_int32_t tmp;
40575c13541SPoul-Henning Kamp 
40691421ba2SRobert Watson 	if (!jailed(cred))
40775c13541SPoul-Henning Kamp 		return;
40875c13541SPoul-Henning Kamp 	if (flag)
40975c13541SPoul-Henning Kamp 		tmp = *ip;
41075c13541SPoul-Henning Kamp 	else
41175c13541SPoul-Henning Kamp 		tmp = ntohl(*ip);
412fd6aaf7fSRobert Watson 	if (tmp == INADDR_LOOPBACK) {
41375c13541SPoul-Henning Kamp 		if (flag)
41491421ba2SRobert Watson 			*ip = cred->cr_prison->pr_ip;
41575c13541SPoul-Henning Kamp 		else
41691421ba2SRobert Watson 			*ip = htonl(cred->cr_prison->pr_ip);
41775c13541SPoul-Henning Kamp 		return;
41875c13541SPoul-Henning Kamp 	}
41975c13541SPoul-Henning Kamp 	return;
42075c13541SPoul-Henning Kamp }
42175c13541SPoul-Henning Kamp 
42275c13541SPoul-Henning Kamp int
42391421ba2SRobert Watson prison_if(struct ucred *cred, struct sockaddr *sa)
42475c13541SPoul-Henning Kamp {
4259ddb7954SMike Barcroft 	struct sockaddr_in *sai;
42675c13541SPoul-Henning Kamp 	int ok;
42775c13541SPoul-Henning Kamp 
4289ddb7954SMike Barcroft 	sai = (struct sockaddr_in *)sa;
4297cadc266SRobert Watson 	if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
4307cadc266SRobert Watson 		ok = 1;
4317cadc266SRobert Watson 	else if (sai->sin_family != AF_INET)
43275c13541SPoul-Henning Kamp 		ok = 0;
43391421ba2SRobert Watson 	else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
43475c13541SPoul-Henning Kamp 		ok = 1;
43575c13541SPoul-Henning Kamp 	else
43675c13541SPoul-Henning Kamp 		ok = 0;
43775c13541SPoul-Henning Kamp 	return (ok);
43875c13541SPoul-Henning Kamp }
43991421ba2SRobert Watson 
44091421ba2SRobert Watson /*
44191421ba2SRobert Watson  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
44291421ba2SRobert Watson  */
44391421ba2SRobert Watson int
4449ddb7954SMike Barcroft prison_check(struct ucred *cred1, struct ucred *cred2)
44591421ba2SRobert Watson {
44691421ba2SRobert Watson 
44791421ba2SRobert Watson 	if (jailed(cred1)) {
44891421ba2SRobert Watson 		if (!jailed(cred2))
44991421ba2SRobert Watson 			return (ESRCH);
45091421ba2SRobert Watson 		if (cred2->cr_prison != cred1->cr_prison)
45191421ba2SRobert Watson 			return (ESRCH);
45291421ba2SRobert Watson 	}
45391421ba2SRobert Watson 
45491421ba2SRobert Watson 	return (0);
45591421ba2SRobert Watson }
45691421ba2SRobert Watson 
45791421ba2SRobert Watson /*
45891421ba2SRobert Watson  * Return 1 if the passed credential is in a jail, otherwise 0.
45991421ba2SRobert Watson  */
46091421ba2SRobert Watson int
4619ddb7954SMike Barcroft jailed(struct ucred *cred)
46291421ba2SRobert Watson {
46391421ba2SRobert Watson 
46491421ba2SRobert Watson 	return (cred->cr_prison != NULL);
46591421ba2SRobert Watson }
4669484d0c0SRobert Drehmel 
4679484d0c0SRobert Drehmel /*
4689484d0c0SRobert Drehmel  * Return the correct hostname for the passed credential.
4699484d0c0SRobert Drehmel  */
470ad1ff099SRobert Drehmel void
4719ddb7954SMike Barcroft getcredhostname(struct ucred *cred, char *buf, size_t size)
4729484d0c0SRobert Drehmel {
4739484d0c0SRobert Drehmel 
474ad1ff099SRobert Drehmel 	if (jailed(cred)) {
475ad1ff099SRobert Drehmel 		mtx_lock(&cred->cr_prison->pr_mtx);
476e80fb434SRobert Drehmel 		strlcpy(buf, cred->cr_prison->pr_host, size);
477ad1ff099SRobert Drehmel 		mtx_unlock(&cred->cr_prison->pr_mtx);
4789ddb7954SMike Barcroft 	} else
479e80fb434SRobert Drehmel 		strlcpy(buf, hostname, size);
4809484d0c0SRobert Drehmel }
481fd7a8150SMike Barcroft 
482f08df373SRobert Watson /*
483820a0de9SPawel Jakub Dawidek  * Determine whether the subject represented by cred can "see"
484820a0de9SPawel Jakub Dawidek  * status of a mount point.
485820a0de9SPawel Jakub Dawidek  * Returns: 0 for permitted, ENOENT otherwise.
486820a0de9SPawel Jakub Dawidek  * XXX: This function should be called cr_canseemount() and should be
487820a0de9SPawel Jakub Dawidek  *      placed in kern_prot.c.
488f08df373SRobert Watson  */
489f08df373SRobert Watson int
490820a0de9SPawel Jakub Dawidek prison_canseemount(struct ucred *cred, struct mount *mp)
491f08df373SRobert Watson {
492820a0de9SPawel Jakub Dawidek 	struct prison *pr;
493820a0de9SPawel Jakub Dawidek 	struct statfs *sp;
494820a0de9SPawel Jakub Dawidek 	size_t len;
495f08df373SRobert Watson 
496820a0de9SPawel Jakub Dawidek 	if (!jailed(cred) || jail_enforce_statfs == 0)
497820a0de9SPawel Jakub Dawidek 		return (0);
498820a0de9SPawel Jakub Dawidek 	pr = cred->cr_prison;
499820a0de9SPawel Jakub Dawidek 	if (pr->pr_root->v_mount == mp)
500820a0de9SPawel Jakub Dawidek 		return (0);
501820a0de9SPawel Jakub Dawidek 	if (jail_enforce_statfs == 2)
502820a0de9SPawel Jakub Dawidek 		return (ENOENT);
503820a0de9SPawel Jakub Dawidek 	/*
504820a0de9SPawel Jakub Dawidek 	 * If jail's chroot directory is set to "/" we should be able to see
505820a0de9SPawel Jakub Dawidek 	 * all mount-points from inside a jail.
506820a0de9SPawel Jakub Dawidek 	 * This is ugly check, but this is the only situation when jail's
507820a0de9SPawel Jakub Dawidek 	 * directory ends with '/'.
508820a0de9SPawel Jakub Dawidek 	 */
509820a0de9SPawel Jakub Dawidek 	if (strcmp(pr->pr_path, "/") == 0)
510820a0de9SPawel Jakub Dawidek 		return (0);
511820a0de9SPawel Jakub Dawidek 	len = strlen(pr->pr_path);
512820a0de9SPawel Jakub Dawidek 	sp = &mp->mnt_stat;
513820a0de9SPawel Jakub Dawidek 	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
514820a0de9SPawel Jakub Dawidek 		return (ENOENT);
515820a0de9SPawel Jakub Dawidek 	/*
516820a0de9SPawel Jakub Dawidek 	 * Be sure that we don't have situation where jail's root directory
517820a0de9SPawel Jakub Dawidek 	 * is "/some/path" and mount point is "/some/pathpath".
518820a0de9SPawel Jakub Dawidek 	 */
519820a0de9SPawel Jakub Dawidek 	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
520820a0de9SPawel Jakub Dawidek 		return (ENOENT);
521f08df373SRobert Watson 	return (0);
522f08df373SRobert Watson }
523820a0de9SPawel Jakub Dawidek 
524820a0de9SPawel Jakub Dawidek void
525820a0de9SPawel Jakub Dawidek prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
526820a0de9SPawel Jakub Dawidek {
527820a0de9SPawel Jakub Dawidek 	char jpath[MAXPATHLEN];
528820a0de9SPawel Jakub Dawidek 	struct prison *pr;
529820a0de9SPawel Jakub Dawidek 	size_t len;
530820a0de9SPawel Jakub Dawidek 
531820a0de9SPawel Jakub Dawidek 	if (!jailed(cred) || jail_enforce_statfs == 0)
532820a0de9SPawel Jakub Dawidek 		return;
533820a0de9SPawel Jakub Dawidek 	pr = cred->cr_prison;
534820a0de9SPawel Jakub Dawidek 	if (prison_canseemount(cred, mp) != 0) {
535820a0de9SPawel Jakub Dawidek 		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
536820a0de9SPawel Jakub Dawidek 		strlcpy(sp->f_mntonname, "[restricted]",
537820a0de9SPawel Jakub Dawidek 		    sizeof(sp->f_mntonname));
538820a0de9SPawel Jakub Dawidek 		return;
539820a0de9SPawel Jakub Dawidek 	}
540820a0de9SPawel Jakub Dawidek 	if (pr->pr_root->v_mount == mp) {
541820a0de9SPawel Jakub Dawidek 		/*
542820a0de9SPawel Jakub Dawidek 		 * Clear current buffer data, so we are sure nothing from
543820a0de9SPawel Jakub Dawidek 		 * the valid path left there.
544820a0de9SPawel Jakub Dawidek 		 */
545820a0de9SPawel Jakub Dawidek 		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
546820a0de9SPawel Jakub Dawidek 		*sp->f_mntonname = '/';
547820a0de9SPawel Jakub Dawidek 		return;
548820a0de9SPawel Jakub Dawidek 	}
549820a0de9SPawel Jakub Dawidek 	/*
550820a0de9SPawel Jakub Dawidek 	 * If jail's chroot directory is set to "/" we should be able to see
551820a0de9SPawel Jakub Dawidek 	 * all mount-points from inside a jail.
552820a0de9SPawel Jakub Dawidek 	 */
553820a0de9SPawel Jakub Dawidek 	if (strcmp(pr->pr_path, "/") == 0)
554820a0de9SPawel Jakub Dawidek 		return;
555820a0de9SPawel Jakub Dawidek 	len = strlen(pr->pr_path);
556820a0de9SPawel Jakub Dawidek 	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
557820a0de9SPawel Jakub Dawidek 	/*
558820a0de9SPawel Jakub Dawidek 	 * Clear current buffer data, so we are sure nothing from
559820a0de9SPawel Jakub Dawidek 	 * the valid path left there.
560820a0de9SPawel Jakub Dawidek 	 */
561820a0de9SPawel Jakub Dawidek 	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
562820a0de9SPawel Jakub Dawidek 	if (*jpath == '\0') {
563820a0de9SPawel Jakub Dawidek 		/* Should never happen. */
564820a0de9SPawel Jakub Dawidek 		*sp->f_mntonname = '/';
565820a0de9SPawel Jakub Dawidek 	} else {
566820a0de9SPawel Jakub Dawidek 		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
567820a0de9SPawel Jakub Dawidek 	}
568f08df373SRobert Watson }
569f08df373SRobert Watson 
570800c9408SRobert Watson /*
571800c9408SRobert Watson  * Check with permission for a specific privilege is granted within jail.  We
572800c9408SRobert Watson  * have a specific list of accepted privileges; the rest are denied.
573800c9408SRobert Watson  */
574800c9408SRobert Watson int
575800c9408SRobert Watson prison_priv_check(struct ucred *cred, int priv)
576800c9408SRobert Watson {
577800c9408SRobert Watson 
578800c9408SRobert Watson 	if (!jailed(cred))
579800c9408SRobert Watson 		return (0);
580800c9408SRobert Watson 
581800c9408SRobert Watson 	switch (priv) {
582800c9408SRobert Watson 
583800c9408SRobert Watson 		/*
584800c9408SRobert Watson 		 * Allow ktrace privileges for root in jail.
585800c9408SRobert Watson 		 */
586800c9408SRobert Watson 	case PRIV_KTRACE:
587800c9408SRobert Watson 
588c3c1b5e6SRobert Watson #if 0
589800c9408SRobert Watson 		/*
590800c9408SRobert Watson 		 * Allow jailed processes to configure audit identity and
591800c9408SRobert Watson 		 * submit audit records (login, etc).  In the future we may
592800c9408SRobert Watson 		 * want to further refine the relationship between audit and
593800c9408SRobert Watson 		 * jail.
594800c9408SRobert Watson 		 */
595800c9408SRobert Watson 	case PRIV_AUDIT_GETAUDIT:
596800c9408SRobert Watson 	case PRIV_AUDIT_SETAUDIT:
597800c9408SRobert Watson 	case PRIV_AUDIT_SUBMIT:
598c3c1b5e6SRobert Watson #endif
599800c9408SRobert Watson 
600800c9408SRobert Watson 		/*
601800c9408SRobert Watson 		 * Allow jailed processes to manipulate process UNIX
602800c9408SRobert Watson 		 * credentials in any way they see fit.
603800c9408SRobert Watson 		 */
604800c9408SRobert Watson 	case PRIV_CRED_SETUID:
605800c9408SRobert Watson 	case PRIV_CRED_SETEUID:
606800c9408SRobert Watson 	case PRIV_CRED_SETGID:
607800c9408SRobert Watson 	case PRIV_CRED_SETEGID:
608800c9408SRobert Watson 	case PRIV_CRED_SETGROUPS:
609800c9408SRobert Watson 	case PRIV_CRED_SETREUID:
610800c9408SRobert Watson 	case PRIV_CRED_SETREGID:
611800c9408SRobert Watson 	case PRIV_CRED_SETRESUID:
612800c9408SRobert Watson 	case PRIV_CRED_SETRESGID:
613800c9408SRobert Watson 
614800c9408SRobert Watson 		/*
615800c9408SRobert Watson 		 * Jail implements visibility constraints already, so allow
616800c9408SRobert Watson 		 * jailed root to override uid/gid-based constraints.
617800c9408SRobert Watson 		 */
618800c9408SRobert Watson 	case PRIV_SEEOTHERGIDS:
619800c9408SRobert Watson 	case PRIV_SEEOTHERUIDS:
620800c9408SRobert Watson 
621800c9408SRobert Watson 		/*
622800c9408SRobert Watson 		 * Jail implements inter-process debugging limits already, so
623800c9408SRobert Watson 		 * allow jailed root various debugging privileges.
624800c9408SRobert Watson 		 */
625800c9408SRobert Watson 	case PRIV_DEBUG_DIFFCRED:
626800c9408SRobert Watson 	case PRIV_DEBUG_SUGID:
627800c9408SRobert Watson 	case PRIV_DEBUG_UNPRIV:
628800c9408SRobert Watson 
629800c9408SRobert Watson 		/*
630800c9408SRobert Watson 		 * Allow jail to set various resource limits and login
631800c9408SRobert Watson 		 * properties, and for now, exceed process resource limits.
632800c9408SRobert Watson 		 */
633800c9408SRobert Watson 	case PRIV_PROC_LIMIT:
634800c9408SRobert Watson 	case PRIV_PROC_SETLOGIN:
635800c9408SRobert Watson 	case PRIV_PROC_SETRLIMIT:
636800c9408SRobert Watson 
637800c9408SRobert Watson 		/*
638800c9408SRobert Watson 		 * System V and POSIX IPC privileges are granted in jail.
639800c9408SRobert Watson 		 */
640800c9408SRobert Watson 	case PRIV_IPC_READ:
641800c9408SRobert Watson 	case PRIV_IPC_WRITE:
642800c9408SRobert Watson 	case PRIV_IPC_ADMIN:
643800c9408SRobert Watson 	case PRIV_IPC_MSGSIZE:
644800c9408SRobert Watson 	case PRIV_MQ_ADMIN:
645800c9408SRobert Watson 
646800c9408SRobert Watson 		/*
647800c9408SRobert Watson 		 * Jail implements its own inter-process limits, so allow
648800c9408SRobert Watson 		 * root processes in jail to change scheduling on other
649800c9408SRobert Watson 		 * processes in the same jail.  Likewise for signalling.
650800c9408SRobert Watson 		 */
651800c9408SRobert Watson 	case PRIV_SCHED_DIFFCRED:
652800c9408SRobert Watson 	case PRIV_SIGNAL_DIFFCRED:
653800c9408SRobert Watson 	case PRIV_SIGNAL_SUGID:
654800c9408SRobert Watson 
655800c9408SRobert Watson 		/*
656800c9408SRobert Watson 		 * Allow jailed processes to write to sysctls marked as jail
657800c9408SRobert Watson 		 * writable.
658800c9408SRobert Watson 		 */
659800c9408SRobert Watson 	case PRIV_SYSCTL_WRITEJAIL:
660800c9408SRobert Watson 
661800c9408SRobert Watson 		/*
662800c9408SRobert Watson 		 * Allow root in jail to manage a variety of quota
663e82d0201SRobert Watson 		 * properties.  These should likely be conditional on a
664e82d0201SRobert Watson 		 * configuration option.
665800c9408SRobert Watson 		 */
66695b091d2SRobert Watson 	case PRIV_VFS_GETQUOTA:
66795b091d2SRobert Watson 	case PRIV_VFS_SETQUOTA:
668800c9408SRobert Watson 
669800c9408SRobert Watson 		/*
670800c9408SRobert Watson 		 * Since Jail relies on chroot() to implement file system
671800c9408SRobert Watson 		 * protections, grant many VFS privileges to root in jail.
672800c9408SRobert Watson 		 * Be careful to exclude mount-related and NFS-related
673800c9408SRobert Watson 		 * privileges.
674800c9408SRobert Watson 		 */
675800c9408SRobert Watson 	case PRIV_VFS_READ:
676800c9408SRobert Watson 	case PRIV_VFS_WRITE:
677800c9408SRobert Watson 	case PRIV_VFS_ADMIN:
678800c9408SRobert Watson 	case PRIV_VFS_EXEC:
679800c9408SRobert Watson 	case PRIV_VFS_LOOKUP:
680800c9408SRobert Watson 	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
681800c9408SRobert Watson 	case PRIV_VFS_CHFLAGS_DEV:
682800c9408SRobert Watson 	case PRIV_VFS_CHOWN:
683800c9408SRobert Watson 	case PRIV_VFS_CHROOT:
684bb531912SPawel Jakub Dawidek 	case PRIV_VFS_RETAINSUGID:
685800c9408SRobert Watson 	case PRIV_VFS_FCHROOT:
686800c9408SRobert Watson 	case PRIV_VFS_LINK:
687800c9408SRobert Watson 	case PRIV_VFS_SETGID:
688e41966dcSRobert Watson 	case PRIV_VFS_STAT:
689800c9408SRobert Watson 	case PRIV_VFS_STICKYFILE:
690800c9408SRobert Watson 		return (0);
691800c9408SRobert Watson 
692800c9408SRobert Watson 		/*
693800c9408SRobert Watson 		 * Depending on the global setting, allow privilege of
694800c9408SRobert Watson 		 * setting system flags.
695800c9408SRobert Watson 		 */
696800c9408SRobert Watson 	case PRIV_VFS_SYSFLAGS:
697800c9408SRobert Watson 		if (jail_chflags_allowed)
698800c9408SRobert Watson 			return (0);
699800c9408SRobert Watson 		else
700800c9408SRobert Watson 			return (EPERM);
701800c9408SRobert Watson 
702800c9408SRobert Watson 		/*
703f3a8d2f9SPawel Jakub Dawidek 		 * Depending on the global setting, allow privilege of
704f3a8d2f9SPawel Jakub Dawidek 		 * mounting/unmounting file systems.
705f3a8d2f9SPawel Jakub Dawidek 		 */
706f3a8d2f9SPawel Jakub Dawidek 	case PRIV_VFS_MOUNT:
707f3a8d2f9SPawel Jakub Dawidek 	case PRIV_VFS_UNMOUNT:
708f3a8d2f9SPawel Jakub Dawidek 	case PRIV_VFS_MOUNT_NONUSER:
70924b0502eSPawel Jakub Dawidek 	case PRIV_VFS_MOUNT_OWNER:
710f3a8d2f9SPawel Jakub Dawidek 		if (jail_mount_allowed)
711f3a8d2f9SPawel Jakub Dawidek 			return (0);
712f3a8d2f9SPawel Jakub Dawidek 		else
713f3a8d2f9SPawel Jakub Dawidek 			return (EPERM);
714f3a8d2f9SPawel Jakub Dawidek 
715f3a8d2f9SPawel Jakub Dawidek 		/*
7164b084056SRobert Watson 		 * Allow jailed root to bind reserved ports and reuse in-use
7174b084056SRobert Watson 		 * ports.
718800c9408SRobert Watson 		 */
719800c9408SRobert Watson 	case PRIV_NETINET_RESERVEDPORT:
7204b084056SRobert Watson 	case PRIV_NETINET_REUSEPORT:
721800c9408SRobert Watson 		return (0);
722800c9408SRobert Watson 
723800c9408SRobert Watson 		/*
72479ba3952SBjoern A. Zeeb 		 * Allow jailed root to set certain IPv4/6 (option) headers.
72579ba3952SBjoern A. Zeeb 		 */
72679ba3952SBjoern A. Zeeb 	case PRIV_NETINET_SETHDROPTS:
72779ba3952SBjoern A. Zeeb 		return (0);
72879ba3952SBjoern A. Zeeb 
72979ba3952SBjoern A. Zeeb 		/*
730800c9408SRobert Watson 		 * Conditionally allow creating raw sockets in jail.
731800c9408SRobert Watson 		 */
732800c9408SRobert Watson 	case PRIV_NETINET_RAW:
733800c9408SRobert Watson 		if (jail_allow_raw_sockets)
734800c9408SRobert Watson 			return (0);
735800c9408SRobert Watson 		else
736800c9408SRobert Watson 			return (EPERM);
737800c9408SRobert Watson 
738800c9408SRobert Watson 		/*
739800c9408SRobert Watson 		 * Since jail implements its own visibility limits on netstat
740800c9408SRobert Watson 		 * sysctls, allow getcred.  This allows identd to work in
741800c9408SRobert Watson 		 * jail.
742800c9408SRobert Watson 		 */
743800c9408SRobert Watson 	case PRIV_NETINET_GETCRED:
744800c9408SRobert Watson 		return (0);
745800c9408SRobert Watson 
746800c9408SRobert Watson 	default:
747800c9408SRobert Watson 		/*
748800c9408SRobert Watson 		 * In all remaining cases, deny the privilege request.  This
749800c9408SRobert Watson 		 * includes almost all network privileges, many system
750800c9408SRobert Watson 		 * configuration privileges.
751800c9408SRobert Watson 		 */
752800c9408SRobert Watson 		return (EPERM);
753800c9408SRobert Watson 	}
754800c9408SRobert Watson }
755800c9408SRobert Watson 
756dc68a633SPawel Jakub Dawidek /*
757dc68a633SPawel Jakub Dawidek  * Register jail service. Provides 'create' and 'destroy' methods.
758dc68a633SPawel Jakub Dawidek  * 'create' method will be called for every existing jail and all
759dc68a633SPawel Jakub Dawidek  * jails in the future as they beeing created.
760dc68a633SPawel Jakub Dawidek  * 'destroy' method will be called for every jail going away and
761dc68a633SPawel Jakub Dawidek  * for all existing jails at the time of service deregistration.
762dc68a633SPawel Jakub Dawidek  */
763dc68a633SPawel Jakub Dawidek struct prison_service *
764dc68a633SPawel Jakub Dawidek prison_service_register(const char *name, prison_create_t create,
765dc68a633SPawel Jakub Dawidek     prison_destroy_t destroy)
766dc68a633SPawel Jakub Dawidek {
767dc68a633SPawel Jakub Dawidek 	struct prison_service *psrv, *psrv2;
768dc68a633SPawel Jakub Dawidek 	struct prison *pr;
769dc68a633SPawel Jakub Dawidek 	int reallocate = 1, slotno = 0;
770dc68a633SPawel Jakub Dawidek 	void **slots, **oldslots;
771dc68a633SPawel Jakub Dawidek 
772dc68a633SPawel Jakub Dawidek 	psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
773dc68a633SPawel Jakub Dawidek 	    M_WAITOK | M_ZERO);
774dc68a633SPawel Jakub Dawidek 	psrv->ps_create = create;
775dc68a633SPawel Jakub Dawidek 	psrv->ps_destroy = destroy;
776dc68a633SPawel Jakub Dawidek 	strcpy(psrv->ps_name, name);
777dc68a633SPawel Jakub Dawidek 	/*
778dc68a633SPawel Jakub Dawidek 	 * Grab the allprison_lock here, so we won't miss any jail
779dc68a633SPawel Jakub Dawidek 	 * creation/destruction.
780dc68a633SPawel Jakub Dawidek 	 */
781dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
782dc68a633SPawel Jakub Dawidek #ifdef INVARIANTS
783dc68a633SPawel Jakub Dawidek 	/*
784dc68a633SPawel Jakub Dawidek 	 * Verify if service is not already registered.
785dc68a633SPawel Jakub Dawidek 	 */
786dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
787dc68a633SPawel Jakub Dawidek 		KASSERT(strcmp(psrv2->ps_name, name) != 0,
788dc68a633SPawel Jakub Dawidek 		    ("jail service %s already registered", name));
789dc68a633SPawel Jakub Dawidek 	}
790dc68a633SPawel Jakub Dawidek #endif
791dc68a633SPawel Jakub Dawidek 	/*
792dc68a633SPawel Jakub Dawidek 	 * Find free slot. When there is no existing free slot available,
793dc68a633SPawel Jakub Dawidek 	 * allocate one at the end.
794dc68a633SPawel Jakub Dawidek 	 */
795dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
796dc68a633SPawel Jakub Dawidek 		if (psrv2->ps_slotno != slotno) {
797dc68a633SPawel Jakub Dawidek 			KASSERT(slotno < psrv2->ps_slotno,
798dc68a633SPawel Jakub Dawidek 			    ("Invalid slotno (slotno=%d >= ps_slotno=%d",
799dc68a633SPawel Jakub Dawidek 			    slotno, psrv2->ps_slotno));
800dc68a633SPawel Jakub Dawidek 			/* We found free slot. */
801dc68a633SPawel Jakub Dawidek 			reallocate = 0;
802dc68a633SPawel Jakub Dawidek 			break;
803dc68a633SPawel Jakub Dawidek 		}
804dc68a633SPawel Jakub Dawidek 		slotno++;
805dc68a633SPawel Jakub Dawidek 	}
806dc68a633SPawel Jakub Dawidek 	psrv->ps_slotno = slotno;
807dc68a633SPawel Jakub Dawidek 	/*
808dc68a633SPawel Jakub Dawidek 	 * Keep the list sorted by slot number.
809dc68a633SPawel Jakub Dawidek 	 */
810dc68a633SPawel Jakub Dawidek 	if (psrv2 != NULL) {
811dc68a633SPawel Jakub Dawidek 		KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
812dc68a633SPawel Jakub Dawidek 		TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
813dc68a633SPawel Jakub Dawidek 	} else {
814dc68a633SPawel Jakub Dawidek 		KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
815dc68a633SPawel Jakub Dawidek 		TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
816dc68a633SPawel Jakub Dawidek 	}
817dc68a633SPawel Jakub Dawidek 	prison_service_slots++;
818dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
819dc68a633SPawel Jakub Dawidek 	/*
820dc68a633SPawel Jakub Dawidek 	 * Allocate memory for new slot if we didn't found empty one.
821dc68a633SPawel Jakub Dawidek 	 * Do not use realloc(9), because pr_slots is protected with a mutex,
822dc68a633SPawel Jakub Dawidek 	 * so we can't sleep.
823dc68a633SPawel Jakub Dawidek 	 */
824dc68a633SPawel Jakub Dawidek 	LIST_FOREACH(pr, &allprison, pr_list) {
825dc68a633SPawel Jakub Dawidek 		if (reallocate) {
826dc68a633SPawel Jakub Dawidek 			/* First allocate memory with M_WAITOK. */
827dc68a633SPawel Jakub Dawidek 			slots = malloc(sizeof(*slots) * prison_service_slots,
828dc68a633SPawel Jakub Dawidek 			    M_PRISON, M_WAITOK);
829dc68a633SPawel Jakub Dawidek 			/* Now grab the mutex and replace pr_slots. */
830dc68a633SPawel Jakub Dawidek 			mtx_lock(&pr->pr_mtx);
831dc68a633SPawel Jakub Dawidek 			oldslots = pr->pr_slots;
832dc68a633SPawel Jakub Dawidek 			if (psrv->ps_slotno > 0) {
833dc68a633SPawel Jakub Dawidek 				bcopy(oldslots, slots,
834dc68a633SPawel Jakub Dawidek 				    sizeof(*slots) * (prison_service_slots - 1));
835dc68a633SPawel Jakub Dawidek 			}
836dc68a633SPawel Jakub Dawidek 			slots[psrv->ps_slotno] = NULL;
837dc68a633SPawel Jakub Dawidek 			pr->pr_slots = slots;
838dc68a633SPawel Jakub Dawidek 			mtx_unlock(&pr->pr_mtx);
839dc68a633SPawel Jakub Dawidek 			if (oldslots != NULL)
840dc68a633SPawel Jakub Dawidek 				free(oldslots, M_PRISON);
841dc68a633SPawel Jakub Dawidek 		}
842dc68a633SPawel Jakub Dawidek 		/*
843dc68a633SPawel Jakub Dawidek 		 * Call 'create' method for each existing jail.
844dc68a633SPawel Jakub Dawidek 		 */
845dc68a633SPawel Jakub Dawidek 		psrv->ps_create(psrv, pr);
846dc68a633SPawel Jakub Dawidek 	}
847dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
848dc68a633SPawel Jakub Dawidek 
849dc68a633SPawel Jakub Dawidek 	return (psrv);
850dc68a633SPawel Jakub Dawidek }
851dc68a633SPawel Jakub Dawidek 
852dc68a633SPawel Jakub Dawidek void
853dc68a633SPawel Jakub Dawidek prison_service_deregister(struct prison_service *psrv)
854dc68a633SPawel Jakub Dawidek {
855dc68a633SPawel Jakub Dawidek 	struct prison *pr;
856dc68a633SPawel Jakub Dawidek 	void **slots, **oldslots;
857dc68a633SPawel Jakub Dawidek 	int last = 0;
858dc68a633SPawel Jakub Dawidek 
859dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
860dc68a633SPawel Jakub Dawidek 	if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
861dc68a633SPawel Jakub Dawidek 		last = 1;
862dc68a633SPawel Jakub Dawidek 	TAILQ_REMOVE(&prison_services, psrv, ps_next);
863dc68a633SPawel Jakub Dawidek 	prison_service_slots--;
864dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
865dc68a633SPawel Jakub Dawidek 	LIST_FOREACH(pr, &allprison, pr_list) {
866dc68a633SPawel Jakub Dawidek 		/*
867dc68a633SPawel Jakub Dawidek 		 * Call 'destroy' method for every currently existing jail.
868dc68a633SPawel Jakub Dawidek 		 */
869dc68a633SPawel Jakub Dawidek 		psrv->ps_destroy(psrv, pr);
870dc68a633SPawel Jakub Dawidek 		/*
871dc68a633SPawel Jakub Dawidek 		 * If this is the last slot, free the memory allocated for it.
872dc68a633SPawel Jakub Dawidek 		 */
873dc68a633SPawel Jakub Dawidek 		if (last) {
874dc68a633SPawel Jakub Dawidek 			if (prison_service_slots == 0)
875dc68a633SPawel Jakub Dawidek 				slots = NULL;
876dc68a633SPawel Jakub Dawidek 			else {
877dc68a633SPawel Jakub Dawidek 				slots = malloc(sizeof(*slots) * prison_service_slots,
878dc68a633SPawel Jakub Dawidek 				    M_PRISON, M_WAITOK);
879dc68a633SPawel Jakub Dawidek 			}
880dc68a633SPawel Jakub Dawidek 			mtx_lock(&pr->pr_mtx);
881dc68a633SPawel Jakub Dawidek 			oldslots = pr->pr_slots;
882dc68a633SPawel Jakub Dawidek 			/*
883dc68a633SPawel Jakub Dawidek 			 * We require setting slot to NULL after freeing it,
884dc68a633SPawel Jakub Dawidek 			 * this way we can check for memory leaks here.
885dc68a633SPawel Jakub Dawidek 			 */
886dc68a633SPawel Jakub Dawidek 			KASSERT(oldslots[psrv->ps_slotno] == NULL,
887dc68a633SPawel Jakub Dawidek 			    ("Slot %d (service %s, jailid=%d) still contains data?",
888dc68a633SPawel Jakub Dawidek 			     psrv->ps_slotno, psrv->ps_name, pr->pr_id));
889dc68a633SPawel Jakub Dawidek 			if (psrv->ps_slotno > 0) {
890dc68a633SPawel Jakub Dawidek 				bcopy(oldslots, slots,
891dc68a633SPawel Jakub Dawidek 				    sizeof(*slots) * prison_service_slots);
892dc68a633SPawel Jakub Dawidek 			}
893dc68a633SPawel Jakub Dawidek 			pr->pr_slots = slots;
894dc68a633SPawel Jakub Dawidek 			mtx_unlock(&pr->pr_mtx);
895dc68a633SPawel Jakub Dawidek 			KASSERT(oldslots != NULL, ("oldslots == NULL"));
896dc68a633SPawel Jakub Dawidek 			free(oldslots, M_PRISON);
897dc68a633SPawel Jakub Dawidek 		}
898dc68a633SPawel Jakub Dawidek 	}
899dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
900dc68a633SPawel Jakub Dawidek 	free(psrv, M_PRISON);
901dc68a633SPawel Jakub Dawidek }
902dc68a633SPawel Jakub Dawidek 
903dc68a633SPawel Jakub Dawidek /*
904dc68a633SPawel Jakub Dawidek  * Function sets data for the given jail in slot assigned for the given
905dc68a633SPawel Jakub Dawidek  * jail service.
906dc68a633SPawel Jakub Dawidek  */
907dc68a633SPawel Jakub Dawidek void
908dc68a633SPawel Jakub Dawidek prison_service_data_set(struct prison_service *psrv, struct prison *pr,
909dc68a633SPawel Jakub Dawidek     void *data)
910dc68a633SPawel Jakub Dawidek {
911dc68a633SPawel Jakub Dawidek 
912dc68a633SPawel Jakub Dawidek 	mtx_assert(&pr->pr_mtx, MA_OWNED);
913dc68a633SPawel Jakub Dawidek 	pr->pr_slots[psrv->ps_slotno] = data;
914dc68a633SPawel Jakub Dawidek }
915dc68a633SPawel Jakub Dawidek 
916dc68a633SPawel Jakub Dawidek /*
917dc68a633SPawel Jakub Dawidek  * Function clears slots assigned for the given jail service in the given
918dc68a633SPawel Jakub Dawidek  * prison structure and returns current slot data.
919dc68a633SPawel Jakub Dawidek  */
920dc68a633SPawel Jakub Dawidek void *
921dc68a633SPawel Jakub Dawidek prison_service_data_del(struct prison_service *psrv, struct prison *pr)
922dc68a633SPawel Jakub Dawidek {
923dc68a633SPawel Jakub Dawidek 	void *data;
924dc68a633SPawel Jakub Dawidek 
925dc68a633SPawel Jakub Dawidek 	mtx_assert(&pr->pr_mtx, MA_OWNED);
926dc68a633SPawel Jakub Dawidek 	data = pr->pr_slots[psrv->ps_slotno];
927dc68a633SPawel Jakub Dawidek 	pr->pr_slots[psrv->ps_slotno] = NULL;
928dc68a633SPawel Jakub Dawidek 	return (data);
929dc68a633SPawel Jakub Dawidek }
930dc68a633SPawel Jakub Dawidek 
931dc68a633SPawel Jakub Dawidek /*
932dc68a633SPawel Jakub Dawidek  * Function returns current data from the slot assigned to the given jail
933dc68a633SPawel Jakub Dawidek  * service for the given jail.
934dc68a633SPawel Jakub Dawidek  */
935dc68a633SPawel Jakub Dawidek void *
936dc68a633SPawel Jakub Dawidek prison_service_data_get(struct prison_service *psrv, struct prison *pr)
937dc68a633SPawel Jakub Dawidek {
938dc68a633SPawel Jakub Dawidek 
939dc68a633SPawel Jakub Dawidek 	mtx_assert(&pr->pr_mtx, MA_OWNED);
940dc68a633SPawel Jakub Dawidek 	return (pr->pr_slots[psrv->ps_slotno]);
941dc68a633SPawel Jakub Dawidek }
942dc68a633SPawel Jakub Dawidek 
943fd7a8150SMike Barcroft static int
944fd7a8150SMike Barcroft sysctl_jail_list(SYSCTL_HANDLER_ARGS)
945fd7a8150SMike Barcroft {
946fd7a8150SMike Barcroft 	struct xprison *xp, *sxp;
947fd7a8150SMike Barcroft 	struct prison *pr;
948fd7a8150SMike Barcroft 	int count, error;
949fd7a8150SMike Barcroft 
9507f4704c0SPawel Jakub Dawidek 	if (jailed(req->td->td_ucred))
951679a1060SRobert Watson 		return (0);
952fd7a8150SMike Barcroft 
953dc68a633SPawel Jakub Dawidek 	sx_slock(&allprison_lock);
954dc68a633SPawel Jakub Dawidek 	if ((count = prisoncount) == 0) {
955dc68a633SPawel Jakub Dawidek 		sx_sunlock(&allprison_lock);
956fd7a8150SMike Barcroft 		return (0);
957dc68a633SPawel Jakub Dawidek 	}
958fd7a8150SMike Barcroft 
959fd7a8150SMike Barcroft 	sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
960fd7a8150SMike Barcroft 
961fd7a8150SMike Barcroft 	LIST_FOREACH(pr, &allprison, pr_list) {
962fd7a8150SMike Barcroft 		xp->pr_version = XPRISON_VERSION;
963fd7a8150SMike Barcroft 		xp->pr_id = pr->pr_id;
964fd7a8150SMike Barcroft 		xp->pr_ip = pr->pr_ip;
965b63b0c65SPawel Jakub Dawidek 		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
966b63b0c65SPawel Jakub Dawidek 		mtx_lock(&pr->pr_mtx);
967b63b0c65SPawel Jakub Dawidek 		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
968fd7a8150SMike Barcroft 		mtx_unlock(&pr->pr_mtx);
969fd7a8150SMike Barcroft 		xp++;
970fd7a8150SMike Barcroft 	}
971dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
972fd7a8150SMike Barcroft 
973fd7a8150SMike Barcroft 	error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
974fd7a8150SMike Barcroft 	free(sxp, M_TEMP);
975fd7a8150SMike Barcroft 	return (error);
976fd7a8150SMike Barcroft }
977fd7a8150SMike Barcroft 
978fd7a8150SMike Barcroft SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
979fd7a8150SMike Barcroft     NULL, 0, sysctl_jail_list, "S", "List of active jails");
980461167c2SPawel Jakub Dawidek 
981461167c2SPawel Jakub Dawidek static int
982461167c2SPawel Jakub Dawidek sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
983461167c2SPawel Jakub Dawidek {
984461167c2SPawel Jakub Dawidek 	int error, injail;
985461167c2SPawel Jakub Dawidek 
986461167c2SPawel Jakub Dawidek 	injail = jailed(req->td->td_ucred);
987461167c2SPawel Jakub Dawidek 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
988461167c2SPawel Jakub Dawidek 
989461167c2SPawel Jakub Dawidek 	return (error);
990461167c2SPawel Jakub Dawidek }
991461167c2SPawel Jakub Dawidek SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
992461167c2SPawel Jakub Dawidek     NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
993