xref: /freebsd/sys/kern/kern_jail.c (revision 24b0502ee0b395bc84f1cb8f6805112ffc417882)
/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */
975c13541SPoul-Henning Kamp 
10677b542eSDavid E. O'Brien #include <sys/cdefs.h>
11677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
12677b542eSDavid E. O'Brien 
1346e3b1cbSPawel Jakub Dawidek #include "opt_mac.h"
1446e3b1cbSPawel Jakub Dawidek 
1575c13541SPoul-Henning Kamp #include <sys/param.h>
1675c13541SPoul-Henning Kamp #include <sys/types.h>
1775c13541SPoul-Henning Kamp #include <sys/kernel.h>
1875c13541SPoul-Henning Kamp #include <sys/systm.h>
1975c13541SPoul-Henning Kamp #include <sys/errno.h>
2075c13541SPoul-Henning Kamp #include <sys/sysproto.h>
2175c13541SPoul-Henning Kamp #include <sys/malloc.h>
22800c9408SRobert Watson #include <sys/priv.h>
2375c13541SPoul-Henning Kamp #include <sys/proc.h>
24b3059e09SRobert Watson #include <sys/taskqueue.h>
2575c13541SPoul-Henning Kamp #include <sys/jail.h>
2601137630SRobert Watson #include <sys/lock.h>
2701137630SRobert Watson #include <sys/mutex.h>
28dc68a633SPawel Jakub Dawidek #include <sys/sx.h>
29fd7a8150SMike Barcroft #include <sys/namei.h>
30820a0de9SPawel Jakub Dawidek #include <sys/mount.h>
31fd7a8150SMike Barcroft #include <sys/queue.h>
3275c13541SPoul-Henning Kamp #include <sys/socket.h>
33fd7a8150SMike Barcroft #include <sys/syscallsubr.h>
3483f1e257SRobert Watson #include <sys/sysctl.h>
35fd7a8150SMike Barcroft #include <sys/vnode.h>
3675c13541SPoul-Henning Kamp #include <net/if.h>
3775c13541SPoul-Henning Kamp #include <netinet/in.h>
3875c13541SPoul-Henning Kamp 
39aed55708SRobert Watson #include <security/mac/mac_framework.h>
40aed55708SRobert Watson 
4175c13541SPoul-Henning Kamp MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
4275c13541SPoul-Henning Kamp 
43d0615c64SAndrew R. Reiter SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
4483f1e257SRobert Watson     "Jail rules");
4583f1e257SRobert Watson 
4683f1e257SRobert Watson int	jail_set_hostname_allowed = 1;
47d0615c64SAndrew R. Reiter SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
4883f1e257SRobert Watson     &jail_set_hostname_allowed, 0,
4983f1e257SRobert Watson     "Processes in jail can set their hostnames");
5083f1e257SRobert Watson 
517cadc266SRobert Watson int	jail_socket_unixiproute_only = 1;
52d0615c64SAndrew R. Reiter SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
537cadc266SRobert Watson     &jail_socket_unixiproute_only, 0,
547cadc266SRobert Watson     "Processes in jail are limited to creating UNIX/IPv4/route sockets only");
557cadc266SRobert Watson 
56cb1f0db9SRobert Watson int	jail_sysvipc_allowed = 0;
57d0615c64SAndrew R. Reiter SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
58cb1f0db9SRobert Watson     &jail_sysvipc_allowed, 0,
59cb1f0db9SRobert Watson     "Processes in jail can use System V IPC primitives");
60cb1f0db9SRobert Watson 
61820a0de9SPawel Jakub Dawidek static int jail_enforce_statfs = 2;
62820a0de9SPawel Jakub Dawidek SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
63820a0de9SPawel Jakub Dawidek     &jail_enforce_statfs, 0,
64820a0de9SPawel Jakub Dawidek     "Processes in jail cannot see all mounted file systems");
65f08df373SRobert Watson 
665a59cefcSBosko Milekic int	jail_allow_raw_sockets = 0;
675a59cefcSBosko Milekic SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
685a59cefcSBosko Milekic     &jail_allow_raw_sockets, 0,
695a59cefcSBosko Milekic     "Prison root can create raw sockets");
705a59cefcSBosko Milekic 
7179653046SColin Percival int	jail_chflags_allowed = 0;
7279653046SColin Percival SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
7379653046SColin Percival     &jail_chflags_allowed, 0,
7479653046SColin Percival     "Processes in jail can alter system file flags");
7579653046SColin Percival 
76f3a8d2f9SPawel Jakub Dawidek int	jail_mount_allowed = 0;
77f3a8d2f9SPawel Jakub Dawidek SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
78f3a8d2f9SPawel Jakub Dawidek     &jail_mount_allowed, 0,
79f3a8d2f9SPawel Jakub Dawidek     "Processes in jail can mount/unmount jail-friendly file systems");
80f3a8d2f9SPawel Jakub Dawidek 
81dc68a633SPawel Jakub Dawidek /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
82fd7a8150SMike Barcroft struct	prisonlist allprison;
83dc68a633SPawel Jakub Dawidek struct	sx allprison_lock;
84fd7a8150SMike Barcroft int	lastprid = 0;
85fd7a8150SMike Barcroft int	prisoncount = 0;
86fd7a8150SMike Barcroft 
87dc68a633SPawel Jakub Dawidek /*
88dc68a633SPawel Jakub Dawidek  * List of jail services. Protected by allprison_lock.
89dc68a633SPawel Jakub Dawidek  */
90dc68a633SPawel Jakub Dawidek TAILQ_HEAD(prison_services_head, prison_service);
91dc68a633SPawel Jakub Dawidek static struct prison_services_head prison_services =
92dc68a633SPawel Jakub Dawidek     TAILQ_HEAD_INITIALIZER(prison_services);
93dc68a633SPawel Jakub Dawidek static int prison_service_slots = 0;
94dc68a633SPawel Jakub Dawidek 
95dc68a633SPawel Jakub Dawidek struct prison_service {
96dc68a633SPawel Jakub Dawidek 	prison_create_t ps_create;
97dc68a633SPawel Jakub Dawidek 	prison_destroy_t ps_destroy;
98dc68a633SPawel Jakub Dawidek 	int		ps_slotno;
99dc68a633SPawel Jakub Dawidek 	TAILQ_ENTRY(prison_service) ps_next;
100dc68a633SPawel Jakub Dawidek 	char	ps_name[0];
101dc68a633SPawel Jakub Dawidek };
102dc68a633SPawel Jakub Dawidek 
103fd7a8150SMike Barcroft static void		 init_prison(void *);
104b3059e09SRobert Watson static void		 prison_complete(void *context, int pending);
105fd7a8150SMike Barcroft static int		 sysctl_jail_list(SYSCTL_HANDLER_ARGS);
106fd7a8150SMike Barcroft 
107fd7a8150SMike Barcroft static void
108fd7a8150SMike Barcroft init_prison(void *data __unused)
109fd7a8150SMike Barcroft {
110fd7a8150SMike Barcroft 
111dc68a633SPawel Jakub Dawidek 	sx_init(&allprison_lock, "allprison");
112fd7a8150SMike Barcroft 	LIST_INIT(&allprison);
113fd7a8150SMike Barcroft }
114fd7a8150SMike Barcroft 
115fd7a8150SMike Barcroft SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
116fd7a8150SMike Barcroft 
117116734c4SMatthew Dillon /*
1189ddb7954SMike Barcroft  * struct jail_args {
1199ddb7954SMike Barcroft  *	struct jail *jail;
1209ddb7954SMike Barcroft  * };
121116734c4SMatthew Dillon  */
12275c13541SPoul-Henning Kamp int
1239ddb7954SMike Barcroft jail(struct thread *td, struct jail_args *uap)
12475c13541SPoul-Henning Kamp {
125fd7a8150SMike Barcroft 	struct nameidata nd;
126fd7a8150SMike Barcroft 	struct prison *pr, *tpr;
127dc68a633SPawel Jakub Dawidek 	struct prison_service *psrv;
12875c13541SPoul-Henning Kamp 	struct jail j;
129fd7a8150SMike Barcroft 	struct jail_attach_args jaa;
130453f7d53SChristian S.J. Peron 	int vfslocked, error, tryprid;
13175c13541SPoul-Henning Kamp 
1329ddb7954SMike Barcroft 	error = copyin(uap->jail, &j, sizeof(j));
13375c13541SPoul-Henning Kamp 	if (error)
134a2f2b3afSJohn Baldwin 		return (error);
135a2f2b3afSJohn Baldwin 	if (j.version != 0)
136a2f2b3afSJohn Baldwin 		return (EINVAL);
137a2f2b3afSJohn Baldwin 
1389ddb7954SMike Barcroft 	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
1396008862bSJohn Baldwin 	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
140fd7a8150SMike Barcroft 	pr->pr_ref = 1;
1419ddb7954SMike Barcroft 	error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0);
142fd7a8150SMike Barcroft 	if (error)
143fd7a8150SMike Barcroft 		goto e_killmtx;
144453f7d53SChristian S.J. Peron 	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
145453f7d53SChristian S.J. Peron 	    pr->pr_path, td);
146fd7a8150SMike Barcroft 	error = namei(&nd);
147453f7d53SChristian S.J. Peron 	if (error)
148fd7a8150SMike Barcroft 		goto e_killmtx;
149453f7d53SChristian S.J. Peron 	vfslocked = NDHASGIANT(&nd);
150fd7a8150SMike Barcroft 	pr->pr_root = nd.ni_vp;
151fd7a8150SMike Barcroft 	VOP_UNLOCK(nd.ni_vp, 0, td);
152fd7a8150SMike Barcroft 	NDFREE(&nd, NDF_ONLY_PNBUF);
153453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
1549ddb7954SMike Barcroft 	error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
15575c13541SPoul-Henning Kamp 	if (error)
156fd7a8150SMike Barcroft 		goto e_dropvnref;
157a2f2b3afSJohn Baldwin 	pr->pr_ip = j.ip_number;
158fd7a8150SMike Barcroft 	pr->pr_linux = NULL;
159fd7a8150SMike Barcroft 	pr->pr_securelevel = securelevel;
160dc68a633SPawel Jakub Dawidek 	if (prison_service_slots == 0)
161dc68a633SPawel Jakub Dawidek 		pr->pr_slots = NULL;
162dc68a633SPawel Jakub Dawidek 	else {
163dc68a633SPawel Jakub Dawidek 		pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
164dc68a633SPawel Jakub Dawidek 		    M_PRISON, M_ZERO | M_WAITOK);
165dc68a633SPawel Jakub Dawidek 	}
166fd7a8150SMike Barcroft 
167fd7a8150SMike Barcroft 	/* Determine next pr_id and add prison to allprison list. */
168dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
169fd7a8150SMike Barcroft 	tryprid = lastprid + 1;
170fd7a8150SMike Barcroft 	if (tryprid == JAIL_MAX)
171fd7a8150SMike Barcroft 		tryprid = 1;
172fd7a8150SMike Barcroft next:
173fd7a8150SMike Barcroft 	LIST_FOREACH(tpr, &allprison, pr_list) {
174fd7a8150SMike Barcroft 		if (tpr->pr_id == tryprid) {
175fd7a8150SMike Barcroft 			tryprid++;
176fd7a8150SMike Barcroft 			if (tryprid == JAIL_MAX) {
177dc68a633SPawel Jakub Dawidek 				sx_xunlock(&allprison_lock);
178fd7a8150SMike Barcroft 				error = EAGAIN;
179fd7a8150SMike Barcroft 				goto e_dropvnref;
180fd7a8150SMike Barcroft 			}
181fd7a8150SMike Barcroft 			goto next;
182fd7a8150SMike Barcroft 		}
183fd7a8150SMike Barcroft 	}
184fd7a8150SMike Barcroft 	pr->pr_id = jaa.jid = lastprid = tryprid;
185fd7a8150SMike Barcroft 	LIST_INSERT_HEAD(&allprison, pr, pr_list);
186fd7a8150SMike Barcroft 	prisoncount++;
187dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
188dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
189dc68a633SPawel Jakub Dawidek 		psrv->ps_create(psrv, pr);
190dc68a633SPawel Jakub Dawidek 	}
191dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
192fd7a8150SMike Barcroft 
193fd7a8150SMike Barcroft 	error = jail_attach(td, &jaa);
194a2f2b3afSJohn Baldwin 	if (error)
195fd7a8150SMike Barcroft 		goto e_dropprref;
196fd7a8150SMike Barcroft 	mtx_lock(&pr->pr_mtx);
197fd7a8150SMike Barcroft 	pr->pr_ref--;
198fd7a8150SMike Barcroft 	mtx_unlock(&pr->pr_mtx);
199fd7a8150SMike Barcroft 	td->td_retval[0] = jaa.jid;
20075c13541SPoul-Henning Kamp 	return (0);
201fd7a8150SMike Barcroft e_dropprref:
202dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
203fd7a8150SMike Barcroft 	LIST_REMOVE(pr, pr_list);
204fd7a8150SMike Barcroft 	prisoncount--;
205dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
206dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
207dc68a633SPawel Jakub Dawidek 		psrv->ps_destroy(psrv, pr);
208dc68a633SPawel Jakub Dawidek 	}
209dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
210fd7a8150SMike Barcroft e_dropvnref:
211453f7d53SChristian S.J. Peron 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
212fd7a8150SMike Barcroft 	vrele(pr->pr_root);
213453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
214fd7a8150SMike Barcroft e_killmtx:
215894db7b0SMaxime Henrion 	mtx_destroy(&pr->pr_mtx);
21675c13541SPoul-Henning Kamp 	FREE(pr, M_PRISON);
21775c13541SPoul-Henning Kamp 	return (error);
21875c13541SPoul-Henning Kamp }
21975c13541SPoul-Henning Kamp 
220fd7a8150SMike Barcroft /*
2219ddb7954SMike Barcroft  * struct jail_attach_args {
2229ddb7954SMike Barcroft  *	int jid;
2239ddb7954SMike Barcroft  * };
224fd7a8150SMike Barcroft  */
225fd7a8150SMike Barcroft int
2269ddb7954SMike Barcroft jail_attach(struct thread *td, struct jail_attach_args *uap)
227fd7a8150SMike Barcroft {
228fd7a8150SMike Barcroft 	struct proc *p;
229fd7a8150SMike Barcroft 	struct ucred *newcred, *oldcred;
230fd7a8150SMike Barcroft 	struct prison *pr;
231453f7d53SChristian S.J. Peron 	int vfslocked, error;
232fd7a8150SMike Barcroft 
23357f22bd4SJacques Vidrine 	/*
23457f22bd4SJacques Vidrine 	 * XXX: Note that there is a slight race here if two threads
23557f22bd4SJacques Vidrine 	 * in the same privileged process attempt to attach to two
23657f22bd4SJacques Vidrine 	 * different jails at the same time.  It is important for
23757f22bd4SJacques Vidrine 	 * user processes not to do this, or they might end up with
23857f22bd4SJacques Vidrine 	 * a process root from one prison, but attached to the jail
23957f22bd4SJacques Vidrine 	 * of another.
24057f22bd4SJacques Vidrine 	 */
241800c9408SRobert Watson 	error = priv_check(td, PRIV_JAIL_ATTACH);
24257f22bd4SJacques Vidrine 	if (error)
24357f22bd4SJacques Vidrine 		return (error);
244fd7a8150SMike Barcroft 
24557f22bd4SJacques Vidrine 	p = td->td_proc;
246dc68a633SPawel Jakub Dawidek 	sx_slock(&allprison_lock);
247fd7a8150SMike Barcroft 	pr = prison_find(uap->jid);
248fd7a8150SMike Barcroft 	if (pr == NULL) {
249dc68a633SPawel Jakub Dawidek 		sx_sunlock(&allprison_lock);
250fd7a8150SMike Barcroft 		return (EINVAL);
251fd7a8150SMike Barcroft 	}
252fd7a8150SMike Barcroft 	pr->pr_ref++;
253fd7a8150SMike Barcroft 	mtx_unlock(&pr->pr_mtx);
254dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
255fd7a8150SMike Barcroft 
256453f7d53SChristian S.J. Peron 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
257fd7a8150SMike Barcroft 	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
258fd7a8150SMike Barcroft 	if ((error = change_dir(pr->pr_root, td)) != 0)
259fd7a8150SMike Barcroft 		goto e_unlock;
260fd7a8150SMike Barcroft #ifdef MAC
261fd7a8150SMike Barcroft 	if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
262fd7a8150SMike Barcroft 		goto e_unlock;
263fd7a8150SMike Barcroft #endif
264fd7a8150SMike Barcroft 	VOP_UNLOCK(pr->pr_root, 0, td);
265fd7a8150SMike Barcroft 	change_root(pr->pr_root, td);
266453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
267fd7a8150SMike Barcroft 
268fd7a8150SMike Barcroft 	newcred = crget();
269fd7a8150SMike Barcroft 	PROC_LOCK(p);
270fd7a8150SMike Barcroft 	oldcred = p->p_ucred;
271fd7a8150SMike Barcroft 	setsugid(p);
272fd7a8150SMike Barcroft 	crcopy(newcred, oldcred);
27369c4ee54SJohn Baldwin 	newcred->cr_prison = pr;
274fd7a8150SMike Barcroft 	p->p_ucred = newcred;
275fd7a8150SMike Barcroft 	PROC_UNLOCK(p);
276fd7a8150SMike Barcroft 	crfree(oldcred);
277fd7a8150SMike Barcroft 	return (0);
278fd7a8150SMike Barcroft e_unlock:
279fd7a8150SMike Barcroft 	VOP_UNLOCK(pr->pr_root, 0, td);
280453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
281fd7a8150SMike Barcroft 	mtx_lock(&pr->pr_mtx);
282fd7a8150SMike Barcroft 	pr->pr_ref--;
283fd7a8150SMike Barcroft 	mtx_unlock(&pr->pr_mtx);
284fd7a8150SMike Barcroft 	return (error);
285fd7a8150SMike Barcroft }
286fd7a8150SMike Barcroft 
287fd7a8150SMike Barcroft /*
288fd7a8150SMike Barcroft  * Returns a locked prison instance, or NULL on failure.
289fd7a8150SMike Barcroft  */
29054b369c1SPawel Jakub Dawidek struct prison *
291fd7a8150SMike Barcroft prison_find(int prid)
292fd7a8150SMike Barcroft {
293fd7a8150SMike Barcroft 	struct prison *pr;
294fd7a8150SMike Barcroft 
295dc68a633SPawel Jakub Dawidek 	sx_assert(&allprison_lock, SX_LOCKED);
296fd7a8150SMike Barcroft 	LIST_FOREACH(pr, &allprison, pr_list) {
297fd7a8150SMike Barcroft 		if (pr->pr_id == prid) {
298fd7a8150SMike Barcroft 			mtx_lock(&pr->pr_mtx);
299c2cda609SPawel Jakub Dawidek 			if (pr->pr_ref == 0) {
300c2cda609SPawel Jakub Dawidek 				mtx_unlock(&pr->pr_mtx);
301c2cda609SPawel Jakub Dawidek 				break;
302c2cda609SPawel Jakub Dawidek 			}
303fd7a8150SMike Barcroft 			return (pr);
304fd7a8150SMike Barcroft 		}
305fd7a8150SMike Barcroft 	}
306fd7a8150SMike Barcroft 	return (NULL);
307fd7a8150SMike Barcroft }
308fd7a8150SMike Barcroft 
30991421ba2SRobert Watson void
31091421ba2SRobert Watson prison_free(struct prison *pr)
31191421ba2SRobert Watson {
31291421ba2SRobert Watson 
31301137630SRobert Watson 	mtx_lock(&pr->pr_mtx);
31491421ba2SRobert Watson 	pr->pr_ref--;
31591421ba2SRobert Watson 	if (pr->pr_ref == 0) {
31601137630SRobert Watson 		mtx_unlock(&pr->pr_mtx);
317c2cda609SPawel Jakub Dawidek 		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
318c2cda609SPawel Jakub Dawidek 		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
319c2cda609SPawel Jakub Dawidek 		return;
320c2cda609SPawel Jakub Dawidek 	}
321c2cda609SPawel Jakub Dawidek 	mtx_unlock(&pr->pr_mtx);
322c2cda609SPawel Jakub Dawidek }
323c2cda609SPawel Jakub Dawidek 
324c2cda609SPawel Jakub Dawidek static void
325c2cda609SPawel Jakub Dawidek prison_complete(void *context, int pending)
326c2cda609SPawel Jakub Dawidek {
327c2cda609SPawel Jakub Dawidek 	struct prison_service *psrv;
328c2cda609SPawel Jakub Dawidek 	struct prison *pr;
329c2cda609SPawel Jakub Dawidek 	int vfslocked;
330c2cda609SPawel Jakub Dawidek 
331c2cda609SPawel Jakub Dawidek 	pr = (struct prison *)context;
332c2cda609SPawel Jakub Dawidek 
333c2cda609SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
334264de85eSPawel Jakub Dawidek 	LIST_REMOVE(pr, pr_list);
335fd7a8150SMike Barcroft 	prisoncount--;
336dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
337dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv, &prison_services, ps_next) {
338dc68a633SPawel Jakub Dawidek 		psrv->ps_destroy(psrv, pr);
339dc68a633SPawel Jakub Dawidek 	}
340dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
341b3059e09SRobert Watson 
342453f7d53SChristian S.J. Peron 	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
343b3059e09SRobert Watson 	vrele(pr->pr_root);
344453f7d53SChristian S.J. Peron 	VFS_UNLOCK_GIANT(vfslocked);
345b3059e09SRobert Watson 
346b3059e09SRobert Watson 	mtx_destroy(&pr->pr_mtx);
347b3059e09SRobert Watson 	if (pr->pr_linux != NULL)
348b3059e09SRobert Watson 		FREE(pr->pr_linux, M_PRISON);
349b3059e09SRobert Watson 	FREE(pr, M_PRISON);
350b3059e09SRobert Watson }
351b3059e09SRobert Watson 
35291421ba2SRobert Watson void
35391421ba2SRobert Watson prison_hold(struct prison *pr)
35491421ba2SRobert Watson {
35591421ba2SRobert Watson 
35601137630SRobert Watson 	mtx_lock(&pr->pr_mtx);
357c2cda609SPawel Jakub Dawidek 	KASSERT(pr->pr_ref > 0,
358c2cda609SPawel Jakub Dawidek 	    ("Trying to hold dead prison (id=%d).", pr->pr_id));
35991421ba2SRobert Watson 	pr->pr_ref++;
36001137630SRobert Watson 	mtx_unlock(&pr->pr_mtx);
36101137630SRobert Watson }
36201137630SRobert Watson 
36301137630SRobert Watson u_int32_t
36401137630SRobert Watson prison_getip(struct ucred *cred)
36501137630SRobert Watson {
36601137630SRobert Watson 
36701137630SRobert Watson 	return (cred->cr_prison->pr_ip);
36891421ba2SRobert Watson }
36991421ba2SRobert Watson 
37075c13541SPoul-Henning Kamp int
37191421ba2SRobert Watson prison_ip(struct ucred *cred, int flag, u_int32_t *ip)
37275c13541SPoul-Henning Kamp {
37375c13541SPoul-Henning Kamp 	u_int32_t tmp;
37475c13541SPoul-Henning Kamp 
37591421ba2SRobert Watson 	if (!jailed(cred))
37675c13541SPoul-Henning Kamp 		return (0);
37775c13541SPoul-Henning Kamp 	if (flag)
37875c13541SPoul-Henning Kamp 		tmp = *ip;
37975c13541SPoul-Henning Kamp 	else
38075c13541SPoul-Henning Kamp 		tmp = ntohl(*ip);
38175c13541SPoul-Henning Kamp 	if (tmp == INADDR_ANY) {
38275c13541SPoul-Henning Kamp 		if (flag)
38391421ba2SRobert Watson 			*ip = cred->cr_prison->pr_ip;
38475c13541SPoul-Henning Kamp 		else
38591421ba2SRobert Watson 			*ip = htonl(cred->cr_prison->pr_ip);
38675c13541SPoul-Henning Kamp 		return (0);
38775c13541SPoul-Henning Kamp 	}
388fd6aaf7fSRobert Watson 	if (tmp == INADDR_LOOPBACK) {
389fd6aaf7fSRobert Watson 		if (flag)
390fd6aaf7fSRobert Watson 			*ip = cred->cr_prison->pr_ip;
391fd6aaf7fSRobert Watson 		else
392fd6aaf7fSRobert Watson 			*ip = htonl(cred->cr_prison->pr_ip);
393fd6aaf7fSRobert Watson 		return (0);
394fd6aaf7fSRobert Watson 	}
39591421ba2SRobert Watson 	if (cred->cr_prison->pr_ip != tmp)
39675c13541SPoul-Henning Kamp 		return (1);
39775c13541SPoul-Henning Kamp 	return (0);
39875c13541SPoul-Henning Kamp }
39975c13541SPoul-Henning Kamp 
40075c13541SPoul-Henning Kamp void
40191421ba2SRobert Watson prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip)
40275c13541SPoul-Henning Kamp {
40375c13541SPoul-Henning Kamp 	u_int32_t tmp;
40475c13541SPoul-Henning Kamp 
40591421ba2SRobert Watson 	if (!jailed(cred))
40675c13541SPoul-Henning Kamp 		return;
40775c13541SPoul-Henning Kamp 	if (flag)
40875c13541SPoul-Henning Kamp 		tmp = *ip;
40975c13541SPoul-Henning Kamp 	else
41075c13541SPoul-Henning Kamp 		tmp = ntohl(*ip);
411fd6aaf7fSRobert Watson 	if (tmp == INADDR_LOOPBACK) {
41275c13541SPoul-Henning Kamp 		if (flag)
41391421ba2SRobert Watson 			*ip = cred->cr_prison->pr_ip;
41475c13541SPoul-Henning Kamp 		else
41591421ba2SRobert Watson 			*ip = htonl(cred->cr_prison->pr_ip);
41675c13541SPoul-Henning Kamp 		return;
41775c13541SPoul-Henning Kamp 	}
41875c13541SPoul-Henning Kamp 	return;
41975c13541SPoul-Henning Kamp }
42075c13541SPoul-Henning Kamp 
42175c13541SPoul-Henning Kamp int
42291421ba2SRobert Watson prison_if(struct ucred *cred, struct sockaddr *sa)
42375c13541SPoul-Henning Kamp {
4249ddb7954SMike Barcroft 	struct sockaddr_in *sai;
42575c13541SPoul-Henning Kamp 	int ok;
42675c13541SPoul-Henning Kamp 
4279ddb7954SMike Barcroft 	sai = (struct sockaddr_in *)sa;
4287cadc266SRobert Watson 	if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only)
4297cadc266SRobert Watson 		ok = 1;
4307cadc266SRobert Watson 	else if (sai->sin_family != AF_INET)
43175c13541SPoul-Henning Kamp 		ok = 0;
43291421ba2SRobert Watson 	else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr))
43375c13541SPoul-Henning Kamp 		ok = 1;
43475c13541SPoul-Henning Kamp 	else
43575c13541SPoul-Henning Kamp 		ok = 0;
43675c13541SPoul-Henning Kamp 	return (ok);
43775c13541SPoul-Henning Kamp }
43891421ba2SRobert Watson 
43991421ba2SRobert Watson /*
44091421ba2SRobert Watson  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
44191421ba2SRobert Watson  */
44291421ba2SRobert Watson int
4439ddb7954SMike Barcroft prison_check(struct ucred *cred1, struct ucred *cred2)
44491421ba2SRobert Watson {
44591421ba2SRobert Watson 
44691421ba2SRobert Watson 	if (jailed(cred1)) {
44791421ba2SRobert Watson 		if (!jailed(cred2))
44891421ba2SRobert Watson 			return (ESRCH);
44991421ba2SRobert Watson 		if (cred2->cr_prison != cred1->cr_prison)
45091421ba2SRobert Watson 			return (ESRCH);
45191421ba2SRobert Watson 	}
45291421ba2SRobert Watson 
45391421ba2SRobert Watson 	return (0);
45491421ba2SRobert Watson }
45591421ba2SRobert Watson 
45691421ba2SRobert Watson /*
45791421ba2SRobert Watson  * Return 1 if the passed credential is in a jail, otherwise 0.
45891421ba2SRobert Watson  */
45991421ba2SRobert Watson int
4609ddb7954SMike Barcroft jailed(struct ucred *cred)
46191421ba2SRobert Watson {
46291421ba2SRobert Watson 
46391421ba2SRobert Watson 	return (cred->cr_prison != NULL);
46491421ba2SRobert Watson }
4659484d0c0SRobert Drehmel 
4669484d0c0SRobert Drehmel /*
4679484d0c0SRobert Drehmel  * Return the correct hostname for the passed credential.
4689484d0c0SRobert Drehmel  */
469ad1ff099SRobert Drehmel void
4709ddb7954SMike Barcroft getcredhostname(struct ucred *cred, char *buf, size_t size)
4719484d0c0SRobert Drehmel {
4729484d0c0SRobert Drehmel 
473ad1ff099SRobert Drehmel 	if (jailed(cred)) {
474ad1ff099SRobert Drehmel 		mtx_lock(&cred->cr_prison->pr_mtx);
475e80fb434SRobert Drehmel 		strlcpy(buf, cred->cr_prison->pr_host, size);
476ad1ff099SRobert Drehmel 		mtx_unlock(&cred->cr_prison->pr_mtx);
4779ddb7954SMike Barcroft 	} else
478e80fb434SRobert Drehmel 		strlcpy(buf, hostname, size);
4799484d0c0SRobert Drehmel }
480fd7a8150SMike Barcroft 
481f08df373SRobert Watson /*
482820a0de9SPawel Jakub Dawidek  * Determine whether the subject represented by cred can "see"
483820a0de9SPawel Jakub Dawidek  * status of a mount point.
484820a0de9SPawel Jakub Dawidek  * Returns: 0 for permitted, ENOENT otherwise.
485820a0de9SPawel Jakub Dawidek  * XXX: This function should be called cr_canseemount() and should be
486820a0de9SPawel Jakub Dawidek  *      placed in kern_prot.c.
487f08df373SRobert Watson  */
488f08df373SRobert Watson int
489820a0de9SPawel Jakub Dawidek prison_canseemount(struct ucred *cred, struct mount *mp)
490f08df373SRobert Watson {
491820a0de9SPawel Jakub Dawidek 	struct prison *pr;
492820a0de9SPawel Jakub Dawidek 	struct statfs *sp;
493820a0de9SPawel Jakub Dawidek 	size_t len;
494f08df373SRobert Watson 
495820a0de9SPawel Jakub Dawidek 	if (!jailed(cred) || jail_enforce_statfs == 0)
496820a0de9SPawel Jakub Dawidek 		return (0);
497820a0de9SPawel Jakub Dawidek 	pr = cred->cr_prison;
498820a0de9SPawel Jakub Dawidek 	if (pr->pr_root->v_mount == mp)
499820a0de9SPawel Jakub Dawidek 		return (0);
500820a0de9SPawel Jakub Dawidek 	if (jail_enforce_statfs == 2)
501820a0de9SPawel Jakub Dawidek 		return (ENOENT);
502820a0de9SPawel Jakub Dawidek 	/*
503820a0de9SPawel Jakub Dawidek 	 * If jail's chroot directory is set to "/" we should be able to see
504820a0de9SPawel Jakub Dawidek 	 * all mount-points from inside a jail.
505820a0de9SPawel Jakub Dawidek 	 * This is ugly check, but this is the only situation when jail's
506820a0de9SPawel Jakub Dawidek 	 * directory ends with '/'.
507820a0de9SPawel Jakub Dawidek 	 */
508820a0de9SPawel Jakub Dawidek 	if (strcmp(pr->pr_path, "/") == 0)
509820a0de9SPawel Jakub Dawidek 		return (0);
510820a0de9SPawel Jakub Dawidek 	len = strlen(pr->pr_path);
511820a0de9SPawel Jakub Dawidek 	sp = &mp->mnt_stat;
512820a0de9SPawel Jakub Dawidek 	if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0)
513820a0de9SPawel Jakub Dawidek 		return (ENOENT);
514820a0de9SPawel Jakub Dawidek 	/*
515820a0de9SPawel Jakub Dawidek 	 * Be sure that we don't have situation where jail's root directory
516820a0de9SPawel Jakub Dawidek 	 * is "/some/path" and mount point is "/some/pathpath".
517820a0de9SPawel Jakub Dawidek 	 */
518820a0de9SPawel Jakub Dawidek 	if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/')
519820a0de9SPawel Jakub Dawidek 		return (ENOENT);
520f08df373SRobert Watson 	return (0);
521f08df373SRobert Watson }
522820a0de9SPawel Jakub Dawidek 
523820a0de9SPawel Jakub Dawidek void
524820a0de9SPawel Jakub Dawidek prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp)
525820a0de9SPawel Jakub Dawidek {
526820a0de9SPawel Jakub Dawidek 	char jpath[MAXPATHLEN];
527820a0de9SPawel Jakub Dawidek 	struct prison *pr;
528820a0de9SPawel Jakub Dawidek 	size_t len;
529820a0de9SPawel Jakub Dawidek 
530820a0de9SPawel Jakub Dawidek 	if (!jailed(cred) || jail_enforce_statfs == 0)
531820a0de9SPawel Jakub Dawidek 		return;
532820a0de9SPawel Jakub Dawidek 	pr = cred->cr_prison;
533820a0de9SPawel Jakub Dawidek 	if (prison_canseemount(cred, mp) != 0) {
534820a0de9SPawel Jakub Dawidek 		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
535820a0de9SPawel Jakub Dawidek 		strlcpy(sp->f_mntonname, "[restricted]",
536820a0de9SPawel Jakub Dawidek 		    sizeof(sp->f_mntonname));
537820a0de9SPawel Jakub Dawidek 		return;
538820a0de9SPawel Jakub Dawidek 	}
539820a0de9SPawel Jakub Dawidek 	if (pr->pr_root->v_mount == mp) {
540820a0de9SPawel Jakub Dawidek 		/*
541820a0de9SPawel Jakub Dawidek 		 * Clear current buffer data, so we are sure nothing from
542820a0de9SPawel Jakub Dawidek 		 * the valid path left there.
543820a0de9SPawel Jakub Dawidek 		 */
544820a0de9SPawel Jakub Dawidek 		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
545820a0de9SPawel Jakub Dawidek 		*sp->f_mntonname = '/';
546820a0de9SPawel Jakub Dawidek 		return;
547820a0de9SPawel Jakub Dawidek 	}
548820a0de9SPawel Jakub Dawidek 	/*
549820a0de9SPawel Jakub Dawidek 	 * If jail's chroot directory is set to "/" we should be able to see
550820a0de9SPawel Jakub Dawidek 	 * all mount-points from inside a jail.
551820a0de9SPawel Jakub Dawidek 	 */
552820a0de9SPawel Jakub Dawidek 	if (strcmp(pr->pr_path, "/") == 0)
553820a0de9SPawel Jakub Dawidek 		return;
554820a0de9SPawel Jakub Dawidek 	len = strlen(pr->pr_path);
555820a0de9SPawel Jakub Dawidek 	strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath));
556820a0de9SPawel Jakub Dawidek 	/*
557820a0de9SPawel Jakub Dawidek 	 * Clear current buffer data, so we are sure nothing from
558820a0de9SPawel Jakub Dawidek 	 * the valid path left there.
559820a0de9SPawel Jakub Dawidek 	 */
560820a0de9SPawel Jakub Dawidek 	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
561820a0de9SPawel Jakub Dawidek 	if (*jpath == '\0') {
562820a0de9SPawel Jakub Dawidek 		/* Should never happen. */
563820a0de9SPawel Jakub Dawidek 		*sp->f_mntonname = '/';
564820a0de9SPawel Jakub Dawidek 	} else {
565820a0de9SPawel Jakub Dawidek 		strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname));
566820a0de9SPawel Jakub Dawidek 	}
567f08df373SRobert Watson }
568f08df373SRobert Watson 
/*
 * Check whether permission for a specific privilege is granted within jail.
 * We have a specific list of accepted privileges; the rest are denied.
 */
573800c9408SRobert Watson int
574800c9408SRobert Watson prison_priv_check(struct ucred *cred, int priv)
575800c9408SRobert Watson {
576800c9408SRobert Watson 
577800c9408SRobert Watson 	if (!jailed(cred))
578800c9408SRobert Watson 		return (0);
579800c9408SRobert Watson 
580800c9408SRobert Watson 	switch (priv) {
581800c9408SRobert Watson 
582800c9408SRobert Watson 		/*
583800c9408SRobert Watson 		 * Allow ktrace privileges for root in jail.
584800c9408SRobert Watson 		 */
585800c9408SRobert Watson 	case PRIV_KTRACE:
586800c9408SRobert Watson 
587c3c1b5e6SRobert Watson #if 0
588800c9408SRobert Watson 		/*
589800c9408SRobert Watson 		 * Allow jailed processes to configure audit identity and
590800c9408SRobert Watson 		 * submit audit records (login, etc).  In the future we may
591800c9408SRobert Watson 		 * want to further refine the relationship between audit and
592800c9408SRobert Watson 		 * jail.
593800c9408SRobert Watson 		 */
594800c9408SRobert Watson 	case PRIV_AUDIT_GETAUDIT:
595800c9408SRobert Watson 	case PRIV_AUDIT_SETAUDIT:
596800c9408SRobert Watson 	case PRIV_AUDIT_SUBMIT:
597c3c1b5e6SRobert Watson #endif
598800c9408SRobert Watson 
599800c9408SRobert Watson 		/*
600800c9408SRobert Watson 		 * Allow jailed processes to manipulate process UNIX
601800c9408SRobert Watson 		 * credentials in any way they see fit.
602800c9408SRobert Watson 		 */
603800c9408SRobert Watson 	case PRIV_CRED_SETUID:
604800c9408SRobert Watson 	case PRIV_CRED_SETEUID:
605800c9408SRobert Watson 	case PRIV_CRED_SETGID:
606800c9408SRobert Watson 	case PRIV_CRED_SETEGID:
607800c9408SRobert Watson 	case PRIV_CRED_SETGROUPS:
608800c9408SRobert Watson 	case PRIV_CRED_SETREUID:
609800c9408SRobert Watson 	case PRIV_CRED_SETREGID:
610800c9408SRobert Watson 	case PRIV_CRED_SETRESUID:
611800c9408SRobert Watson 	case PRIV_CRED_SETRESGID:
612800c9408SRobert Watson 
613800c9408SRobert Watson 		/*
614800c9408SRobert Watson 		 * Jail implements visibility constraints already, so allow
615800c9408SRobert Watson 		 * jailed root to override uid/gid-based constraints.
616800c9408SRobert Watson 		 */
617800c9408SRobert Watson 	case PRIV_SEEOTHERGIDS:
618800c9408SRobert Watson 	case PRIV_SEEOTHERUIDS:
619800c9408SRobert Watson 
620800c9408SRobert Watson 		/*
621800c9408SRobert Watson 		 * Jail implements inter-process debugging limits already, so
622800c9408SRobert Watson 		 * allow jailed root various debugging privileges.
623800c9408SRobert Watson 		 */
624800c9408SRobert Watson 	case PRIV_DEBUG_DIFFCRED:
625800c9408SRobert Watson 	case PRIV_DEBUG_SUGID:
626800c9408SRobert Watson 	case PRIV_DEBUG_UNPRIV:
627800c9408SRobert Watson 
628800c9408SRobert Watson 		/*
629800c9408SRobert Watson 		 * Allow jail to set various resource limits and login
630800c9408SRobert Watson 		 * properties, and for now, exceed process resource limits.
631800c9408SRobert Watson 		 */
632800c9408SRobert Watson 	case PRIV_PROC_LIMIT:
633800c9408SRobert Watson 	case PRIV_PROC_SETLOGIN:
634800c9408SRobert Watson 	case PRIV_PROC_SETRLIMIT:
635800c9408SRobert Watson 
636800c9408SRobert Watson 		/*
637800c9408SRobert Watson 		 * System V and POSIX IPC privileges are granted in jail.
638800c9408SRobert Watson 		 */
639800c9408SRobert Watson 	case PRIV_IPC_READ:
640800c9408SRobert Watson 	case PRIV_IPC_WRITE:
641800c9408SRobert Watson 	case PRIV_IPC_ADMIN:
642800c9408SRobert Watson 	case PRIV_IPC_MSGSIZE:
643800c9408SRobert Watson 	case PRIV_MQ_ADMIN:
644800c9408SRobert Watson 
645800c9408SRobert Watson 		/*
646800c9408SRobert Watson 		 * Jail implements its own inter-process limits, so allow
647800c9408SRobert Watson 		 * root processes in jail to change scheduling on other
648800c9408SRobert Watson 		 * processes in the same jail.  Likewise for signalling.
649800c9408SRobert Watson 		 */
650800c9408SRobert Watson 	case PRIV_SCHED_DIFFCRED:
651800c9408SRobert Watson 	case PRIV_SIGNAL_DIFFCRED:
652800c9408SRobert Watson 	case PRIV_SIGNAL_SUGID:
653800c9408SRobert Watson 
654800c9408SRobert Watson 		/*
655800c9408SRobert Watson 		 * Allow jailed processes to write to sysctls marked as jail
656800c9408SRobert Watson 		 * writable.
657800c9408SRobert Watson 		 */
658800c9408SRobert Watson 	case PRIV_SYSCTL_WRITEJAIL:
659800c9408SRobert Watson 
660800c9408SRobert Watson 		/*
661800c9408SRobert Watson 		 * Allow root in jail to manage a variety of quota
662e82d0201SRobert Watson 		 * properties.  These should likely be conditional on a
663e82d0201SRobert Watson 		 * configuration option.
664800c9408SRobert Watson 		 */
66595b091d2SRobert Watson 	case PRIV_VFS_GETQUOTA:
66695b091d2SRobert Watson 	case PRIV_VFS_SETQUOTA:
667800c9408SRobert Watson 
668800c9408SRobert Watson 		/*
669800c9408SRobert Watson 		 * Since Jail relies on chroot() to implement file system
670800c9408SRobert Watson 		 * protections, grant many VFS privileges to root in jail.
671800c9408SRobert Watson 		 * Be careful to exclude mount-related and NFS-related
672800c9408SRobert Watson 		 * privileges.
673800c9408SRobert Watson 		 */
674800c9408SRobert Watson 	case PRIV_VFS_READ:
675800c9408SRobert Watson 	case PRIV_VFS_WRITE:
676800c9408SRobert Watson 	case PRIV_VFS_ADMIN:
677800c9408SRobert Watson 	case PRIV_VFS_EXEC:
678800c9408SRobert Watson 	case PRIV_VFS_LOOKUP:
679800c9408SRobert Watson 	case PRIV_VFS_BLOCKRESERVE:	/* XXXRW: Slightly surprising. */
680800c9408SRobert Watson 	case PRIV_VFS_CHFLAGS_DEV:
681800c9408SRobert Watson 	case PRIV_VFS_CHOWN:
682800c9408SRobert Watson 	case PRIV_VFS_CHROOT:
683bb531912SPawel Jakub Dawidek 	case PRIV_VFS_RETAINSUGID:
684800c9408SRobert Watson 	case PRIV_VFS_FCHROOT:
685800c9408SRobert Watson 	case PRIV_VFS_LINK:
686800c9408SRobert Watson 	case PRIV_VFS_SETGID:
687800c9408SRobert Watson 	case PRIV_VFS_STICKYFILE:
688800c9408SRobert Watson 		return (0);
689800c9408SRobert Watson 
690800c9408SRobert Watson 		/*
691800c9408SRobert Watson 		 * Depending on the global setting, allow privilege of
692800c9408SRobert Watson 		 * setting system flags.
693800c9408SRobert Watson 		 */
694800c9408SRobert Watson 	case PRIV_VFS_SYSFLAGS:
695800c9408SRobert Watson 		if (jail_chflags_allowed)
696800c9408SRobert Watson 			return (0);
697800c9408SRobert Watson 		else
698800c9408SRobert Watson 			return (EPERM);
699800c9408SRobert Watson 
700800c9408SRobert Watson 		/*
701f3a8d2f9SPawel Jakub Dawidek 		 * Depending on the global setting, allow privilege of
702f3a8d2f9SPawel Jakub Dawidek 		 * mounting/unmounting file systems.
703f3a8d2f9SPawel Jakub Dawidek 		 */
704f3a8d2f9SPawel Jakub Dawidek 	case PRIV_VFS_MOUNT:
705f3a8d2f9SPawel Jakub Dawidek 	case PRIV_VFS_UNMOUNT:
706f3a8d2f9SPawel Jakub Dawidek 	case PRIV_VFS_MOUNT_NONUSER:
70724b0502eSPawel Jakub Dawidek 	case PRIV_VFS_MOUNT_OWNER:
708f3a8d2f9SPawel Jakub Dawidek 		if (jail_mount_allowed)
709f3a8d2f9SPawel Jakub Dawidek 			return (0);
710f3a8d2f9SPawel Jakub Dawidek 		else
711f3a8d2f9SPawel Jakub Dawidek 			return (EPERM);
712f3a8d2f9SPawel Jakub Dawidek 
713f3a8d2f9SPawel Jakub Dawidek 		/*
7144b084056SRobert Watson 		 * Allow jailed root to bind reserved ports and reuse in-use
7154b084056SRobert Watson 		 * ports.
716800c9408SRobert Watson 		 */
717800c9408SRobert Watson 	case PRIV_NETINET_RESERVEDPORT:
7184b084056SRobert Watson 	case PRIV_NETINET_REUSEPORT:
719800c9408SRobert Watson 		return (0);
720800c9408SRobert Watson 
721800c9408SRobert Watson 		/*
722800c9408SRobert Watson 		 * Conditionally allow creating raw sockets in jail.
723800c9408SRobert Watson 		 */
724800c9408SRobert Watson 	case PRIV_NETINET_RAW:
725800c9408SRobert Watson 		if (jail_allow_raw_sockets)
726800c9408SRobert Watson 			return (0);
727800c9408SRobert Watson 		else
728800c9408SRobert Watson 			return (EPERM);
729800c9408SRobert Watson 
730800c9408SRobert Watson 		/*
731800c9408SRobert Watson 		 * Since jail implements its own visibility limits on netstat
732800c9408SRobert Watson 		 * sysctls, allow getcred.  This allows identd to work in
733800c9408SRobert Watson 		 * jail.
734800c9408SRobert Watson 		 */
735800c9408SRobert Watson 	case PRIV_NETINET_GETCRED:
736800c9408SRobert Watson 		return (0);
737800c9408SRobert Watson 
738800c9408SRobert Watson 	default:
739800c9408SRobert Watson 		/*
740800c9408SRobert Watson 		 * In all remaining cases, deny the privilege request.  This
741800c9408SRobert Watson 		 * includes almost all network privileges, many system
742800c9408SRobert Watson 		 * configuration privileges.
743800c9408SRobert Watson 		 */
744800c9408SRobert Watson 		return (EPERM);
745800c9408SRobert Watson 	}
746800c9408SRobert Watson }
747800c9408SRobert Watson 
748dc68a633SPawel Jakub Dawidek /*
749dc68a633SPawel Jakub Dawidek  * Register jail service. Provides 'create' and 'destroy' methods.
750dc68a633SPawel Jakub Dawidek  * 'create' method will be called for every existing jail and all
751dc68a633SPawel Jakub Dawidek  * jails in the future as they beeing created.
752dc68a633SPawel Jakub Dawidek  * 'destroy' method will be called for every jail going away and
753dc68a633SPawel Jakub Dawidek  * for all existing jails at the time of service deregistration.
754dc68a633SPawel Jakub Dawidek  */
755dc68a633SPawel Jakub Dawidek struct prison_service *
756dc68a633SPawel Jakub Dawidek prison_service_register(const char *name, prison_create_t create,
757dc68a633SPawel Jakub Dawidek     prison_destroy_t destroy)
758dc68a633SPawel Jakub Dawidek {
759dc68a633SPawel Jakub Dawidek 	struct prison_service *psrv, *psrv2;
760dc68a633SPawel Jakub Dawidek 	struct prison *pr;
761dc68a633SPawel Jakub Dawidek 	int reallocate = 1, slotno = 0;
762dc68a633SPawel Jakub Dawidek 	void **slots, **oldslots;
763dc68a633SPawel Jakub Dawidek 
764dc68a633SPawel Jakub Dawidek 	psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
765dc68a633SPawel Jakub Dawidek 	    M_WAITOK | M_ZERO);
766dc68a633SPawel Jakub Dawidek 	psrv->ps_create = create;
767dc68a633SPawel Jakub Dawidek 	psrv->ps_destroy = destroy;
768dc68a633SPawel Jakub Dawidek 	strcpy(psrv->ps_name, name);
769dc68a633SPawel Jakub Dawidek 	/*
770dc68a633SPawel Jakub Dawidek 	 * Grab the allprison_lock here, so we won't miss any jail
771dc68a633SPawel Jakub Dawidek 	 * creation/destruction.
772dc68a633SPawel Jakub Dawidek 	 */
773dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
774dc68a633SPawel Jakub Dawidek #ifdef INVARIANTS
775dc68a633SPawel Jakub Dawidek 	/*
776dc68a633SPawel Jakub Dawidek 	 * Verify if service is not already registered.
777dc68a633SPawel Jakub Dawidek 	 */
778dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
779dc68a633SPawel Jakub Dawidek 		KASSERT(strcmp(psrv2->ps_name, name) != 0,
780dc68a633SPawel Jakub Dawidek 		    ("jail service %s already registered", name));
781dc68a633SPawel Jakub Dawidek 	}
782dc68a633SPawel Jakub Dawidek #endif
783dc68a633SPawel Jakub Dawidek 	/*
784dc68a633SPawel Jakub Dawidek 	 * Find free slot. When there is no existing free slot available,
785dc68a633SPawel Jakub Dawidek 	 * allocate one at the end.
786dc68a633SPawel Jakub Dawidek 	 */
787dc68a633SPawel Jakub Dawidek 	TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
788dc68a633SPawel Jakub Dawidek 		if (psrv2->ps_slotno != slotno) {
789dc68a633SPawel Jakub Dawidek 			KASSERT(slotno < psrv2->ps_slotno,
790dc68a633SPawel Jakub Dawidek 			    ("Invalid slotno (slotno=%d >= ps_slotno=%d",
791dc68a633SPawel Jakub Dawidek 			    slotno, psrv2->ps_slotno));
792dc68a633SPawel Jakub Dawidek 			/* We found free slot. */
793dc68a633SPawel Jakub Dawidek 			reallocate = 0;
794dc68a633SPawel Jakub Dawidek 			break;
795dc68a633SPawel Jakub Dawidek 		}
796dc68a633SPawel Jakub Dawidek 		slotno++;
797dc68a633SPawel Jakub Dawidek 	}
798dc68a633SPawel Jakub Dawidek 	psrv->ps_slotno = slotno;
799dc68a633SPawel Jakub Dawidek 	/*
800dc68a633SPawel Jakub Dawidek 	 * Keep the list sorted by slot number.
801dc68a633SPawel Jakub Dawidek 	 */
802dc68a633SPawel Jakub Dawidek 	if (psrv2 != NULL) {
803dc68a633SPawel Jakub Dawidek 		KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
804dc68a633SPawel Jakub Dawidek 		TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
805dc68a633SPawel Jakub Dawidek 	} else {
806dc68a633SPawel Jakub Dawidek 		KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
807dc68a633SPawel Jakub Dawidek 		TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
808dc68a633SPawel Jakub Dawidek 	}
809dc68a633SPawel Jakub Dawidek 	prison_service_slots++;
810dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
811dc68a633SPawel Jakub Dawidek 	/*
812dc68a633SPawel Jakub Dawidek 	 * Allocate memory for new slot if we didn't found empty one.
813dc68a633SPawel Jakub Dawidek 	 * Do not use realloc(9), because pr_slots is protected with a mutex,
814dc68a633SPawel Jakub Dawidek 	 * so we can't sleep.
815dc68a633SPawel Jakub Dawidek 	 */
816dc68a633SPawel Jakub Dawidek 	LIST_FOREACH(pr, &allprison, pr_list) {
817dc68a633SPawel Jakub Dawidek 		if (reallocate) {
818dc68a633SPawel Jakub Dawidek 			/* First allocate memory with M_WAITOK. */
819dc68a633SPawel Jakub Dawidek 			slots = malloc(sizeof(*slots) * prison_service_slots,
820dc68a633SPawel Jakub Dawidek 			    M_PRISON, M_WAITOK);
821dc68a633SPawel Jakub Dawidek 			/* Now grab the mutex and replace pr_slots. */
822dc68a633SPawel Jakub Dawidek 			mtx_lock(&pr->pr_mtx);
823dc68a633SPawel Jakub Dawidek 			oldslots = pr->pr_slots;
824dc68a633SPawel Jakub Dawidek 			if (psrv->ps_slotno > 0) {
825dc68a633SPawel Jakub Dawidek 				bcopy(oldslots, slots,
826dc68a633SPawel Jakub Dawidek 				    sizeof(*slots) * (prison_service_slots - 1));
827dc68a633SPawel Jakub Dawidek 			}
828dc68a633SPawel Jakub Dawidek 			slots[psrv->ps_slotno] = NULL;
829dc68a633SPawel Jakub Dawidek 			pr->pr_slots = slots;
830dc68a633SPawel Jakub Dawidek 			mtx_unlock(&pr->pr_mtx);
831dc68a633SPawel Jakub Dawidek 			if (oldslots != NULL)
832dc68a633SPawel Jakub Dawidek 				free(oldslots, M_PRISON);
833dc68a633SPawel Jakub Dawidek 		}
834dc68a633SPawel Jakub Dawidek 		/*
835dc68a633SPawel Jakub Dawidek 		 * Call 'create' method for each existing jail.
836dc68a633SPawel Jakub Dawidek 		 */
837dc68a633SPawel Jakub Dawidek 		psrv->ps_create(psrv, pr);
838dc68a633SPawel Jakub Dawidek 	}
839dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
840dc68a633SPawel Jakub Dawidek 
841dc68a633SPawel Jakub Dawidek 	return (psrv);
842dc68a633SPawel Jakub Dawidek }
843dc68a633SPawel Jakub Dawidek 
844dc68a633SPawel Jakub Dawidek void
845dc68a633SPawel Jakub Dawidek prison_service_deregister(struct prison_service *psrv)
846dc68a633SPawel Jakub Dawidek {
847dc68a633SPawel Jakub Dawidek 	struct prison *pr;
848dc68a633SPawel Jakub Dawidek 	void **slots, **oldslots;
849dc68a633SPawel Jakub Dawidek 	int last = 0;
850dc68a633SPawel Jakub Dawidek 
851dc68a633SPawel Jakub Dawidek 	sx_xlock(&allprison_lock);
852dc68a633SPawel Jakub Dawidek 	if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
853dc68a633SPawel Jakub Dawidek 		last = 1;
854dc68a633SPawel Jakub Dawidek 	TAILQ_REMOVE(&prison_services, psrv, ps_next);
855dc68a633SPawel Jakub Dawidek 	prison_service_slots--;
856dc68a633SPawel Jakub Dawidek 	sx_downgrade(&allprison_lock);
857dc68a633SPawel Jakub Dawidek 	LIST_FOREACH(pr, &allprison, pr_list) {
858dc68a633SPawel Jakub Dawidek 		/*
859dc68a633SPawel Jakub Dawidek 		 * Call 'destroy' method for every currently existing jail.
860dc68a633SPawel Jakub Dawidek 		 */
861dc68a633SPawel Jakub Dawidek 		psrv->ps_destroy(psrv, pr);
862dc68a633SPawel Jakub Dawidek 		/*
863dc68a633SPawel Jakub Dawidek 		 * If this is the last slot, free the memory allocated for it.
864dc68a633SPawel Jakub Dawidek 		 */
865dc68a633SPawel Jakub Dawidek 		if (last) {
866dc68a633SPawel Jakub Dawidek 			if (prison_service_slots == 0)
867dc68a633SPawel Jakub Dawidek 				slots = NULL;
868dc68a633SPawel Jakub Dawidek 			else {
869dc68a633SPawel Jakub Dawidek 				slots = malloc(sizeof(*slots) * prison_service_slots,
870dc68a633SPawel Jakub Dawidek 				    M_PRISON, M_WAITOK);
871dc68a633SPawel Jakub Dawidek 			}
872dc68a633SPawel Jakub Dawidek 			mtx_lock(&pr->pr_mtx);
873dc68a633SPawel Jakub Dawidek 			oldslots = pr->pr_slots;
874dc68a633SPawel Jakub Dawidek 			/*
875dc68a633SPawel Jakub Dawidek 			 * We require setting slot to NULL after freeing it,
876dc68a633SPawel Jakub Dawidek 			 * this way we can check for memory leaks here.
877dc68a633SPawel Jakub Dawidek 			 */
878dc68a633SPawel Jakub Dawidek 			KASSERT(oldslots[psrv->ps_slotno] == NULL,
879dc68a633SPawel Jakub Dawidek 			    ("Slot %d (service %s, jailid=%d) still contains data?",
880dc68a633SPawel Jakub Dawidek 			     psrv->ps_slotno, psrv->ps_name, pr->pr_id));
881dc68a633SPawel Jakub Dawidek 			if (psrv->ps_slotno > 0) {
882dc68a633SPawel Jakub Dawidek 				bcopy(oldslots, slots,
883dc68a633SPawel Jakub Dawidek 				    sizeof(*slots) * prison_service_slots);
884dc68a633SPawel Jakub Dawidek 			}
885dc68a633SPawel Jakub Dawidek 			pr->pr_slots = slots;
886dc68a633SPawel Jakub Dawidek 			mtx_unlock(&pr->pr_mtx);
887dc68a633SPawel Jakub Dawidek 			KASSERT(oldslots != NULL, ("oldslots == NULL"));
888dc68a633SPawel Jakub Dawidek 			free(oldslots, M_PRISON);
889dc68a633SPawel Jakub Dawidek 		}
890dc68a633SPawel Jakub Dawidek 	}
891dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
892dc68a633SPawel Jakub Dawidek 	free(psrv, M_PRISON);
893dc68a633SPawel Jakub Dawidek }
894dc68a633SPawel Jakub Dawidek 
895dc68a633SPawel Jakub Dawidek /*
896dc68a633SPawel Jakub Dawidek  * Function sets data for the given jail in slot assigned for the given
897dc68a633SPawel Jakub Dawidek  * jail service.
898dc68a633SPawel Jakub Dawidek  */
899dc68a633SPawel Jakub Dawidek void
900dc68a633SPawel Jakub Dawidek prison_service_data_set(struct prison_service *psrv, struct prison *pr,
901dc68a633SPawel Jakub Dawidek     void *data)
902dc68a633SPawel Jakub Dawidek {
903dc68a633SPawel Jakub Dawidek 
904dc68a633SPawel Jakub Dawidek 	mtx_assert(&pr->pr_mtx, MA_OWNED);
905dc68a633SPawel Jakub Dawidek 	pr->pr_slots[psrv->ps_slotno] = data;
906dc68a633SPawel Jakub Dawidek }
907dc68a633SPawel Jakub Dawidek 
908dc68a633SPawel Jakub Dawidek /*
909dc68a633SPawel Jakub Dawidek  * Function clears slots assigned for the given jail service in the given
910dc68a633SPawel Jakub Dawidek  * prison structure and returns current slot data.
911dc68a633SPawel Jakub Dawidek  */
912dc68a633SPawel Jakub Dawidek void *
913dc68a633SPawel Jakub Dawidek prison_service_data_del(struct prison_service *psrv, struct prison *pr)
914dc68a633SPawel Jakub Dawidek {
915dc68a633SPawel Jakub Dawidek 	void *data;
916dc68a633SPawel Jakub Dawidek 
917dc68a633SPawel Jakub Dawidek 	mtx_assert(&pr->pr_mtx, MA_OWNED);
918dc68a633SPawel Jakub Dawidek 	data = pr->pr_slots[psrv->ps_slotno];
919dc68a633SPawel Jakub Dawidek 	pr->pr_slots[psrv->ps_slotno] = NULL;
920dc68a633SPawel Jakub Dawidek 	return (data);
921dc68a633SPawel Jakub Dawidek }
922dc68a633SPawel Jakub Dawidek 
923dc68a633SPawel Jakub Dawidek /*
924dc68a633SPawel Jakub Dawidek  * Function returns current data from the slot assigned to the given jail
925dc68a633SPawel Jakub Dawidek  * service for the given jail.
926dc68a633SPawel Jakub Dawidek  */
927dc68a633SPawel Jakub Dawidek void *
928dc68a633SPawel Jakub Dawidek prison_service_data_get(struct prison_service *psrv, struct prison *pr)
929dc68a633SPawel Jakub Dawidek {
930dc68a633SPawel Jakub Dawidek 
931dc68a633SPawel Jakub Dawidek 	mtx_assert(&pr->pr_mtx, MA_OWNED);
932dc68a633SPawel Jakub Dawidek 	return (pr->pr_slots[psrv->ps_slotno]);
933dc68a633SPawel Jakub Dawidek }
934dc68a633SPawel Jakub Dawidek 
935fd7a8150SMike Barcroft static int
936fd7a8150SMike Barcroft sysctl_jail_list(SYSCTL_HANDLER_ARGS)
937fd7a8150SMike Barcroft {
938fd7a8150SMike Barcroft 	struct xprison *xp, *sxp;
939fd7a8150SMike Barcroft 	struct prison *pr;
940fd7a8150SMike Barcroft 	int count, error;
941fd7a8150SMike Barcroft 
9427f4704c0SPawel Jakub Dawidek 	if (jailed(req->td->td_ucred))
943679a1060SRobert Watson 		return (0);
944fd7a8150SMike Barcroft 
945dc68a633SPawel Jakub Dawidek 	sx_slock(&allprison_lock);
946dc68a633SPawel Jakub Dawidek 	if ((count = prisoncount) == 0) {
947dc68a633SPawel Jakub Dawidek 		sx_sunlock(&allprison_lock);
948fd7a8150SMike Barcroft 		return (0);
949dc68a633SPawel Jakub Dawidek 	}
950fd7a8150SMike Barcroft 
951fd7a8150SMike Barcroft 	sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO);
952fd7a8150SMike Barcroft 
953fd7a8150SMike Barcroft 	LIST_FOREACH(pr, &allprison, pr_list) {
954fd7a8150SMike Barcroft 		xp->pr_version = XPRISON_VERSION;
955fd7a8150SMike Barcroft 		xp->pr_id = pr->pr_id;
956fd7a8150SMike Barcroft 		xp->pr_ip = pr->pr_ip;
957b63b0c65SPawel Jakub Dawidek 		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
958b63b0c65SPawel Jakub Dawidek 		mtx_lock(&pr->pr_mtx);
959b63b0c65SPawel Jakub Dawidek 		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));
960fd7a8150SMike Barcroft 		mtx_unlock(&pr->pr_mtx);
961fd7a8150SMike Barcroft 		xp++;
962fd7a8150SMike Barcroft 	}
963dc68a633SPawel Jakub Dawidek 	sx_sunlock(&allprison_lock);
964fd7a8150SMike Barcroft 
965fd7a8150SMike Barcroft 	error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count);
966fd7a8150SMike Barcroft 	free(sxp, M_TEMP);
967fd7a8150SMike Barcroft 	return (error);
968fd7a8150SMike Barcroft }
969fd7a8150SMike Barcroft 
970fd7a8150SMike Barcroft SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD,
971fd7a8150SMike Barcroft     NULL, 0, sysctl_jail_list, "S", "List of active jails");
972461167c2SPawel Jakub Dawidek 
973461167c2SPawel Jakub Dawidek static int
974461167c2SPawel Jakub Dawidek sysctl_jail_jailed(SYSCTL_HANDLER_ARGS)
975461167c2SPawel Jakub Dawidek {
976461167c2SPawel Jakub Dawidek 	int error, injail;
977461167c2SPawel Jakub Dawidek 
978461167c2SPawel Jakub Dawidek 	injail = jailed(req->td->td_ucred);
979461167c2SPawel Jakub Dawidek 	error = SYSCTL_OUT(req, &injail, sizeof(injail));
980461167c2SPawel Jakub Dawidek 
981461167c2SPawel Jakub Dawidek 	return (error);
982461167c2SPawel Jakub Dawidek }
983461167c2SPawel Jakub Dawidek SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD,
984461167c2SPawel Jakub Dawidek     NULL, 0, sysctl_jail_jailed, "I", "Process in jail?");
985