1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1999 Poul-Henning Kamp.
5 * Copyright (c) 2009 James Gritton.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #ifndef _SYS_JAIL_H_
31 #define _SYS_JAIL_H_
32
33 #ifdef _KERNEL
/*
 * Version 0 layout of the jail argument structure.  It is only declared
 * when _KERNEL is defined.
 * NOTE(review): presumably retained so the kernel can still decode requests
 * from callers built against the oldest API -- confirm against the jail()
 * implementation.
 */
struct jail_v0 {
	u_int32_t	version;	/* structure version */
	char		*path;		/* jail path */
	char		*hostname;	/* jail hostname */
	u_int32_t	ip_number;	/* single address held as a 32-bit value */
};
40 #endif
41
/*
 * Argument structure for jail() (see the userland prototype
 * "int jail(struct jail *)").
 * NOTE(review): ip4s/ip6s presumably give the number of elements that
 * ip4/ip6 point to -- confirm against the jail() implementation.
 */
struct jail {
	uint32_t	version;	/* structure version */
	char		*path;		/* jail path */
	char		*hostname;	/* jail hostname */
	char		*jailname;	/* jail name */
	uint32_t	ip4s;		/* IPv4 address count */
	uint32_t	ip6s;		/* IPv6 address count */
	struct in_addr	*ip4;		/* IPv4 addresses */
	struct in6_addr	*ip6;		/* IPv6 addresses */
};
#define	JAIL_API_VERSION	2
53
/*
 * For all xprison structs, always keep the pr_version an int and
 * the first variable so userspace can easily distinguish them.
 */
#ifndef _KERNEL
/*
 * Older xprison layout; it is only declared for non-kernel consumers.
 * It holds one address (pr_ip) and has no state, cpuset or name members.
 */
struct xprison_v1 {
	int		 pr_version;		/* must stay first, see above */
	int		 pr_id;			/* prison id */
	char		 pr_path[MAXPATHLEN];	/* prison path */
	char		 pr_host[MAXHOSTNAMELEN]; /* prison hostname */
	u_int32_t	 pr_ip;			/* single address, 32-bit */
};
#endif
67
68 struct xprison {
69 int pr_version;
70 int pr_id;
71 int pr_state;
72 cpusetid_t pr_cpusetid;
73 char pr_path[MAXPATHLEN];
74 char pr_host[MAXHOSTNAMELEN];
75 char pr_name[MAXHOSTNAMELEN];
76 uint32_t pr_ip4s;
77 uint32_t pr_ip6s;
78 #if 0
79 /*
80 * sizeof(xprison) will be malloced + size needed for all
81 * IPv4 and IPv6 addesses. Offsets are based numbers of addresses.
82 */
83 struct in_addr pr_ip4[];
84 struct in6_addr pr_ip6[];
85 #endif
86 };
87 #define XPRISON_VERSION 3
88
/* Life-cycle states of a prison; INVALID is explicitly 0. */
enum prison_state {
	PRISON_STATE_INVALID = 0,	/* New prison, not ready to be seen */
	PRISON_STATE_ALIVE,		/* Current prison, visible to all */
	PRISON_STATE_DYING		/* Removed but holding resources, */
					/* optionally visible. */
};
94
/*
 * Flags for jail_set and jail_get.
 * JAIL_SET_MASK covers all four flag bits; JAIL_GET_MASK covers only
 * JAIL_DYING.
 */
#define	JAIL_CREATE	0x01	/* Create jail if it doesn't exist */
#define	JAIL_UPDATE	0x02	/* Update parameters of existing jail */
#define	JAIL_ATTACH	0x04	/* Attach to jail upon creation */
#define	JAIL_DYING	0x08	/* Allow getting a dying jail */
#define	JAIL_SET_MASK	0x0f	/* JAIL_DYING is deprecated/ignored here */
#define	JAIL_GET_MASK	0x08

/*
 * NOTE(review): these look like the three settings of a "jailsys"
 * parameter (compare the "E,jailsys" format string used by the
 * SYSCTL_JAIL_PARAM_SYS_* macros) -- confirm.
 */
#define	JAIL_SYS_DISABLE	0
#define	JAIL_SYS_NEW		1
#define	JAIL_SYS_INHERIT	2
108
109 #ifndef _KERNEL
110
/* Only pointers to struct iovec are used below, so a forward declaration suffices. */
struct iovec;

/*
 * Userland entry points (this section is compiled only when _KERNEL is not
 * defined).  jail_set() and jail_get() take the JAIL_* flags defined above
 * as their final int argument.
 * NOTE(review): the unsigned int is presumably the number of iovec
 * entries -- confirm against jail(2).
 */
__BEGIN_DECLS
int	jail(struct jail *);
int	jail_set(struct iovec *, unsigned int, int);
int	jail_get(struct iovec *, unsigned int, int);
int	jail_attach(int);
int	jail_remove(int);
__END_DECLS
120
121 #else /* _KERNEL */
122
123 #include <sys/queue.h>
124 #include <sys/sysctl.h>
125 #include <sys/lock.h>
126 #include <sys/mutex.h>
127 #include <sys/_task.h>
128
/* NOTE(review): presumably the largest jail id the kernel hands out -- confirm. */
#define	JAIL_MAX	999999

/* Declare the M_PRISON malloc type only if MALLOC_DECLARE is available. */
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_PRISON);
#endif
134 #endif /* _KERNEL */
135
136 #if defined(_KERNEL) || defined(_WANT_PRISON)
137
138 #include <sys/osd.h>
139
/* Buffer sizes for struct prison's pr_hostuuid and pr_osrelease members. */
#define	HOSTUUIDLEN	64
#define	DEFAULT_HOSTUUID	"00000000-0000-0000-0000-000000000000"
#define	OSRELEASELEN	32

/* NOTE(review): presumably names of the jail metadata parameters -- confirm. */
#define	JAIL_META_PRIVATE	"meta"
#define	JAIL_META_SHARED	"env"

/* Only pointers to these are used in this header. */
struct racct;
struct prison_racct;
149
/*
 * Address family selector.  PR_FAMILY_MAX is the number of families and
 * sizes the pr_addrs[] array in struct prison.
 */
typedef enum {
	PR_INET		= 0,
	PR_INET6	= 1,
	PR_FAMILY_MAX	= 2,
} pr_family_t;
155
156 /*
157 * This structure describes a prison. It is pointed to by all struct
158 * ucreds's of the inmates. pr_ref keeps track of them and is used to
159 * delete the structure when the last inmate is dead.
160 *
161 * Lock key:
162 * (a) allprison_lock
163 * (A) allproc_lock
164 * (c) set only during creation before the structure is shared, no mutex
165 * required to read
166 * (m) locked by pr_mtx
167 * (p) locked by pr_mtx, and also at least shared allprison_lock required
168 * to update
169 * (q) locked by both pr_mtx and allprison_lock
170 * (r) atomic via refcount(9), pr_mtx and allprison_lock required to
171 * decrement to zero
172 * (n) read access granted with the network epoch
173 */
174 struct prison {
175 TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
176 int pr_id; /* (c) prison id */
177 volatile u_int pr_ref; /* (r) refcount */
178 volatile u_int pr_uref; /* (r) user (alive) refcount */
179 unsigned pr_flags; /* (p) PR_* flags */
180 LIST_HEAD(, prison) pr_children; /* (a) list of child jails */
181 LIST_HEAD(, proc) pr_proclist; /* (A) list of jailed processes */
182 LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */
183 struct prison *pr_parent; /* (c) containing jail */
184 struct mtx pr_mtx;
185 struct task pr_task; /* (c) destroy task */
186 struct osd pr_osd; /* (p) additional data */
187 struct cpuset *pr_cpuset; /* (p) cpuset */
188 struct vnet *pr_vnet; /* (c) network stack */
189 struct vnode *pr_root; /* (c) vnode to rdir */
190 struct prison_ip *pr_addrs[PR_FAMILY_MAX]; /* (p,n) IPs of jail */
191 struct prison_racct *pr_prison_racct; /* (c) racct jail proxy */
192 void *pr_sparep[3];
193 int pr_childcount; /* (a) number of child jails */
194 int pr_childmax; /* (p) maximum child jails */
195 unsigned pr_allow; /* (p) PR_ALLOW_* flags */
196 int pr_securelevel; /* (p) securelevel */
197 int pr_enforce_statfs; /* (p) statfs permission */
198 int pr_devfs_rsnum; /* (p) devfs ruleset */
199 enum prison_state pr_state; /* (q) state in life cycle */
200 volatile int pr_exportcnt; /* (r) count of mount exports */
201 int pr_spare;
202 int pr_osreldate; /* (c) kern.osreldate value */
203 unsigned long pr_hostid; /* (p) jail hostid */
204 char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
205 char pr_path[MAXPATHLEN]; /* (c) chroot path */
206 char pr_hostname[MAXHOSTNAMELEN]; /* (p) jail hostname */
207 char pr_domainname[MAXHOSTNAMELEN]; /* (p) jail domainname */
208 char pr_hostuuid[HOSTUUIDLEN]; /* (p) jail hostuuid */
209 char pr_osrelease[OSRELEASELEN]; /* (c) kern.osrelease value */
210 };
211
212 struct prison_racct {
213 LIST_ENTRY(prison_racct) prr_next;
214 char prr_name[MAXHOSTNAMELEN];
215 u_int prr_refcount;
216 struct racct *prr_racct;
217 };
218 #endif /* _KERNEL || _WANT_PRISON */
219
220 #ifdef _KERNEL
/* Flag bits set via options (stored in struct prison's pr_flags) */
#define	PR_PERSIST	0x00000001	/* Can exist without processes */
#define	PR_HOST		0x00000002	/* Virtualize hostname et al */
#define	PR_IP4_USER	0x00000004	/* Restrict IPv4 addresses */
#define	PR_IP6_USER	0x00000008	/* Restrict IPv6 addresses */
#define	PR_VNET		0x00000010	/* Virtual network stack */
#define	PR_IP4_SADDRSEL	0x00000080	/* Do IPv4 src addr sel. or use the */
					/* primary jail address. */
#define	PR_IP6_SADDRSEL	0x00000100	/* Do IPv6 src addr sel. or use the */
					/* primary jail address. */

/* Internal flag bits */
#define	PR_REMOVE	0x01000000	/* In process of being removed */
#define	PR_IP4		0x02000000	/* IPv4 restricted or disabled */
					/* by this jail or an ancestor */
#define	PR_IP6		0x04000000	/* IPv6 restricted or disabled */
					/* by this jail or an ancestor */
#define	PR_COMPLETE_PROC 0x08000000	/* prison_complete called from */
					/* prison_proc_free, releases uref */
240
/*
 * Flags for pr_allow
 * Bits not noted here may be used for dynamic allow.mount.xxxfs.
 * Bits 0x00000800 through 0x00004000 are not assigned below.
 */
#define	PR_ALLOW_SET_HOSTNAME		0x00000001
#define	PR_ALLOW_SYSVIPC		0x00000002
#define	PR_ALLOW_RAW_SOCKETS		0x00000004
#define	PR_ALLOW_CHFLAGS		0x00000008
#define	PR_ALLOW_MOUNT			0x00000010
#define	PR_ALLOW_QUOTAS			0x00000020
#define	PR_ALLOW_SOCKET_AF		0x00000040
#define	PR_ALLOW_MLOCK			0x00000080
#define	PR_ALLOW_READ_MSGBUF		0x00000100
#define	PR_ALLOW_UNPRIV_DEBUG		0x00000200
#define	PR_ALLOW_SUSER			0x00000400
#define	PR_ALLOW_RESERVED_PORTS		0x00008000
#define	PR_ALLOW_KMEM_ACCESS		0x00010000	/* reserved, not used yet */
#define	PR_ALLOW_NFSD			0x00020000
#define	PR_ALLOW_EXTATTR		0x00040000
#define	PR_ALLOW_ADJTIME		0x00080000
#define	PR_ALLOW_SETTIME		0x00100000
#define	PR_ALLOW_ROUTING		0x00200000
#define	PR_ALLOW_UNPRIV_PARENT_TAMPER	0x00400000
264
/*
 * PR_ALLOW_PRISON0 are the allow flags that we apply by default to prison0,
 * while PR_ALLOW_ALL_STATIC are all of the allow bits that we have allocated at
 * build time.  PR_ALLOW_ALL_STATIC should contain any bit above that we expect
 * to be used on the system, while PR_ALLOW_PRISON0 will be some subset of that.
 *
 * The literal below equals the OR of every PR_ALLOW_* bit defined above and
 * has to be updated by hand whenever a bit is added.
 */
#define	PR_ALLOW_ALL_STATIC		0x007f87ff
#define	PR_ALLOW_PRISON0		\
    (PR_ALLOW_ALL_STATIC & ~(PR_ALLOW_UNPRIV_PARENT_TAMPER))

/*
 * PR_ALLOW_DIFFERENCES determines which flags are able to be
 * different between the parent and child jail upon creation.
 */
#define	PR_ALLOW_DIFFERENCES		\
    (PR_ALLOW_UNPRIV_DEBUG | PR_ALLOW_UNPRIV_PARENT_TAMPER)
281
/*
 * OSD methods
 * The values are consecutive from 0; PR_MAXMETHOD is the number of methods.
 */
#define	PR_METHOD_CREATE	0
#define	PR_METHOD_GET		1
#define	PR_METHOD_SET		2
#define	PR_METHOD_CHECK		3
#define	PR_METHOD_ATTACH	4
#define	PR_METHOD_REMOVE	5
#define	PR_MAXMETHOD		6
292
293 /*
294 * Lock/unlock a prison.
295 * XXX These exist not so much for general convenience, but to be useable in
296 * the FOREACH_PRISON_DESCENDANT_LOCKED macro which can't handle them in
297 * non-function form as currently defined.
298 */
299 static __inline void
prison_lock(struct prison * pr)300 prison_lock(struct prison *pr)
301 {
302
303 mtx_lock(&pr->pr_mtx);
304 }
305
306 static __inline void
prison_unlock(struct prison * pr)307 prison_unlock(struct prison *pr)
308 {
309
310 mtx_unlock(&pr->pr_mtx);
311 }
312
/*
 * Traverse a prison's immediate children.
 * ppr is the parent prison; cpr is the loop variable that receives each
 * entry of ppr's pr_children list in turn.
 */
#define	FOREACH_PRISON_CHILD(ppr, cpr)					\
	LIST_FOREACH(cpr, &(ppr)->pr_children, pr_sibling)
316
/*
 * Preorder traversal of all of a prison's descendants.
 * This ugly loop allows the macro to be followed by a single block
 * as expected in a looping primitive.
 *
 * ppr is the root of the walk (it is not visited itself), cpr is the loop
 * variable and descend is an int lvalue used as state: it is non-zero when
 * cpr has just been reached for the first time, and zero while the walk
 * is climbing back through a parent.  The trailing "if (!(descend)) ; else"
 * makes the caller's block run only in the first case.  The caller's block
 * may clear descend to keep the walk from entering cpr's children.
 */
#define	FOREACH_PRISON_DESCENDANT(ppr, cpr, descend)			\
	for ((cpr) = (ppr), (descend) = 1;				\
	    ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children))	\
	      ? LIST_FIRST(&(cpr)->pr_children)				\
	      : ((cpr) == (ppr)						\
		 ? NULL							\
		 : (((descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		    ? LIST_NEXT(cpr, pr_sibling)			\
		    : (cpr)->pr_parent))));)				\
		if (!(descend))						\
			;						\
		else
334
/*
 * As above, but lock descendants on the way down and unlock on the way up.
 *
 * prison_lock(cpr) is called just before the caller's block runs for cpr,
 * and prison_unlock(cpr) is called when the walk leaves cpr for a sibling
 * or for its parent.  A prison therefore stays locked while its
 * descendants are visited.
 */
#define	FOREACH_PRISON_DESCENDANT_LOCKED(ppr, cpr, descend)		\
	for ((cpr) = (ppr), (descend) = 1;				\
	    ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children))	\
	      ? LIST_FIRST(&(cpr)->pr_children)				\
	      : ((cpr) == (ppr)						\
		 ? NULL							\
		 : ((prison_unlock(cpr),				\
		    (descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		    ? LIST_NEXT(cpr, pr_sibling)			\
		    : (cpr)->pr_parent))));)				\
		if ((descend) ? (prison_lock(cpr), 0) : 1)		\
			;						\
		else
351
/*
 * As above, but also keep track of the level descended to.
 *
 * level is an int lvalue: it starts at 0, is incremented each time the walk
 * steps down into a child list and decremented each time it climbs back to
 * a parent, so direct children of ppr are visited with level == 1.
 *
 * Every use of level is parenthesized so that an lvalue expression such as
 * "*lp" may be passed; without the parentheses "*lp++" would advance the
 * pointer instead of the counter.
 */
#define	FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(ppr, cpr, descend, level)\
	for ((cpr) = (ppr), (descend) = 1, (level) = 0;			\
	    ((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children))	\
	      ? ((level)++, LIST_FIRST(&(cpr)->pr_children))		\
	      : ((cpr) == (ppr)						\
		 ? NULL							\
		 : ((prison_unlock(cpr),				\
		    (descend) = LIST_NEXT(cpr, pr_sibling) != NULL)	\
		    ? LIST_NEXT(cpr, pr_sibling)			\
		    : ((level)--, (cpr)->pr_parent)))));)		\
		if ((descend) ? (prison_lock(cpr), 0) : 1)		\
			;						\
		else
368
369 /*
370 * Traverse a prison's descendants, visiting both preorder and postorder.
371 */
372 #define FOREACH_PRISON_DESCENDANT_PRE_POST(ppr, cpr, descend) \
373 for ((cpr) = (ppr), (descend) = 1; \
374 ((cpr) = (descend) \
375 ? ((descend) = !LIST_EMPTY(&(cpr)->pr_children)) \
376 ? LIST_FIRST(&(cpr)->pr_children) \
377 : (cpr) \
378 : ((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \
379 ? LIST_NEXT(cpr, pr_sibling) \
380 : cpr->pr_parent) != (ppr);)
381
/*
 * Attributes of the physical system, and the root of the jail tree.
 */
extern struct prison	prison0;

/*
 * List head type for prisons, the list itself and its lock; both objects
 * are defined elsewhere.  allprison_lock is the lock written "(a)" in
 * struct prison's lock key.
 */
TAILQ_HEAD(prisonlist, prison);
extern struct prisonlist allprison;
extern struct sx	allprison_lock;
390
/*
 * Sysctls to describe jail parameters.
 */
SYSCTL_DECL(_security_jail);
SYSCTL_DECL(_security_jail_param);

/*
 * Helpers for declaring nodes under security.jail.param.  In the
 * SYSCTL_JAIL_PARAM* macros "module" is pasted directly onto
 * _security_jail_param, so it must either be empty or begin with an
 * underscore (the *_SYS_* wrappers below pass "_##module").  All of the
 * leaf macros use sysctl_jail_param as the handler.
 */

/* Declare the node security.jail.param.<name>. */
#define	SYSCTL_JAIL_PARAM_DECL(name)					\
	SYSCTL_DECL(_security_jail_param_##name)
/* Leaf of the given type; CTLFLAG_MPSAFE is always added. */
#define	SYSCTL_JAIL_PARAM(module, param, type, fmt, descr)		\
    SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param,	\
	(type) | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_param, fmt, descr)
/* String leaf; len is passed as the second handler argument. */
#define	SYSCTL_JAIL_PARAM_STRING(module, param, access, len, descr)	\
    SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param,	\
	CTLTYPE_STRING | CTLFLAG_MPSAFE | (access), NULL, len,		\
	sysctl_jail_param, "A", descr)
/* Struct leaf; len is passed as the second handler argument. */
#define	SYSCTL_JAIL_PARAM_STRUCT(module, param, access, len, fmt, descr)\
    SYSCTL_PROC(_security_jail_param ## module, OID_AUTO, param,	\
	CTLTYPE_STRUCT | CTLFLAG_MPSAFE | (access), NULL, len,		\
	sysctl_jail_param, fmt, descr)
/* Interior node directly below security.jail.param. */
#define	SYSCTL_JAIL_PARAM_NODE(module, descr)				\
    SYSCTL_NODE(_security_jail_param, OID_AUTO, module, CTLFLAG_MPSAFE,	\
        0, descr)
/* Interior node below security.jail.param.<parent>. */
#define	SYSCTL_JAIL_PARAM_SUBNODE(parent, module, descr)		\
    SYSCTL_NODE(_security_jail_param_##parent, OID_AUTO, module,	\
        CTLFLAG_MPSAFE, 0, descr)
/*
 * Node plus an unnamed int leaf of format "E,jailsys" inside it.  These two
 * macros expand to two declarations separated by ';'.
 */
#define	SYSCTL_JAIL_PARAM_SYS_NODE(module, access, descr)		\
    SYSCTL_JAIL_PARAM_NODE(module, descr);				\
    SYSCTL_JAIL_PARAM(_##module, , CTLTYPE_INT | (access), "E,jailsys",	\
	descr)
#define	SYSCTL_JAIL_PARAM_SYS_SUBNODE(parent, module, access, descr)	\
    SYSCTL_JAIL_PARAM_SUBNODE(parent, module, descr);			\
    SYSCTL_JAIL_PARAM(_##parent##_##module, , CTLTYPE_INT | (access),	\
	"E,jailsys", descr)
424
/*
 * Kernel support functions for jail().
 *
 * The prototypes that follow only use pointers to these types, so forward
 * declarations are enough.
 */
struct ucred;
struct mount;
struct sockaddr;
struct statfs;
struct vfsconf;
433
/*
 * Return 1 if the passed credential is in a jail, otherwise 0.
 *
 * A credential counts as jailed when its cr_prison is anything other than
 * &prison0.  The argument is parenthesized so that any pointer expression
 * (for example "base + 1") can be passed safely.
 */
#define	jailed(cred)	((cred)->cr_prison != &prison0)
438
/* Accessors for host-level attributes of a credential. */
bool jailed_without_vnet(struct ucred *);
void getcredhostname(struct ucred *, char *, size_t);
void getcreddomainname(struct ucred *, char *, size_t);
void getcredhostuuid(struct ucred *, char *, size_t);
void getcredhostid(struct ucred *, unsigned long *);
void getjailname(struct ucred *cred, char *name, size_t len);

/* Prison setup, permission checks, lookup and reference counting. */
void prison0_init(void);
bool prison_allow(struct ucred *, unsigned);
int prison_check(struct ucred *cred1, struct ucred *cred2);
bool prison_check_nfsd(struct ucred *cred);
bool prison_owns_vnet(struct prison *pr);
int prison_canseemount(struct ucred *cred, struct mount *mp);
void prison_enforce_statfs(struct ucred *cred, struct mount *mp,
    struct statfs *sp);
struct prison *prison_find(int prid);
struct prison *prison_find_child(struct prison *, int);
struct prison *prison_find_name(struct prison *, const char *);
bool prison_flag(struct ucred *, unsigned);
void prison_free(struct prison *pr);
void prison_free_locked(struct prison *pr);
void prison_hold(struct prison *pr);
void prison_hold_locked(struct prison *pr);
void prison_proc_hold(struct prison *);
void prison_proc_free(struct prison *);
void prison_proc_link(struct prison *, struct proc *);
void prison_proc_unlink(struct prison *, struct proc *);
void prison_proc_iterate(struct prison *, void (*)(struct proc *, void *), void *);
void prison_set_allow(struct ucred *cred, unsigned flag, int enable);
bool prison_ischild(struct prison *, struct prison *);
bool prison_isalive(const struct prison *);
bool prison_isvalid(struct prison *);

/* Address helpers selected by pr_family_t; built if either INET or INET6 is defined. */
#if defined(INET) || defined(INET6)
int prison_ip_check(const struct prison *, const pr_family_t, const void *);
const void *prison_ip_get0(const struct prison *, const pr_family_t);
u_int prison_ip_cnt(const struct prison *, const pr_family_t);
#endif

/* IPv4-only helpers. */
#ifdef INET
bool prison_equal_ip4(struct prison *, struct prison *);
int prison_get_ip4(struct ucred *cred, struct in_addr *ia);
int prison_local_ip4(struct ucred *cred, struct in_addr *ia);
int prison_remote_ip4(struct ucred *cred, struct in_addr *ia);
int prison_check_ip4(const struct ucred *, const struct in_addr *);
int prison_check_ip4_locked(const struct prison *, const struct in_addr *);
bool prison_saddrsel_ip4(struct ucred *, struct in_addr *);
int prison_qcmp_v4(const void *, const void *);
bool prison_valid_v4(const void *);
#endif

/* IPv6-only helpers, mirroring the IPv4 set above. */
#ifdef INET6
bool prison_equal_ip6(struct prison *, struct prison *);
int prison_get_ip6(struct ucred *, struct in6_addr *);
int prison_local_ip6(struct ucred *, struct in6_addr *, int);
int prison_remote_ip6(struct ucred *, struct in6_addr *);
int prison_check_ip6(const struct ucred *, const struct in6_addr *);
int prison_check_ip6_locked(const struct prison *, const struct in6_addr *);
bool prison_saddrsel_ip6(struct ucred *, struct in6_addr *);
int prison_qcmp_v6(const void *, const void *);
bool prison_valid_v6(const void *);
#endif

/* Remaining checks, the sysctl handler and allow-flag registration. */
int prison_check_af(struct ucred *cred, int af);
int prison_if(struct ucred *cred, const struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
/* Handler referenced by the SYSCTL_JAIL_PARAM* macros. */
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
unsigned prison_add_allow(const char *prefix, const char *name,
    const char *prefix_descr, const char *descr);
void prison_add_vfs(struct vfsconf *vfsp);

/* Resource accounting (struct prison_racct) helpers. */
void prison_racct_foreach(void (*callback)(struct racct *racct,
    void *arg2, void *arg3), void (*pre)(void), void (*post)(void),
    void *arg2, void *arg3);
struct prison_racct *prison_racct_find(const char *name);
void prison_racct_hold(struct prison_racct *prr);
void prison_racct_free(struct prison_racct *prr);
511
512 #endif /* _KERNEL */
513 #endif /* !_SYS_JAIL_H_ */
514