xref: /freebsd/sys/netinet/in_jail.c (revision be996c05224c3d82f26f94315c760776c3f2896c)
1 /*-
2  * Copyright (c) 1999 Poul-Henning Kamp.
3  * Copyright (c) 2008 Bjoern A. Zeeb.
4  * Copyright (c) 2009 James Gritton.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 #include "opt_ddb.h"
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 
37 #include <sys/param.h>
38 #include <sys/types.h>
39 #include <sys/kernel.h>
40 #include <sys/systm.h>
41 #include <sys/errno.h>
42 #include <sys/sysproto.h>
43 #include <sys/malloc.h>
44 #include <sys/osd.h>
45 #include <sys/priv.h>
46 #include <sys/proc.h>
47 #include <sys/taskqueue.h>
48 #include <sys/fcntl.h>
49 #include <sys/jail.h>
50 #include <sys/lock.h>
51 #include <sys/mutex.h>
52 #include <sys/racct.h>
53 #include <sys/refcount.h>
54 #include <sys/sx.h>
55 #include <sys/sysent.h>
56 #include <sys/namei.h>
57 #include <sys/mount.h>
58 #include <sys/queue.h>
59 #include <sys/socket.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysctl.h>
62 #include <sys/vnode.h>
63 
64 #include <net/if.h>
65 #include <net/vnet.h>
66 
67 #include <netinet/in.h>
68 
/*
 * Three-way comparison of two IPv4 addresses.
 *
 * Both arguments point to a struct in_addr holding a network-byte-order
 * address.  Returns -1, 0 or 1 when the first address is numerically less
 * than, equal to, or greater than the second.
 */
int
prison_qcmp_v4(const void *ip1, const void *ip2)
{
	const struct in_addr *lhs, *rhs;
	in_addr_t lhs_hbo, rhs_hbo;

	lhs = ip1;
	rhs = ip2;

	/*
	 * The ordering is defined on host-byte-order values.  Comparing the
	 * raw network-byte-order words does not, in general, give the
	 * numeric ordering the rest of the code relies on.
	 */
	lhs_hbo = ntohl(lhs->s_addr);
	rhs_hbo = ntohl(rhs->s_addr);

	/*
	 * Compare explicitly instead of subtracting: the difference of two
	 * addresses does not fit in an int.
	 */
	if (lhs_hbo == rhs_hbo)
		return (0);
	return (lhs_hbo < rhs_hbo ? -1 : 1);
}
93 
94 /*
95  * Restrict a prison's IP address list with its parent's, possibly replacing
96  * it.  Return true if the replacement buffer was used (or would have been).
97  */
98 int
99 prison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
100 {
101 	int ii, ij, used;
102 	struct prison *ppr;
103 
104 	ppr = pr->pr_parent;
105 	if (!(pr->pr_flags & PR_IP4_USER)) {
106 		/* This has no user settings, so just copy the parent's list. */
107 		if (pr->pr_ip4s < ppr->pr_ip4s) {
108 			/*
109 			 * There's no room for the parent's list.  Use the
110 			 * new list buffer, which is assumed to be big enough
111 			 * (if it was passed).  If there's no buffer, try to
112 			 * allocate one.
113 			 */
114 			used = 1;
115 			if (newip4 == NULL) {
116 				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
117 				    M_PRISON, M_NOWAIT);
118 				if (newip4 != NULL)
119 					used = 0;
120 			}
121 			if (newip4 != NULL) {
122 				bcopy(ppr->pr_ip4, newip4,
123 				    ppr->pr_ip4s * sizeof(*newip4));
124 				free(pr->pr_ip4, M_PRISON);
125 				pr->pr_ip4 = newip4;
126 				pr->pr_ip4s = ppr->pr_ip4s;
127 			}
128 			return (used);
129 		}
130 		pr->pr_ip4s = ppr->pr_ip4s;
131 		if (pr->pr_ip4s > 0)
132 			bcopy(ppr->pr_ip4, pr->pr_ip4,
133 			    pr->pr_ip4s * sizeof(*newip4));
134 		else if (pr->pr_ip4 != NULL) {
135 			free(pr->pr_ip4, M_PRISON);
136 			pr->pr_ip4 = NULL;
137 		}
138 	} else if (pr->pr_ip4s > 0) {
139 		/* Remove addresses that aren't in the parent. */
140 		for (ij = 0; ij < ppr->pr_ip4s; ij++)
141 			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
142 				break;
143 		if (ij < ppr->pr_ip4s)
144 			ii = 1;
145 		else {
146 			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
147 			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
148 			ii = 0;
149 		}
150 		for (ij = 1; ii < pr->pr_ip4s; ) {
151 			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
152 				ii++;
153 				continue;
154 			}
155 			switch (ij >= ppr->pr_ip4s ? -1 :
156 				prison_qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
157 			case -1:
158 				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
159 				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
160 				break;
161 			case 0:
162 				ii++;
163 				ij++;
164 				break;
165 			case 1:
166 				ij++;
167 				break;
168 			}
169 		}
170 		if (pr->pr_ip4s == 0) {
171 			free(pr->pr_ip4, M_PRISON);
172 			pr->pr_ip4 = NULL;
173 		}
174 	}
175 	return (0);
176 }
177 
178 /*
179  * Pass back primary IPv4 address of this jail.
180  *
181  * If not restricted return success but do not alter the address.  Caller has
182  * to make sure to initialize it correctly (e.g. INADDR_ANY).
183  *
184  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
185  * Address returned in NBO.
186  */
187 int
188 prison_get_ip4(struct ucred *cred, struct in_addr *ia)
189 {
190 	struct prison *pr;
191 
192 	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
193 	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
194 
195 	pr = cred->cr_prison;
196 	if (!(pr->pr_flags & PR_IP4))
197 		return (0);
198 	mtx_lock(&pr->pr_mtx);
199 	if (!(pr->pr_flags & PR_IP4)) {
200 		mtx_unlock(&pr->pr_mtx);
201 		return (0);
202 	}
203 	if (pr->pr_ip4 == NULL) {
204 		mtx_unlock(&pr->pr_mtx);
205 		return (EAFNOSUPPORT);
206 	}
207 
208 	ia->s_addr = pr->pr_ip4[0].s_addr;
209 	mtx_unlock(&pr->pr_mtx);
210 	return (0);
211 }
212 
213 /*
214  * Return 1 if we should do proper source address selection or are not jailed.
215  * We will return 0 if we should bypass source address selection in favour
216  * of the primary jail IPv4 address. Only in this case *ia will be updated and
217  * returned in NBO.
218  * Return EAFNOSUPPORT, in case this jail does not allow IPv4.
219  */
220 int
221 prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia)
222 {
223 	struct prison *pr;
224 	struct in_addr lia;
225 	int error;
226 
227 	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
228 	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
229 
230 	if (!jailed(cred))
231 		return (1);
232 
233 	pr = cred->cr_prison;
234 	if (pr->pr_flags & PR_IP4_SADDRSEL)
235 		return (1);
236 
237 	lia.s_addr = INADDR_ANY;
238 	error = prison_get_ip4(cred, &lia);
239 	if (error)
240 		return (error);
241 	if (lia.s_addr == INADDR_ANY)
242 		return (1);
243 
244 	ia->s_addr = lia.s_addr;
245 	return (0);
246 }
247 
248 /*
249  * Return true if pr1 and pr2 have the same IPv4 address restrictions.
250  */
251 int
252 prison_equal_ip4(struct prison *pr1, struct prison *pr2)
253 {
254 
255 	if (pr1 == pr2)
256 		return (1);
257 
258 	/*
259 	 * No need to lock since the PR_IP4_USER flag can't be altered for
260 	 * existing prisons.
261 	 */
262 	while (pr1 != &prison0 &&
263 #ifdef VIMAGE
264 	       !(pr1->pr_flags & PR_VNET) &&
265 #endif
266 	       !(pr1->pr_flags & PR_IP4_USER))
267 		pr1 = pr1->pr_parent;
268 	while (pr2 != &prison0 &&
269 #ifdef VIMAGE
270 	       !(pr2->pr_flags & PR_VNET) &&
271 #endif
272 	       !(pr2->pr_flags & PR_IP4_USER))
273 		pr2 = pr2->pr_parent;
274 	return (pr1 == pr2);
275 }
276 
277 /*
278  * Make sure our (source) address is set to something meaningful to this
279  * jail.
280  *
281  * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
282  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
283  * doesn't allow IPv4.  Address passed in in NBO and returned in NBO.
284  */
285 int
286 prison_local_ip4(struct ucred *cred, struct in_addr *ia)
287 {
288 	struct prison *pr;
289 	struct in_addr ia0;
290 	int error;
291 
292 	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
293 	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
294 
295 	pr = cred->cr_prison;
296 	if (!(pr->pr_flags & PR_IP4))
297 		return (0);
298 	mtx_lock(&pr->pr_mtx);
299 	if (!(pr->pr_flags & PR_IP4)) {
300 		mtx_unlock(&pr->pr_mtx);
301 		return (0);
302 	}
303 	if (pr->pr_ip4 == NULL) {
304 		mtx_unlock(&pr->pr_mtx);
305 		return (EAFNOSUPPORT);
306 	}
307 
308 	ia0.s_addr = ntohl(ia->s_addr);
309 	if (ia0.s_addr == INADDR_LOOPBACK) {
310 		ia->s_addr = pr->pr_ip4[0].s_addr;
311 		mtx_unlock(&pr->pr_mtx);
312 		return (0);
313 	}
314 
315 	if (ia0.s_addr == INADDR_ANY) {
316 		/*
317 		 * In case there is only 1 IPv4 address, bind directly.
318 		 */
319 		if (pr->pr_ip4s == 1)
320 			ia->s_addr = pr->pr_ip4[0].s_addr;
321 		mtx_unlock(&pr->pr_mtx);
322 		return (0);
323 	}
324 
325 	error = prison_check_ip4_locked(pr, ia);
326 	mtx_unlock(&pr->pr_mtx);
327 	return (error);
328 }
329 
330 /*
331  * Rewrite destination address in case we will connect to loopback address.
332  *
333  * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
334  * Address passed in in NBO and returned in NBO.
335  */
336 int
337 prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
338 {
339 	struct prison *pr;
340 
341 	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
342 	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
343 
344 	pr = cred->cr_prison;
345 	if (!(pr->pr_flags & PR_IP4))
346 		return (0);
347 	mtx_lock(&pr->pr_mtx);
348 	if (!(pr->pr_flags & PR_IP4)) {
349 		mtx_unlock(&pr->pr_mtx);
350 		return (0);
351 	}
352 	if (pr->pr_ip4 == NULL) {
353 		mtx_unlock(&pr->pr_mtx);
354 		return (EAFNOSUPPORT);
355 	}
356 
357 	if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
358 		ia->s_addr = pr->pr_ip4[0].s_addr;
359 		mtx_unlock(&pr->pr_mtx);
360 		return (0);
361 	}
362 
363 	/*
364 	 * Return success because nothing had to be changed.
365 	 */
366 	mtx_unlock(&pr->pr_mtx);
367 	return (0);
368 }
369 
370 /*
371  * Check if given address belongs to the jail referenced by cred/prison.
372  *
373  * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
374  * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
375  * doesn't allow IPv4.  Address passed in in NBO.
376  */
377 int
378 prison_check_ip4_locked(const struct prison *pr, const struct in_addr *ia)
379 {
380 	int i, a, z, d;
381 
382 	/*
383 	 * Check the primary IP.
384 	 */
385 	if (pr->pr_ip4[0].s_addr == ia->s_addr)
386 		return (0);
387 
388 	/*
389 	 * All the other IPs are sorted so we can do a binary search.
390 	 */
391 	a = 0;
392 	z = pr->pr_ip4s - 2;
393 	while (a <= z) {
394 		i = (a + z) / 2;
395 		d = prison_qcmp_v4(&pr->pr_ip4[i+1], ia);
396 		if (d > 0)
397 			z = i - 1;
398 		else if (d < 0)
399 			a = i + 1;
400 		else
401 			return (0);
402 	}
403 
404 	return (EADDRNOTAVAIL);
405 }
406 
407 int
408 prison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
409 {
410 	struct prison *pr;
411 	int error;
412 
413 	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
414 	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));
415 
416 	pr = cred->cr_prison;
417 	if (!(pr->pr_flags & PR_IP4))
418 		return (0);
419 	mtx_lock(&pr->pr_mtx);
420 	if (!(pr->pr_flags & PR_IP4)) {
421 		mtx_unlock(&pr->pr_mtx);
422 		return (0);
423 	}
424 	if (pr->pr_ip4 == NULL) {
425 		mtx_unlock(&pr->pr_mtx);
426 		return (EAFNOSUPPORT);
427 	}
428 
429 	error = prison_check_ip4_locked(pr, ia);
430 	mtx_unlock(&pr->pr_mtx);
431 	return (error);
432 }
433