xref: /freebsd/sys/kern/kern_jailmeta.c (revision 30e6e008bc06385a66756bebb41676f4f9017eca)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2024 SkunkWerks GmbH
5  *
6  * This software was developed by Igor Ostapenko <igoro@FreeBSD.org>
7  * under sponsorship from SkunkWerks GmbH.
8  */
9 
10 #include <sys/param.h>
11 #include <sys/_bitset.h>
12 #include <sys/bitset.h>
13 #include <sys/lock.h>
14 #include <sys/sx.h>
15 #include <sys/kernel.h>
16 #include <sys/mount.h>
17 #include <sys/malloc.h>
18 #include <sys/jail.h>
19 #include <sys/osd.h>
20 #include <sys/proc.h>
21 
22 /*
23  * Buffer limit.
24  *
25  * The hard limit is the actual value used during setting or modification. The
26  * soft limit is used solely by the security.jail.param.meta and .env sysctl. If
27  * the hard limit is decreased, the soft limit may remain higher to ensure that
28  * previously set meta strings can still be correctly interpreted by end-user
29  * interfaces, such as jls(8).
30  */
31 
/*
 * Hard limit: checked against every option and against the assembled buffer
 * when metadata is set; changed through the meta_maxbufsize sysctl handler.
 */
static uint32_t jm_maxbufsize_hard = 4096;
/*
 * Soft limit: the size announced by the jail parameter sysctls; it follows the
 * hard limit upwards and is lowered only while no prisons exist.
 */
static uint32_t jm_maxbufsize_soft = 4096;
34 
35 static int
jm_sysctl_meta_maxbufsize(SYSCTL_HANDLER_ARGS)36 jm_sysctl_meta_maxbufsize(SYSCTL_HANDLER_ARGS)
37 {
38 	int error;
39 	uint32_t newmax = 0;
40 
41 	/* Reading only. */
42 
43 	if (req->newptr == NULL) {
44 		sx_slock(&allprison_lock);
45 		error = SYSCTL_OUT(req, &jm_maxbufsize_hard,
46 		    sizeof(jm_maxbufsize_hard));
47 		sx_sunlock(&allprison_lock);
48 
49 		return (error);
50 	}
51 
52 	/* Reading and writing. */
53 
54 	sx_xlock(&allprison_lock);
55 
56 	error = SYSCTL_OUT(req, &jm_maxbufsize_hard,
57 	    sizeof(jm_maxbufsize_hard));
58 	if (error != 0)
59 		goto end;
60 
61 	error = SYSCTL_IN(req, &newmax, sizeof(newmax));
62 	if (error != 0)
63 		goto end;
64 
65 	jm_maxbufsize_hard = newmax;
66 	if (jm_maxbufsize_hard >= jm_maxbufsize_soft) {
67 		jm_maxbufsize_soft = jm_maxbufsize_hard;
68 	} else if (TAILQ_EMPTY(&allprison)) {
69 		/*
70 		 * For now, this is the simplest way to
71 		 * avoid O(n) iteration over all prisons in
72 		 * case of a large n.
73 		 */
74 		jm_maxbufsize_soft = jm_maxbufsize_hard;
75 	}
76 
77 end:
78 	sx_xunlock(&allprison_lock);
79 	return (error);
80 }
81 SYSCTL_PROC(_security_jail, OID_AUTO, meta_maxbufsize,
82     CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
83     jm_sysctl_meta_maxbufsize, "IU",
84     "Maximum buffer size of each meta and env");
85 
86 
87 /* Jail parameter announcement. */
88 
89 static int
jm_sysctl_param_meta(SYSCTL_HANDLER_ARGS)90 jm_sysctl_param_meta(SYSCTL_HANDLER_ARGS)
91 {
92 	uint32_t soft;
93 
94 	sx_slock(&allprison_lock);
95 	soft = jm_maxbufsize_soft;
96 	sx_sunlock(&allprison_lock);
97 
98 	return (sysctl_jail_param(oidp, arg1, soft, req));
99 }
100 SYSCTL_PROC(_security_jail_param, OID_AUTO, meta,
101     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
102     jm_sysctl_param_meta, "A,keyvalue",
103     "Jail meta information hidden from the jail");
104 SYSCTL_PROC(_security_jail_param, OID_AUTO, env,
105     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
106     jm_sysctl_param_meta, "A,keyvalue",
107     "Jail meta information readable by the jail");
108 
109 
110 /* Generic OSD-based logic for any metadata buffer. */
111 
112 struct meta {
113 	char *name;
114 	u_int osd_slot;
115 	osd_method_t methods[PR_MAXMETHOD];
116 };
117 
/*
 * One link of a singly linked chain of byte ranges.  Concatenating the ranges
 * in chain order yields the final metadata buffer after all manipulations.
 */
struct hunk {
	/* Start of the referenced bytes; may point into a foreign buffer. */
	char *p;
	/* Number of bytes referenced starting at p. */
	size_t len;
	/* Allocation released together with the hunk, or NULL. */
	char *owned;
	/* Following hunk, or NULL at the end of the chain. */
	struct hunk *next;
};
125 
126 static inline struct hunk *
jm_h_alloc(void)127 jm_h_alloc(void)
128 {
129 	/* All fields are zeroed. */
130 	return (malloc(sizeof(struct hunk), M_PRISON, M_WAITOK | M_ZERO));
131 }
132 
133 static inline struct hunk *
jm_h_prepend(struct hunk * h,char * p,size_t len)134 jm_h_prepend(struct hunk *h, char *p, size_t len)
135 {
136 	struct hunk *n;
137 
138 	n = jm_h_alloc();
139 	n->p = p;
140 	n->len = len;
141 	n->next = h;
142 	return (n);
143 }
144 
145 static inline void
jm_h_cut_line(struct hunk * h,char * begin)146 jm_h_cut_line(struct hunk *h, char *begin)
147 {
148 	struct hunk *rem;
149 	char *end;
150 
151 	/* Find the end of key=value. */
152 	for (end = begin; (end + 1) < (h->p + h->len); end++)
153 		if (*end == '\0' || *end == '\n')
154 			break;
155 
156 	/* Pick up a non-empty remainder. */
157 	if ((end + 1) < (h->p + h->len) && *(end + 1) != '\0') {
158 		rem = jm_h_alloc();
159 		rem->p = end + 1;
160 		rem->len = h->p + h->len - rem->p;
161 
162 		/* insert */
163 		rem->next = h->next;
164 		h->next = rem;
165 	}
166 
167 	/* Shorten this hunk. */
168 	h->len = begin - h->p;
169 }
170 
171 static inline void
jm_h_cut_occurrences(struct hunk * h,const char * key,size_t keylen)172 jm_h_cut_occurrences(struct hunk *h, const char *key, size_t keylen)
173 {
174 	char *p = h->p;
175 
176 #define nexthunk()					\
177 	do {						\
178 		h = h->next;				\
179 		p = (h == NULL) ? NULL : h->p;		\
180 	} while (0)
181 
182 	while (p != NULL) {
183 		p = strnstr(p, key, h->len - (p - h->p));
184 		if (p == NULL) {
185 			nexthunk();
186 			continue;
187 		}
188 		if ((p == h->p || *(p - 1) == '\n') && p[keylen] == '=') {
189 			jm_h_cut_line(h, p);
190 			nexthunk();
191 			continue;
192 		}
193 		/* Continue with this hunk. */
194 		p += keylen;
195 		/* Empty? The next hunk then. */
196 		if ((p - h->p) >= h->len)
197 			nexthunk();
198 	}
199 }
200 
201 static inline size_t
jm_h_len(struct hunk * h)202 jm_h_len(struct hunk *h)
203 {
204 	size_t len = 0;
205 	while (h != NULL) {
206 		len += h->len;
207 		h = h->next;
208 	}
209 	return (len);
210 }
211 
212 static inline void
jm_h_assemble(char * dst,struct hunk * h)213 jm_h_assemble(char *dst, struct hunk *h)
214 {
215 	while (h != NULL) {
216 		if (h->len > 0) {
217 			memcpy(dst, h->p, h->len);
218 			dst += h->len;
219 			/* If not the last hunk then concatenate with \n. */
220 			if (h->next != NULL && *(dst - 1) == '\0')
221 				*(dst - 1) = '\n';
222 		}
223 		h = h->next;
224 	}
225 }
226 
227 static inline struct hunk *
jm_h_freechain(struct hunk * h)228 jm_h_freechain(struct hunk *h)
229 {
230 	struct hunk *n = h;
231 	while (n != NULL) {
232 		h = n;
233 		n = h->next;
234 		free(h->owned, M_PRISON);
235 		free(h, M_PRISON);
236 	}
237 
238 	return (NULL);
239 }
240 
241 static int
jm_osd_method_set(void * obj,void * data,const struct meta * meta)242 jm_osd_method_set(void *obj, void *data, const struct meta *meta)
243 {
244 	struct prison *pr = obj;
245 	struct vfsoptlist *opts = data;
246 	struct vfsopt *opt;
247 
248 	char *origosd;
249 	char *origosd_copy;
250 	char *oldosd;
251 	char *osd;
252 	size_t osdlen;
253 	struct hunk *h;
254 	char *key;
255 	size_t keylen;
256 	int error;
257 	int repeats = 0;
258 	bool repeat;
259 
260 	sx_assert(&allprison_lock, SA_XLOCKED);
261 
262 again:
263 	origosd = NULL;
264 	origosd_copy = NULL;
265 	osd = NULL;
266 	h = NULL;
267 	error = 0;
268 	repeat = false;
269 	TAILQ_FOREACH(opt, opts, link) {
270 		/* Look for options with <metaname> prefix. */
271 		if (strstr(opt->name, meta->name) != opt->name)
272 			continue;
273 		/* Consider only full <metaname> or <metaname>.* ones. */
274 		if (opt->name[strlen(meta->name)] != '.' &&
275 		    opt->name[strlen(meta->name)] != '\0')
276 			continue;
277 		opt->seen = 1;
278 
279 		/* The very first preconditions. */
280 		if (opt->len < 0)
281 			continue;
282 		if (opt->len > jm_maxbufsize_hard) {
283 			error = EFBIG;
284 			break;
285 		}
286 		/* NULL-terminated strings are expected from vfsopt. */
287 		if (opt->value != NULL &&
288 		    ((char *)opt->value)[opt->len - 1] != '\0') {
289 			error = EINVAL;
290 			break;
291 		}
292 
293 		/* Work with our own copy of existing metadata. */
294 		if (h == NULL) {
295 			h = jm_h_alloc(); /* zeroed */
296 			mtx_lock(&pr->pr_mtx);
297 			origosd = osd_jail_get(pr, meta->osd_slot);
298 			if (origosd != NULL) {
299 				origosd_copy = malloc(strlen(origosd) + 1,
300 				    M_PRISON, M_NOWAIT);
301 				if (origosd_copy == NULL)
302 					error = ENOMEM;
303 				else {
304 					h->p = origosd_copy;
305 					h->len = strlen(origosd) + 1;
306 					memcpy(h->p, origosd, h->len);
307 				}
308 			}
309 			mtx_unlock(&pr->pr_mtx);
310 			if (error != 0)
311 				break;
312 		}
313 
314 		/* 1) Change the whole metadata. */
315 		if (strcmp(opt->name, meta->name) == 0) {
316 			if (opt->len > jm_maxbufsize_hard) {
317 				error = EFBIG;
318 				break;
319 			}
320 			h = jm_h_freechain(h);
321 			h = jm_h_prepend(h,
322 			    (opt->value != NULL) ? opt->value : "",
323 			    /* avoid empty NULL-terminated string */
324 			    (opt->len > 1) ? opt->len : 0);
325 			continue;
326 		}
327 
328 		/* 2) Or add/replace/remove a specific key=value. */
329 		key = opt->name + strlen(meta->name) + 1;
330 		keylen = strlen(key);
331 		if (keylen < 1) {
332 			error = EINVAL;
333 			break;
334 		}
335 		jm_h_cut_occurrences(h, key, keylen);
336 		if (opt->value == NULL)
337 			continue; /* key removal */
338 		h = jm_h_prepend(h, NULL, 0);
339 		h->len = keylen + 1 + opt->len; /* key=value\0 */
340 		h->owned = malloc(h->len, M_PRISON, M_WAITOK | M_ZERO);
341 		h->p = h->owned;
342 		memcpy(h->p, key, keylen);
343 		h->p[keylen] = '=';
344 		memcpy(h->p + keylen + 1, opt->value, opt->len);
345 	}
346 
347 	if (h == NULL || error != 0)
348 		goto end;
349 
350 	/* Assemble the final contiguous buffer. */
351 	osdlen = jm_h_len(h);
352 	if (osdlen > jm_maxbufsize_hard) {
353 		error = EFBIG;
354 		goto end;
355 	}
356 	if (osdlen > 1) {
357 		osd = malloc(osdlen, M_PRISON, M_WAITOK);
358 		jm_h_assemble(osd, h);
359 		osd[osdlen - 1] = '\0'; /* sealed */
360 	}
361 
362 	/* Compare and swap the buffers. */
363 	mtx_lock(&pr->pr_mtx);
364 	oldosd = osd_jail_get(pr, meta->osd_slot);
365 	if (oldosd == origosd) {
366 		error = osd_jail_set(pr, meta->osd_slot, osd);
367 	} else {
368 		/*
369 		 * The osd(9) framework requires protection only for pr_osd,
370 		 * which is covered by pr_mtx. Therefore, other code might
371 		 * legally alter jail metadata without allprison_lock. It
372 		 * means that here we could override data just added by other
373 		 * thread. This extra caution with retry mechanism aims to
374 		 * prevent user data loss in such potential cases.
375 		 */
376 		error = EAGAIN;
377 		repeat = true;
378 	}
379 	mtx_unlock(&pr->pr_mtx);
380 	if (error == 0)
381 		osd = oldosd;
382 
383 end:
384 	jm_h_freechain(h);
385 	free(osd, M_PRISON);
386 	free(origosd_copy, M_PRISON);
387 
388 	if (repeat && ++repeats < 3)
389 		goto again;
390 
391 	return (error);
392 }
393 
394 static int
jm_osd_method_get(void * obj,void * data,const struct meta * meta)395 jm_osd_method_get(void *obj, void *data, const struct meta *meta)
396 {
397 	struct prison *pr = obj;
398 	struct vfsoptlist *opts = data;
399 	struct vfsopt *opt;
400 	char *osd = NULL;
401 	char empty = '\0';
402 	int error = 0;
403 	bool locked = false;
404 	const char *key;
405 	size_t keylen;
406 	const char *p;
407 
408 	sx_assert(&allprison_lock, SA_SLOCKED);
409 
410 	TAILQ_FOREACH(opt, opts, link) {
411 		if (strstr(opt->name, meta->name) != opt->name)
412 			continue;
413 		if (opt->name[strlen(meta->name)] != '.' &&
414 		    opt->name[strlen(meta->name)] != '\0')
415 			continue;
416 
417 		if (!locked) {
418 			mtx_lock(&pr->pr_mtx);
419 			locked = true;
420 			osd = osd_jail_get(pr, meta->osd_slot);
421 			if (osd == NULL)
422 				osd = &empty;
423 		}
424 
425 		/* Provide full metadata. */
426 		if (strcmp(opt->name, meta->name) == 0) {
427 			if (strlcpy(opt->value, osd, opt->len) >= opt->len) {
428 				error = EINVAL;
429 				break;
430 			}
431 			opt->seen = 1;
432 			continue;
433 		}
434 
435 		/* Extract a specific key=value. */
436 		p = osd;
437 		key = opt->name + strlen(meta->name) + 1;
438 		keylen = strlen(key);
439 		while ((p = strstr(p, key)) != NULL) {
440 			if ((p == osd || *(p - 1) == '\n')
441 			    && p[keylen] == '=') {
442 				if (strlcpy(opt->value, p + keylen + 1,
443 				    MIN(opt->len, strchr(p + keylen + 1, '\n') -
444 				    (p + keylen + 1) + 1)) >= opt->len) {
445 					error = EINVAL;
446 					break;
447 				}
448 				opt->seen = 1;
449 			}
450 			p += keylen;
451 		}
452 		if (error != 0)
453 			break;
454 	}
455 
456 	if (locked)
457 		mtx_unlock(&pr->pr_mtx);
458 
459 	return (error);
460 }
461 
462 static int
jm_osd_method_check(void * obj __unused,void * data,const struct meta * meta)463 jm_osd_method_check(void *obj __unused, void *data, const struct meta *meta)
464 {
465 	struct vfsoptlist *opts = data;
466 	struct vfsopt *opt;
467 
468 	TAILQ_FOREACH(opt, opts, link) {
469 		if (strstr(opt->name, meta->name) != opt->name)
470 			continue;
471 		if (opt->name[strlen(meta->name)] != '.' &&
472 		    opt->name[strlen(meta->name)] != '\0')
473 			continue;
474 		opt->seen = 1;
475 	}
476 
477 	return (0);
478 }
479 
480 static void
jm_osd_destructor(void * osd)481 jm_osd_destructor(void *osd)
482 {
483 	free(osd, M_PRISON);
484 }
485 
486 
487 /* OSD for "meta" param */
488 
489 static struct meta meta;
490 
491 static inline int
jm_osd_method_set_meta(void * obj,void * data)492 jm_osd_method_set_meta(void *obj, void *data)
493 {
494 	return (jm_osd_method_set(obj, data, &meta));
495 }
496 
497 static inline int
jm_osd_method_get_meta(void * obj,void * data)498 jm_osd_method_get_meta(void *obj, void *data)
499 {
500 	return (jm_osd_method_get(obj, data, &meta));
501 }
502 
503 static inline int
jm_osd_method_check_meta(void * obj,void * data)504 jm_osd_method_check_meta(void *obj, void *data)
505 {
506 	return (jm_osd_method_check(obj, data, &meta));
507 }
508 
509 static struct meta meta = {
510 	.name = JAIL_META_PRIVATE,
511 	.osd_slot = 0,
512 	.methods = {
513 		[PR_METHOD_SET] =	jm_osd_method_set_meta,
514 		[PR_METHOD_GET] =	jm_osd_method_get_meta,
515 		[PR_METHOD_CHECK] =	jm_osd_method_check_meta,
516 	}
517 };
518 
519 
520 /* OSD for "env" param */
521 
522 static struct meta env;
523 
524 static inline int
jm_osd_method_set_env(void * obj,void * data)525 jm_osd_method_set_env(void *obj, void *data)
526 {
527 	return (jm_osd_method_set(obj, data, &env));
528 }
529 
530 static inline int
jm_osd_method_get_env(void * obj,void * data)531 jm_osd_method_get_env(void *obj, void *data)
532 {
533 	return (jm_osd_method_get(obj, data, &env));
534 }
535 
536 static inline int
jm_osd_method_check_env(void * obj,void * data)537 jm_osd_method_check_env(void *obj, void *data)
538 {
539 	return (jm_osd_method_check(obj, data, &env));
540 }
541 
542 static struct meta env = {
543 	.name = JAIL_META_SHARED,
544 	.osd_slot = 0,
545 	.methods = {
546 		[PR_METHOD_SET] =	jm_osd_method_set_env,
547 		[PR_METHOD_GET] =	jm_osd_method_get_env,
548 		[PR_METHOD_CHECK] =	jm_osd_method_check_env,
549 	}
550 };
551 
552 
553 /* A jail can read its "env". */
554 
555 static int
jm_sysctl_env(SYSCTL_HANDLER_ARGS)556 jm_sysctl_env(SYSCTL_HANDLER_ARGS)
557 {
558 	struct prison *pr;
559 	char empty = '\0';
560 	char *tmpbuf;
561 	size_t outlen;
562 	int error = 0;
563 
564 	pr = req->td->td_ucred->cr_prison;
565 
566 	mtx_lock(&pr->pr_mtx);
567 	arg1 = osd_jail_get(pr, env.osd_slot);
568 	if (arg1 == NULL) {
569 		tmpbuf = &empty;
570 		outlen = 1;
571 	} else {
572 		outlen = strlen(arg1) + 1;
573 		if (req->oldptr != NULL) {
574 			tmpbuf = malloc(outlen, M_PRISON, M_NOWAIT);
575 			error = (tmpbuf == NULL) ? ENOMEM : 0;
576 			if (error == 0)
577 				memcpy(tmpbuf, arg1, outlen);
578 		}
579 	}
580 	mtx_unlock(&pr->pr_mtx);
581 
582 	if (error != 0)
583 		return (error);
584 
585 	if (req->oldptr == NULL)
586 		SYSCTL_OUT(req, NULL, outlen);
587 	else {
588 		SYSCTL_OUT(req, tmpbuf, outlen);
589 		if (tmpbuf != &empty)
590 			free(tmpbuf, M_PRISON);
591 	}
592 
593 	return (error);
594 }
595 SYSCTL_PROC(_security_jail, OID_AUTO, env,
596     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
597     0, 0, jm_sysctl_env, "A", "Meta information provided by parent jail");
598 
599 
600 /* Setup and tear down. */
601 
602 static int
jm_sysinit(void * arg __unused)603 jm_sysinit(void *arg __unused)
604 {
605 	meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods);
606 	env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods);
607 
608 	return (0);
609 }
610 
611 static int
jm_sysuninit(void * arg __unused)612 jm_sysuninit(void *arg __unused)
613 {
614 	osd_jail_deregister(meta.osd_slot);
615 	osd_jail_deregister(env.osd_slot);
616 
617 	return (0);
618 }
619 
/* Hook slot registration and deregistration into the SI_SUB_DRIVERS stage. */
SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL);
SYSUNINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysuninit, NULL);
622