1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2024 SkunkWerks GmbH
5 *
6 * This software was developed by Igor Ostapenko <igoro@FreeBSD.org>
7 * under sponsorship from SkunkWerks GmbH.
8 */
9
10 #include <sys/param.h>
11 #include <sys/_bitset.h>
12 #include <sys/bitset.h>
13 #include <sys/lock.h>
14 #include <sys/sx.h>
15 #include <sys/kernel.h>
16 #include <sys/mount.h>
17 #include <sys/malloc.h>
18 #include <sys/jail.h>
19 #include <sys/osd.h>
20 #include <sys/proc.h>
21
22 /*
23 * Buffer limit.
24 *
25 * The hard limit is the actual value used during setting or modification. The
26 * soft limit is used solely by the security.jail.param.meta and .env sysctl. If
27 * the hard limit is decreased, the soft limit may remain higher to ensure that
28 * previously set meta strings can still be correctly interpreted by end-user
29 * interfaces, such as jls(8).
30 */
31
/* Hard limit: compared against incoming and assembled buffers on set. */
static uint32_t jm_maxbufsize_hard = 4096;
/* Soft limit: the size announced through the jail parameter sysctls. */
static uint32_t jm_maxbufsize_soft = 4096;
34
35 static int
jm_sysctl_meta_maxbufsize(SYSCTL_HANDLER_ARGS)36 jm_sysctl_meta_maxbufsize(SYSCTL_HANDLER_ARGS)
37 {
38 int error;
39 uint32_t newmax = 0;
40
41 /* Reading only. */
42
43 if (req->newptr == NULL) {
44 sx_slock(&allprison_lock);
45 error = SYSCTL_OUT(req, &jm_maxbufsize_hard,
46 sizeof(jm_maxbufsize_hard));
47 sx_sunlock(&allprison_lock);
48
49 return (error);
50 }
51
52 /* Reading and writing. */
53
54 sx_xlock(&allprison_lock);
55
56 error = SYSCTL_OUT(req, &jm_maxbufsize_hard,
57 sizeof(jm_maxbufsize_hard));
58 if (error != 0)
59 goto end;
60
61 error = SYSCTL_IN(req, &newmax, sizeof(newmax));
62 if (error != 0)
63 goto end;
64
65 jm_maxbufsize_hard = newmax;
66 if (jm_maxbufsize_hard >= jm_maxbufsize_soft) {
67 jm_maxbufsize_soft = jm_maxbufsize_hard;
68 } else if (TAILQ_EMPTY(&allprison)) {
69 /*
70 * For now, this is the simplest way to
71 * avoid O(n) iteration over all prisons in
72 * case of a large n.
73 */
74 jm_maxbufsize_soft = jm_maxbufsize_hard;
75 }
76
77 end:
78 sx_xunlock(&allprison_lock);
79 return (error);
80 }
81 SYSCTL_PROC(_security_jail, OID_AUTO, meta_maxbufsize,
82 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
83 jm_sysctl_meta_maxbufsize, "IU",
84 "Maximum buffer size of each meta and env");
85
86
87 /* Jail parameter announcement. */
88
89 static int
jm_sysctl_param_meta(SYSCTL_HANDLER_ARGS)90 jm_sysctl_param_meta(SYSCTL_HANDLER_ARGS)
91 {
92 uint32_t soft;
93
94 sx_slock(&allprison_lock);
95 soft = jm_maxbufsize_soft;
96 sx_sunlock(&allprison_lock);
97
98 return (sysctl_jail_param(oidp, arg1, soft, req));
99 }
100 SYSCTL_PROC(_security_jail_param, OID_AUTO, meta,
101 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
102 jm_sysctl_param_meta, "A,keyvalue",
103 "Jail meta information hidden from the jail");
104 SYSCTL_PROC(_security_jail_param, OID_AUTO, env,
105 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
106 jm_sysctl_param_meta, "A,keyvalue",
107 "Jail meta information readable by the jail");
108
109
110 /* Generic OSD-based logic for any metadata buffer. */
111
/* Descriptor of one OSD-backed metadata parameter. */
struct meta {
	char *name;	/* parameter name matched against option names */
	u_int osd_slot;	/* slot id obtained from osd_jail_register() */
	osd_method_t methods[PR_MAXMETHOD]; /* handlers given to osd_jail_register() */
};
117
118 /* A chain of hunks representing the final buffer after all manipulations. */
struct hunk {
	char *p;		/* a buf reference: first byte referred */
	size_t len;		/* number of bytes referred */
	char *owned;		/* must be freed together with the hunk */
	struct hunk *next;	/* following hunk, NULL ends the chain */
};
125
126 static inline struct hunk *
jm_h_alloc(void)127 jm_h_alloc(void)
128 {
129 /* All fields are zeroed. */
130 return (malloc(sizeof(struct hunk), M_PRISON, M_WAITOK | M_ZERO));
131 }
132
133 static inline struct hunk *
jm_h_prepend(struct hunk * h,char * p,size_t len)134 jm_h_prepend(struct hunk *h, char *p, size_t len)
135 {
136 struct hunk *n;
137
138 n = jm_h_alloc();
139 n->p = p;
140 n->len = len;
141 n->next = h;
142 return (n);
143 }
144
145 static inline void
jm_h_cut_line(struct hunk * h,char * begin)146 jm_h_cut_line(struct hunk *h, char *begin)
147 {
148 struct hunk *rem;
149 char *end;
150
151 /* Find the end of key=value. */
152 for (end = begin; (end + 1) < (h->p + h->len); end++)
153 if (*end == '\0' || *end == '\n')
154 break;
155
156 /* Pick up a non-empty remainder. */
157 if ((end + 1) < (h->p + h->len) && *(end + 1) != '\0') {
158 rem = jm_h_alloc();
159 rem->p = end + 1;
160 rem->len = h->p + h->len - rem->p;
161
162 /* insert */
163 rem->next = h->next;
164 h->next = rem;
165 }
166
167 /* Shorten this hunk. */
168 h->len = begin - h->p;
169 }
170
171 static inline void
jm_h_cut_occurrences(struct hunk * h,const char * key,size_t keylen)172 jm_h_cut_occurrences(struct hunk *h, const char *key, size_t keylen)
173 {
174 char *p = h->p;
175
176 #define nexthunk() \
177 do { \
178 h = h->next; \
179 p = (h == NULL) ? NULL : h->p; \
180 } while (0)
181
182 while (p != NULL) {
183 p = strnstr(p, key, h->len - (p - h->p));
184 if (p == NULL) {
185 nexthunk();
186 continue;
187 }
188 if ((p == h->p || *(p - 1) == '\n') && p[keylen] == '=') {
189 jm_h_cut_line(h, p);
190 nexthunk();
191 continue;
192 }
193 /* Continue with this hunk. */
194 p += keylen;
195 /* Empty? The next hunk then. */
196 if ((p - h->p) >= h->len)
197 nexthunk();
198 }
199 }
200
201 static inline size_t
jm_h_len(struct hunk * h)202 jm_h_len(struct hunk *h)
203 {
204 size_t len = 0;
205 while (h != NULL) {
206 len += h->len;
207 h = h->next;
208 }
209 return (len);
210 }
211
212 static inline void
jm_h_assemble(char * dst,struct hunk * h)213 jm_h_assemble(char *dst, struct hunk *h)
214 {
215 while (h != NULL) {
216 if (h->len > 0) {
217 memcpy(dst, h->p, h->len);
218 dst += h->len;
219 /* If not the last hunk then concatenate with \n. */
220 if (h->next != NULL && *(dst - 1) == '\0')
221 *(dst - 1) = '\n';
222 }
223 h = h->next;
224 }
225 }
226
227 static inline struct hunk *
jm_h_freechain(struct hunk * h)228 jm_h_freechain(struct hunk *h)
229 {
230 struct hunk *n = h;
231 while (n != NULL) {
232 h = n;
233 n = h->next;
234 free(h->owned, M_PRISON);
235 free(h, M_PRISON);
236 }
237
238 return (NULL);
239 }
240
241 static int
jm_osd_method_set(void * obj,void * data,const struct meta * meta)242 jm_osd_method_set(void *obj, void *data, const struct meta *meta)
243 {
244 struct prison *pr = obj;
245 struct vfsoptlist *opts = data;
246 struct vfsopt *opt;
247
248 char *origosd;
249 char *origosd_copy;
250 char *oldosd;
251 char *osd;
252 size_t osdlen;
253 struct hunk *h;
254 char *key;
255 size_t keylen;
256 int error;
257 int repeats = 0;
258 bool repeat;
259
260 sx_assert(&allprison_lock, SA_XLOCKED);
261
262 again:
263 origosd = NULL;
264 origosd_copy = NULL;
265 osd = NULL;
266 h = NULL;
267 error = 0;
268 repeat = false;
269 TAILQ_FOREACH(opt, opts, link) {
270 /* Look for options with <metaname> prefix. */
271 if (strstr(opt->name, meta->name) != opt->name)
272 continue;
273 /* Consider only full <metaname> or <metaname>.* ones. */
274 if (opt->name[strlen(meta->name)] != '.' &&
275 opt->name[strlen(meta->name)] != '\0')
276 continue;
277 opt->seen = 1;
278
279 /* The very first preconditions. */
280 if (opt->len < 0)
281 continue;
282 if (opt->len > jm_maxbufsize_hard) {
283 error = EFBIG;
284 break;
285 }
286 /* NULL-terminated strings are expected from vfsopt. */
287 if (opt->value != NULL &&
288 ((char *)opt->value)[opt->len - 1] != '\0') {
289 error = EINVAL;
290 break;
291 }
292
293 /* Work with our own copy of existing metadata. */
294 if (h == NULL) {
295 h = jm_h_alloc(); /* zeroed */
296 mtx_lock(&pr->pr_mtx);
297 origosd = osd_jail_get(pr, meta->osd_slot);
298 if (origosd != NULL) {
299 origosd_copy = malloc(strlen(origosd) + 1,
300 M_PRISON, M_NOWAIT);
301 if (origosd_copy == NULL)
302 error = ENOMEM;
303 else {
304 h->p = origosd_copy;
305 h->len = strlen(origosd) + 1;
306 memcpy(h->p, origosd, h->len);
307 }
308 }
309 mtx_unlock(&pr->pr_mtx);
310 if (error != 0)
311 break;
312 }
313
314 /* 1) Change the whole metadata. */
315 if (strcmp(opt->name, meta->name) == 0) {
316 if (opt->len > jm_maxbufsize_hard) {
317 error = EFBIG;
318 break;
319 }
320 h = jm_h_freechain(h);
321 h = jm_h_prepend(h,
322 (opt->value != NULL) ? opt->value : "",
323 /* avoid empty NULL-terminated string */
324 (opt->len > 1) ? opt->len : 0);
325 continue;
326 }
327
328 /* 2) Or add/replace/remove a specific key=value. */
329 key = opt->name + strlen(meta->name) + 1;
330 keylen = strlen(key);
331 if (keylen < 1) {
332 error = EINVAL;
333 break;
334 }
335 jm_h_cut_occurrences(h, key, keylen);
336 if (opt->value == NULL)
337 continue; /* key removal */
338 h = jm_h_prepend(h, NULL, 0);
339 h->len = keylen + 1 + opt->len; /* key=value\0 */
340 h->owned = malloc(h->len, M_PRISON, M_WAITOK | M_ZERO);
341 h->p = h->owned;
342 memcpy(h->p, key, keylen);
343 h->p[keylen] = '=';
344 memcpy(h->p + keylen + 1, opt->value, opt->len);
345 }
346
347 if (h == NULL || error != 0)
348 goto end;
349
350 /* Assemble the final contiguous buffer. */
351 osdlen = jm_h_len(h);
352 if (osdlen > jm_maxbufsize_hard) {
353 error = EFBIG;
354 goto end;
355 }
356 if (osdlen > 1) {
357 osd = malloc(osdlen, M_PRISON, M_WAITOK);
358 jm_h_assemble(osd, h);
359 osd[osdlen - 1] = '\0'; /* sealed */
360 }
361
362 /* Compare and swap the buffers. */
363 mtx_lock(&pr->pr_mtx);
364 oldosd = osd_jail_get(pr, meta->osd_slot);
365 if (oldosd == origosd) {
366 error = osd_jail_set(pr, meta->osd_slot, osd);
367 } else {
368 /*
369 * The osd(9) framework requires protection only for pr_osd,
370 * which is covered by pr_mtx. Therefore, other code might
371 * legally alter jail metadata without allprison_lock. It
372 * means that here we could override data just added by other
373 * thread. This extra caution with retry mechanism aims to
374 * prevent user data loss in such potential cases.
375 */
376 error = EAGAIN;
377 repeat = true;
378 }
379 mtx_unlock(&pr->pr_mtx);
380 if (error == 0)
381 osd = oldosd;
382
383 end:
384 jm_h_freechain(h);
385 free(osd, M_PRISON);
386 free(origosd_copy, M_PRISON);
387
388 if (repeat && ++repeats < 3)
389 goto again;
390
391 return (error);
392 }
393
394 static int
jm_osd_method_get(void * obj,void * data,const struct meta * meta)395 jm_osd_method_get(void *obj, void *data, const struct meta *meta)
396 {
397 struct prison *pr = obj;
398 struct vfsoptlist *opts = data;
399 struct vfsopt *opt;
400 char *osd = NULL;
401 char empty = '\0';
402 int error = 0;
403 bool locked = false;
404 const char *key;
405 size_t keylen;
406 const char *p;
407
408 sx_assert(&allprison_lock, SA_SLOCKED);
409
410 TAILQ_FOREACH(opt, opts, link) {
411 if (strstr(opt->name, meta->name) != opt->name)
412 continue;
413 if (opt->name[strlen(meta->name)] != '.' &&
414 opt->name[strlen(meta->name)] != '\0')
415 continue;
416
417 if (!locked) {
418 mtx_lock(&pr->pr_mtx);
419 locked = true;
420 osd = osd_jail_get(pr, meta->osd_slot);
421 if (osd == NULL)
422 osd = ∅
423 }
424
425 /* Provide full metadata. */
426 if (strcmp(opt->name, meta->name) == 0) {
427 if (strlcpy(opt->value, osd, opt->len) >= opt->len) {
428 error = EINVAL;
429 break;
430 }
431 opt->seen = 1;
432 continue;
433 }
434
435 /* Extract a specific key=value. */
436 p = osd;
437 key = opt->name + strlen(meta->name) + 1;
438 keylen = strlen(key);
439 while ((p = strstr(p, key)) != NULL) {
440 if ((p == osd || *(p - 1) == '\n')
441 && p[keylen] == '=') {
442 if (strlcpy(opt->value, p + keylen + 1,
443 MIN(opt->len, strchr(p + keylen + 1, '\n') -
444 (p + keylen + 1) + 1)) >= opt->len) {
445 error = EINVAL;
446 break;
447 }
448 opt->seen = 1;
449 }
450 p += keylen;
451 }
452 if (error != 0)
453 break;
454 }
455
456 if (locked)
457 mtx_unlock(&pr->pr_mtx);
458
459 return (error);
460 }
461
462 static int
jm_osd_method_check(void * obj __unused,void * data,const struct meta * meta)463 jm_osd_method_check(void *obj __unused, void *data, const struct meta *meta)
464 {
465 struct vfsoptlist *opts = data;
466 struct vfsopt *opt;
467
468 TAILQ_FOREACH(opt, opts, link) {
469 if (strstr(opt->name, meta->name) != opt->name)
470 continue;
471 if (opt->name[strlen(meta->name)] != '.' &&
472 opt->name[strlen(meta->name)] != '\0')
473 continue;
474 opt->seen = 1;
475 }
476
477 return (0);
478 }
479
480 static void
jm_osd_destructor(void * osd)481 jm_osd_destructor(void *osd)
482 {
483 free(osd, M_PRISON);
484 }
485
486
487 /* OSD for "meta" param */
488
489 static struct meta meta;
490
491 static inline int
jm_osd_method_set_meta(void * obj,void * data)492 jm_osd_method_set_meta(void *obj, void *data)
493 {
494 return (jm_osd_method_set(obj, data, &meta));
495 }
496
497 static inline int
jm_osd_method_get_meta(void * obj,void * data)498 jm_osd_method_get_meta(void *obj, void *data)
499 {
500 return (jm_osd_method_get(obj, data, &meta));
501 }
502
503 static inline int
jm_osd_method_check_meta(void * obj,void * data)504 jm_osd_method_check_meta(void *obj, void *data)
505 {
506 return (jm_osd_method_check(obj, data, &meta));
507 }
508
509 static struct meta meta = {
510 .name = JAIL_META_PRIVATE,
511 .osd_slot = 0,
512 .methods = {
513 [PR_METHOD_SET] = jm_osd_method_set_meta,
514 [PR_METHOD_GET] = jm_osd_method_get_meta,
515 [PR_METHOD_CHECK] = jm_osd_method_check_meta,
516 }
517 };
518
519
520 /* OSD for "env" param */
521
522 static struct meta env;
523
524 static inline int
jm_osd_method_set_env(void * obj,void * data)525 jm_osd_method_set_env(void *obj, void *data)
526 {
527 return (jm_osd_method_set(obj, data, &env));
528 }
529
530 static inline int
jm_osd_method_get_env(void * obj,void * data)531 jm_osd_method_get_env(void *obj, void *data)
532 {
533 return (jm_osd_method_get(obj, data, &env));
534 }
535
536 static inline int
jm_osd_method_check_env(void * obj,void * data)537 jm_osd_method_check_env(void *obj, void *data)
538 {
539 return (jm_osd_method_check(obj, data, &env));
540 }
541
542 static struct meta env = {
543 .name = JAIL_META_SHARED,
544 .osd_slot = 0,
545 .methods = {
546 [PR_METHOD_SET] = jm_osd_method_set_env,
547 [PR_METHOD_GET] = jm_osd_method_get_env,
548 [PR_METHOD_CHECK] = jm_osd_method_check_env,
549 }
550 };
551
552
553 /* A jail can read its "env". */
554
555 static int
jm_sysctl_env(SYSCTL_HANDLER_ARGS)556 jm_sysctl_env(SYSCTL_HANDLER_ARGS)
557 {
558 struct prison *pr;
559 char empty = '\0';
560 char *tmpbuf;
561 size_t outlen;
562 int error = 0;
563
564 pr = req->td->td_ucred->cr_prison;
565
566 mtx_lock(&pr->pr_mtx);
567 arg1 = osd_jail_get(pr, env.osd_slot);
568 if (arg1 == NULL) {
569 tmpbuf = ∅
570 outlen = 1;
571 } else {
572 outlen = strlen(arg1) + 1;
573 if (req->oldptr != NULL) {
574 tmpbuf = malloc(outlen, M_PRISON, M_NOWAIT);
575 error = (tmpbuf == NULL) ? ENOMEM : 0;
576 if (error == 0)
577 memcpy(tmpbuf, arg1, outlen);
578 }
579 }
580 mtx_unlock(&pr->pr_mtx);
581
582 if (error != 0)
583 return (error);
584
585 if (req->oldptr == NULL)
586 SYSCTL_OUT(req, NULL, outlen);
587 else {
588 SYSCTL_OUT(req, tmpbuf, outlen);
589 if (tmpbuf != &empty)
590 free(tmpbuf, M_PRISON);
591 }
592
593 return (error);
594 }
595 SYSCTL_PROC(_security_jail, OID_AUTO, env,
596 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
597 0, 0, jm_sysctl_env, "A", "Meta information provided by parent jail");
598
599
600 /* Setup and tear down. */
601
602 static int
jm_sysinit(void * arg __unused)603 jm_sysinit(void *arg __unused)
604 {
605 meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods);
606 env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods);
607
608 return (0);
609 }
610
611 static int
jm_sysuninit(void * arg __unused)612 jm_sysuninit(void *arg __unused)
613 {
614 osd_jail_deregister(meta.osd_slot);
615 osd_jail_deregister(env.osd_slot);
616
617 return (0);
618 }
619
620 SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL);
621 SYSUNINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysuninit, NULL);
622