1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2024 SkunkWerks GmbH 5 * 6 * This software was developed by Igor Ostapenko <igoro@FreeBSD.org> 7 * under sponsorship from SkunkWerks GmbH. 8 */ 9 10 #include <sys/param.h> 11 #include <sys/_bitset.h> 12 #include <sys/bitset.h> 13 #include <sys/lock.h> 14 #include <sys/sx.h> 15 #include <sys/kernel.h> 16 #include <sys/mount.h> 17 #include <sys/malloc.h> 18 #include <sys/jail.h> 19 #include <sys/osd.h> 20 #include <sys/proc.h> 21 22 /* 23 * Buffer limit. 24 * 25 * The hard limit is the actual value used during setting or modification. The 26 * soft limit is used solely by the security.jail.param.meta and .env sysctl. If 27 * the hard limit is decreased, the soft limit may remain higher to ensure that 28 * previously set meta strings can still be correctly interpreted by end-user 29 * interfaces, such as jls(8). 30 */ 31 32 static uint32_t jm_maxbufsize_hard = 4096; 33 static uint32_t jm_maxbufsize_soft = 4096; 34 35 static int 36 jm_sysctl_meta_maxbufsize(SYSCTL_HANDLER_ARGS) 37 { 38 int error; 39 uint32_t newmax = 0; 40 41 /* Reading only. */ 42 43 if (req->newptr == NULL) { 44 sx_slock(&allprison_lock); 45 error = SYSCTL_OUT(req, &jm_maxbufsize_hard, 46 sizeof(jm_maxbufsize_hard)); 47 sx_sunlock(&allprison_lock); 48 49 return (error); 50 } 51 52 /* Reading and writing. */ 53 54 sx_xlock(&allprison_lock); 55 56 error = SYSCTL_OUT(req, &jm_maxbufsize_hard, 57 sizeof(jm_maxbufsize_hard)); 58 if (error != 0) 59 goto end; 60 61 error = SYSCTL_IN(req, &newmax, sizeof(newmax)); 62 if (error != 0) 63 goto end; 64 65 jm_maxbufsize_hard = newmax; 66 if (jm_maxbufsize_hard >= jm_maxbufsize_soft) { 67 jm_maxbufsize_soft = jm_maxbufsize_hard; 68 } else if (TAILQ_EMPTY(&allprison)) { 69 /* 70 * For now, this is the simplest way to 71 * avoid O(n) iteration over all prisons in 72 * case of a large n. 73 */ 74 jm_maxbufsize_soft = jm_maxbufsize_hard; 75 } 76 77 end: 78 sx_xunlock(&allprison_lock); 79 return (error); 80 } 81 SYSCTL_PROC(_security_jail, OID_AUTO, meta_maxbufsize, 82 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 83 jm_sysctl_meta_maxbufsize, "IU", 84 "Maximum buffer size of each meta and env"); 85 86 87 /* Jail parameter announcement. */ 88 89 static int 90 jm_sysctl_param_meta(SYSCTL_HANDLER_ARGS) 91 { 92 uint32_t soft; 93 94 sx_slock(&allprison_lock); 95 soft = jm_maxbufsize_soft; 96 sx_sunlock(&allprison_lock); 97 98 return (sysctl_jail_param(oidp, arg1, soft, req)); 99 } 100 SYSCTL_PROC(_security_jail_param, OID_AUTO, meta, 101 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 102 jm_sysctl_param_meta, "A,keyvalue", 103 "Jail meta information hidden from the jail"); 104 SYSCTL_PROC(_security_jail_param, OID_AUTO, env, 105 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, 106 jm_sysctl_param_meta, "A,keyvalue", 107 "Jail meta information readable by the jail"); 108 109 110 /* Generic OSD-based logic for any metadata buffer. */ 111 112 struct meta { 113 char *name; 114 u_int osd_slot; 115 osd_method_t methods[PR_MAXMETHOD]; 116 }; 117 118 /* A chain of hunks representing the final buffer after all manipulations. */ 119 struct hunk { 120 char *p; /* a buf reference */ 121 size_t len; /* number of bytes referred */ 122 char *owned; /* must be freed */ 123 struct hunk *next; 124 }; 125 126 static inline struct hunk * 127 jm_h_alloc(void) 128 { 129 /* All fields are zeroed. */ 130 return (malloc(sizeof(struct hunk), M_PRISON, M_WAITOK | M_ZERO)); 131 } 132 133 static inline struct hunk * 134 jm_h_prepend(struct hunk *h, char *p, size_t len) 135 { 136 struct hunk *n; 137 138 n = jm_h_alloc(); 139 n->p = p; 140 n->len = len; 141 n->next = h; 142 return (n); 143 } 144 145 static inline void 146 jm_h_cut_line(struct hunk *h, char *begin) 147 { 148 struct hunk *rem; 149 char *end; 150 151 /* Find the end of key=value. */ 152 for (end = begin; (end + 1) < (h->p + h->len); end++) 153 if (*end == '\0' || *end == '\n') 154 break; 155 156 /* Pick up a non-empty remainder. */ 157 if ((end + 1) < (h->p + h->len) && *(end + 1) != '\0') { 158 rem = jm_h_alloc(); 159 rem->p = end + 1; 160 rem->len = h->p + h->len - rem->p; 161 162 /* insert */ 163 rem->next = h->next; 164 h->next = rem; 165 } 166 167 /* Shorten this hunk. */ 168 h->len = begin - h->p; 169 } 170 171 static inline void 172 jm_h_cut_occurrences(struct hunk *h, const char *key, size_t keylen) 173 { 174 char *p = h->p; 175 176 #define nexthunk() \ 177 do { \ 178 h = h->next; \ 179 p = (h == NULL) ? NULL : h->p; \ 180 } while (0) 181 182 while (p != NULL) { 183 p = strnstr(p, key, h->len - (p - h->p)); 184 if (p == NULL) { 185 nexthunk(); 186 continue; 187 } 188 if ((p == h->p || *(p - 1) == '\n') && p[keylen] == '=') { 189 jm_h_cut_line(h, p); 190 nexthunk(); 191 continue; 192 } 193 /* Continue with this hunk. */ 194 p += keylen; 195 /* Empty? The next hunk then. */ 196 if ((p - h->p) >= h->len) 197 nexthunk(); 198 } 199 } 200 201 static inline size_t 202 jm_h_len(struct hunk *h) 203 { 204 size_t len = 0; 205 while (h != NULL) { 206 len += h->len; 207 h = h->next; 208 } 209 return (len); 210 } 211 212 static inline void 213 jm_h_assemble(char *dst, struct hunk *h) 214 { 215 while (h != NULL) { 216 if (h->len > 0) { 217 memcpy(dst, h->p, h->len); 218 dst += h->len; 219 /* If not the last hunk then concatenate with \n. */ 220 if (h->next != NULL && *(dst - 1) == '\0') 221 *(dst - 1) = '\n'; 222 } 223 h = h->next; 224 } 225 } 226 227 static inline struct hunk * 228 jm_h_freechain(struct hunk *h) 229 { 230 struct hunk *n = h; 231 while (n != NULL) { 232 h = n; 233 n = h->next; 234 free(h->owned, M_PRISON); 235 free(h, M_PRISON); 236 } 237 238 return (NULL); 239 } 240 241 static int 242 jm_osd_method_set(void *obj, void *data, const struct meta *meta) 243 { 244 struct prison *pr = obj; 245 struct vfsoptlist *opts = data; 246 struct vfsopt *opt; 247 248 char *origosd; 249 char *origosd_copy; 250 char *oldosd; 251 char *osd; 252 size_t osdlen; 253 struct hunk *h; 254 char *key; 255 size_t keylen; 256 int error; 257 int repeats = 0; 258 bool repeat; 259 260 sx_assert(&allprison_lock, SA_XLOCKED); 261 262 again: 263 origosd = NULL; 264 origosd_copy = NULL; 265 osd = NULL; 266 h = NULL; 267 error = 0; 268 repeat = false; 269 TAILQ_FOREACH(opt, opts, link) { 270 /* Look for options with <metaname> prefix. */ 271 if (strstr(opt->name, meta->name) != opt->name) 272 continue; 273 /* Consider only full <metaname> or <metaname>.* ones. */ 274 if (opt->name[strlen(meta->name)] != '.' && 275 opt->name[strlen(meta->name)] != '\0') 276 continue; 277 opt->seen = 1; 278 279 /* The very first preconditions. */ 280 if (opt->len < 0) 281 continue; 282 if (opt->len > jm_maxbufsize_hard) { 283 error = EFBIG; 284 break; 285 } 286 /* NULL-terminated strings are expected from vfsopt. */ 287 if (opt->value != NULL && 288 ((char *)opt->value)[opt->len - 1] != '\0') { 289 error = EINVAL; 290 break; 291 } 292 293 /* Work with our own copy of existing metadata. */ 294 if (h == NULL) { 295 h = jm_h_alloc(); /* zeroed */ 296 mtx_lock(&pr->pr_mtx); 297 origosd = osd_jail_get(pr, meta->osd_slot); 298 if (origosd != NULL) { 299 origosd_copy = malloc(strlen(origosd) + 1, 300 M_PRISON, M_NOWAIT); 301 if (origosd_copy == NULL) 302 error = ENOMEM; 303 else { 304 h->p = origosd_copy; 305 h->len = strlen(origosd) + 1; 306 memcpy(h->p, origosd, h->len); 307 } 308 } 309 mtx_unlock(&pr->pr_mtx); 310 if (error != 0) 311 break; 312 } 313 314 /* 1) Change the whole metadata. */ 315 if (strcmp(opt->name, meta->name) == 0) { 316 if (opt->len > jm_maxbufsize_hard) { 317 error = EFBIG; 318 break; 319 } 320 h = jm_h_freechain(h); 321 h = jm_h_prepend(h, 322 (opt->value != NULL) ? opt->value : "", 323 /* avoid empty NULL-terminated string */ 324 (opt->len > 1) ? opt->len : 0); 325 continue; 326 } 327 328 /* 2) Or add/replace/remove a specific key=value. */ 329 key = opt->name + strlen(meta->name) + 1; 330 keylen = strlen(key); 331 if (keylen < 1) { 332 error = EINVAL; 333 break; 334 } 335 jm_h_cut_occurrences(h, key, keylen); 336 if (opt->value == NULL) 337 continue; /* key removal */ 338 h = jm_h_prepend(h, NULL, 0); 339 h->len = keylen + 1 + opt->len; /* key=value\0 */ 340 h->owned = malloc(h->len, M_PRISON, M_WAITOK | M_ZERO); 341 h->p = h->owned; 342 memcpy(h->p, key, keylen); 343 h->p[keylen] = '='; 344 memcpy(h->p + keylen + 1, opt->value, opt->len); 345 } 346 347 if (h == NULL || error != 0) 348 goto end; 349 350 /* Assemble the final contiguous buffer. */ 351 osdlen = jm_h_len(h); 352 if (osdlen > jm_maxbufsize_hard) { 353 error = EFBIG; 354 goto end; 355 } 356 if (osdlen > 1) { 357 osd = malloc(osdlen, M_PRISON, M_WAITOK); 358 jm_h_assemble(osd, h); 359 osd[osdlen - 1] = '\0'; /* sealed */ 360 } 361 362 /* Compare and swap the buffers. */ 363 mtx_lock(&pr->pr_mtx); 364 oldosd = osd_jail_get(pr, meta->osd_slot); 365 if (oldosd == origosd) { 366 error = osd_jail_set(pr, meta->osd_slot, osd); 367 } else { 368 /* 369 * The osd(9) framework requires protection only for pr_osd, 370 * which is covered by pr_mtx. Therefore, other code might 371 * legally alter jail metadata without allprison_lock. It 372 * means that here we could override data just added by other 373 * thread. This extra caution with retry mechanism aims to 374 * prevent user data loss in such potential cases. 375 */ 376 error = EAGAIN; 377 repeat = true; 378 } 379 mtx_unlock(&pr->pr_mtx); 380 if (error == 0) 381 osd = oldosd; 382 383 end: 384 jm_h_freechain(h); 385 free(osd, M_PRISON); 386 free(origosd_copy, M_PRISON); 387 388 if (repeat && ++repeats < 3) 389 goto again; 390 391 return (error); 392 } 393 394 static int 395 jm_osd_method_get(void *obj, void *data, const struct meta *meta) 396 { 397 struct prison *pr = obj; 398 struct vfsoptlist *opts = data; 399 struct vfsopt *opt; 400 char *osd = NULL; 401 char empty = '\0'; 402 int error = 0; 403 bool locked = false; 404 const char *key; 405 size_t keylen; 406 const char *p; 407 408 sx_assert(&allprison_lock, SA_SLOCKED); 409 410 TAILQ_FOREACH(opt, opts, link) { 411 if (strstr(opt->name, meta->name) != opt->name) 412 continue; 413 if (opt->name[strlen(meta->name)] != '.' && 414 opt->name[strlen(meta->name)] != '\0') 415 continue; 416 417 if (!locked) { 418 mtx_lock(&pr->pr_mtx); 419 locked = true; 420 osd = osd_jail_get(pr, meta->osd_slot); 421 if (osd == NULL) 422 osd = ∅ 423 } 424 425 /* Provide full metadata. */ 426 if (strcmp(opt->name, meta->name) == 0) { 427 if (strlcpy(opt->value, osd, opt->len) >= opt->len) { 428 error = EINVAL; 429 break; 430 } 431 opt->seen = 1; 432 continue; 433 } 434 435 /* Extract a specific key=value. */ 436 p = osd; 437 key = opt->name + strlen(meta->name) + 1; 438 keylen = strlen(key); 439 while ((p = strstr(p, key)) != NULL) { 440 if ((p == osd || *(p - 1) == '\n') 441 && p[keylen] == '=') { 442 if (strlcpy(opt->value, p + keylen + 1, 443 MIN(opt->len, strchr(p + keylen + 1, '\n') - 444 (p + keylen + 1) + 1)) >= opt->len) { 445 error = EINVAL; 446 break; 447 } 448 opt->seen = 1; 449 } 450 p += keylen; 451 } 452 if (error != 0) 453 break; 454 } 455 456 if (locked) 457 mtx_unlock(&pr->pr_mtx); 458 459 return (error); 460 } 461 462 static int 463 jm_osd_method_check(void *obj __unused, void *data, const struct meta *meta) 464 { 465 struct vfsoptlist *opts = data; 466 struct vfsopt *opt; 467 468 TAILQ_FOREACH(opt, opts, link) { 469 if (strstr(opt->name, meta->name) != opt->name) 470 continue; 471 if (opt->name[strlen(meta->name)] != '.' && 472 opt->name[strlen(meta->name)] != '\0') 473 continue; 474 opt->seen = 1; 475 } 476 477 return (0); 478 } 479 480 static void 481 jm_osd_destructor(void *osd) 482 { 483 free(osd, M_PRISON); 484 } 485 486 487 /* OSD for "meta" param */ 488 489 static struct meta meta; 490 491 static inline int 492 jm_osd_method_set_meta(void *obj, void *data) 493 { 494 return (jm_osd_method_set(obj, data, &meta)); 495 } 496 497 static inline int 498 jm_osd_method_get_meta(void *obj, void *data) 499 { 500 return (jm_osd_method_get(obj, data, &meta)); 501 } 502 503 static inline int 504 jm_osd_method_check_meta(void *obj, void *data) 505 { 506 return (jm_osd_method_check(obj, data, &meta)); 507 } 508 509 static struct meta meta = { 510 .name = JAIL_META_PRIVATE, 511 .osd_slot = 0, 512 .methods = { 513 [PR_METHOD_SET] = jm_osd_method_set_meta, 514 [PR_METHOD_GET] = jm_osd_method_get_meta, 515 [PR_METHOD_CHECK] = jm_osd_method_check_meta, 516 } 517 }; 518 519 520 /* OSD for "env" param */ 521 522 static struct meta env; 523 524 static inline int 525 jm_osd_method_set_env(void *obj, void *data) 526 { 527 return (jm_osd_method_set(obj, data, &env)); 528 } 529 530 static inline int 531 jm_osd_method_get_env(void *obj, void *data) 532 { 533 return (jm_osd_method_get(obj, data, &env)); 534 } 535 536 static inline int 537 jm_osd_method_check_env(void *obj, void *data) 538 { 539 return (jm_osd_method_check(obj, data, &env)); 540 } 541 542 static struct meta env = { 543 .name = JAIL_META_SHARED, 544 .osd_slot = 0, 545 .methods = { 546 [PR_METHOD_SET] = jm_osd_method_set_env, 547 [PR_METHOD_GET] = jm_osd_method_get_env, 548 [PR_METHOD_CHECK] = jm_osd_method_check_env, 549 } 550 }; 551 552 553 /* A jail can read its "env". */ 554 555 static int 556 jm_sysctl_env(SYSCTL_HANDLER_ARGS) 557 { 558 struct prison *pr; 559 char empty = '\0'; 560 char *tmpbuf; 561 size_t outlen; 562 int error = 0; 563 564 pr = req->td->td_ucred->cr_prison; 565 566 mtx_lock(&pr->pr_mtx); 567 arg1 = osd_jail_get(pr, env.osd_slot); 568 if (arg1 == NULL) { 569 tmpbuf = ∅ 570 outlen = 1; 571 } else { 572 outlen = strlen(arg1) + 1; 573 if (req->oldptr != NULL) { 574 tmpbuf = malloc(outlen, M_PRISON, M_NOWAIT); 575 error = (tmpbuf == NULL) ? ENOMEM : 0; 576 if (error == 0) 577 memcpy(tmpbuf, arg1, outlen); 578 } 579 } 580 mtx_unlock(&pr->pr_mtx); 581 582 if (error != 0) 583 return (error); 584 585 if (req->oldptr == NULL) 586 SYSCTL_OUT(req, NULL, outlen); 587 else { 588 SYSCTL_OUT(req, tmpbuf, outlen); 589 if (tmpbuf != &empty) 590 free(tmpbuf, M_PRISON); 591 } 592 593 return (error); 594 } 595 SYSCTL_PROC(_security_jail, OID_AUTO, env, 596 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 597 0, 0, jm_sysctl_env, "A", "Meta information provided by parent jail"); 598 599 600 /* Setup and tear down. */ 601 602 static int 603 jm_sysinit(void *arg __unused) 604 { 605 meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods); 606 env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods); 607 608 return (0); 609 } 610 611 static int 612 jm_sysuninit(void *arg __unused) 613 { 614 osd_jail_deregister(meta.osd_slot); 615 osd_jail_deregister(env.osd_slot); 616 617 return (0); 618 } 619 620 SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL); 621 SYSUNINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysuninit, NULL); 622