1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/lock.h> 34 #include <sys/malloc.h> 35 #include <sys/mount.h> 36 #include <sys/jail.h> 37 #include <sys/proc.h> 38 #include <sys/sx.h> 39 40 #include <compat/linux/linux_mib.h> 41 #include <compat/linux/linux_misc.h> 42 43 struct linux_prison { 44 char pr_osname[LINUX_MAX_UTSNAME]; 45 char pr_osrelease[LINUX_MAX_UTSNAME]; 46 int pr_oss_version; 47 int pr_osrel; 48 }; 49 50 static struct linux_prison lprison0 = { 51 .pr_osname = "Linux", 52 .pr_osrelease = LINUX_VERSION_STR, 53 .pr_oss_version = 0x030600, 54 .pr_osrel = LINUX_VERSION_CODE 55 }; 56 57 static unsigned linux_osd_jail_slot; 58 59 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 60 "Linux mode"); 61 62 int linux_debug = 3; 63 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 64 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 65 66 int linux_default_openfiles = 1024; 67 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 68 &linux_default_openfiles, 0, 69 "Default soft openfiles resource limit, or -1 for unlimited"); 70 71 int linux_default_stacksize = 8 * 1024 * 1024; 72 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN, 73 &linux_default_stacksize, 0, 74 "Default soft stack size resource limit, or -1 for unlimited"); 75 76 int linux_dummy_rlimits = 0; 77 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN, 78 &linux_dummy_rlimits, 0, 79 "Return dummy values for unsupported Linux-specific rlimits"); 80 81 int linux_ignore_ip_recverr = 1; 82 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 83 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 84 85 int linux_preserve_vstatus = 1; 86 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 87 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 88 89 bool linux_map_sched_prio = true; 90 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 91 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 92 "(not POSIX compliant)"); 93 94 static bool linux_setid_allowed = true; 95 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN, 96 &linux_setid_allowed, 0, 97 "Allow setuid/setgid on execve of Linux binary"); 98 99 int 100 linux_setid_allowed_query(struct thread *td __unused, 101 struct image_params *imgp __unused) 102 { 103 return (linux_setid_allowed); 104 } 105 106 static int linux_set_osname(struct thread *td, char *osname); 107 static int linux_set_osrelease(struct thread *td, char *osrelease); 108 static int linux_set_oss_version(struct thread *td, int oss_version); 109 110 static int 111 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 112 { 113 char osname[LINUX_MAX_UTSNAME]; 114 int error; 115 116 linux_get_osname(req->td, osname); 117 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 118 if (error != 0 || req->newptr == NULL) 119 return (error); 120 error = linux_set_osname(req->td, osname); 121 122 return (error); 123 } 124 125 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 126 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 127 0, 0, linux_sysctl_osname, "A", 128 "Linux kernel OS name"); 129 130 static int 131 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 132 { 133 char osrelease[LINUX_MAX_UTSNAME]; 134 int error; 135 136 linux_get_osrelease(req->td, osrelease); 137 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 138 if (error != 0 || req->newptr == NULL) 139 return (error); 140 error = linux_set_osrelease(req->td, osrelease); 141 142 return (error); 143 } 144 145 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 146 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 147 0, 0, linux_sysctl_osrelease, "A", 148 "Linux kernel OS release"); 149 150 static int 151 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 152 { 153 int oss_version; 154 int error; 155 156 oss_version = linux_get_oss_version(req->td); 157 error = sysctl_handle_int(oidp, &oss_version, 0, req); 158 if (error != 0 || req->newptr == NULL) 159 return (error); 160 error = linux_set_oss_version(req->td, oss_version); 161 162 return (error); 163 } 164 165 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 166 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 167 0, 0, linux_sysctl_oss_version, "I", 168 "Linux OSS version"); 169 170 /* 171 * Map the osrelease into integer 172 */ 173 static int 174 linux_map_osrel(char *osrelease, int *osrel) 175 { 176 char *sep, *eosrelease; 177 int len, v0, v1, v2, v; 178 179 len = strlen(osrelease); 180 eosrelease = osrelease + len; 181 v0 = strtol(osrelease, &sep, 10); 182 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 183 return (EINVAL); 184 osrelease = sep + 1; 185 v1 = strtol(osrelease, &sep, 10); 186 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 187 return (EINVAL); 188 osrelease = sep + 1; 189 v2 = strtol(osrelease, &sep, 10); 190 if (osrelease == sep || 191 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 192 return (EINVAL); 193 194 v = LINUX_KERNVER(v0, v1, v2); 195 if (v < LINUX_KERNVER(1, 0, 0)) 196 return (EINVAL); 197 198 if (osrel != NULL) 199 *osrel = v; 200 201 return (0); 202 } 203 204 /* 205 * Find a prison with Linux info. 206 * Return the Linux info and the (locked) prison. 207 */ 208 static struct linux_prison * 209 linux_find_prison(struct prison *spr, struct prison **prp) 210 { 211 struct prison *pr; 212 struct linux_prison *lpr; 213 214 for (pr = spr;; pr = pr->pr_parent) { 215 mtx_lock(&pr->pr_mtx); 216 lpr = (pr == &prison0) 217 ? &lprison0 218 : osd_jail_get(pr, linux_osd_jail_slot); 219 if (lpr != NULL) 220 break; 221 mtx_unlock(&pr->pr_mtx); 222 } 223 *prp = pr; 224 225 return (lpr); 226 } 227 228 /* 229 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 230 * the Linux info and lock the prison. 231 */ 232 static void 233 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 234 { 235 struct prison *ppr; 236 struct linux_prison *lpr, *nlpr; 237 void **rsv; 238 239 /* If this prison already has Linux info, return that. */ 240 lpr = linux_find_prison(pr, &ppr); 241 if (ppr == pr) 242 goto done; 243 /* 244 * Allocate a new info record. Then check again, in case something 245 * changed during the allocation. 246 */ 247 mtx_unlock(&ppr->pr_mtx); 248 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 249 rsv = osd_reserve(linux_osd_jail_slot); 250 lpr = linux_find_prison(pr, &ppr); 251 if (ppr == pr) { 252 free(nlpr, M_PRISON); 253 osd_free_reserved(rsv); 254 goto done; 255 } 256 /* Inherit the initial values from the ancestor. */ 257 mtx_lock(&pr->pr_mtx); 258 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 259 bcopy(lpr, nlpr, sizeof(*lpr)); 260 lpr = nlpr; 261 mtx_unlock(&ppr->pr_mtx); 262 done: 263 if (lprp != NULL) 264 *lprp = lpr; 265 else 266 mtx_unlock(&pr->pr_mtx); 267 } 268 269 /* 270 * Jail OSD methods for Linux prison data. 271 */ 272 static int 273 linux_prison_create(void *obj, void *data) 274 { 275 struct prison *pr = obj; 276 struct vfsoptlist *opts = data; 277 int jsys; 278 279 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 280 jsys == JAIL_SYS_INHERIT) 281 return (0); 282 /* 283 * Inherit a prison's initial values from its parent 284 * (different from JAIL_SYS_INHERIT which also inherits changes). 285 */ 286 linux_alloc_prison(pr, NULL); 287 return (0); 288 } 289 290 static int 291 linux_prison_check(void *obj __unused, void *data) 292 { 293 struct vfsoptlist *opts = data; 294 char *osname, *osrelease; 295 int error, jsys, len, oss_version; 296 297 /* Check that the parameters are correct. */ 298 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 299 if (error != ENOENT) { 300 if (error != 0) 301 return (error); 302 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 303 return (EINVAL); 304 } 305 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 306 if (error != ENOENT) { 307 if (error != 0) 308 return (error); 309 if (len == 0 || osname[len - 1] != '\0') 310 return (EINVAL); 311 if (len > LINUX_MAX_UTSNAME) { 312 vfs_opterror(opts, "linux.osname too long"); 313 return (ENAMETOOLONG); 314 } 315 } 316 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 317 if (error != ENOENT) { 318 if (error != 0) 319 return (error); 320 if (len == 0 || osrelease[len - 1] != '\0') 321 return (EINVAL); 322 if (len > LINUX_MAX_UTSNAME) { 323 vfs_opterror(opts, "linux.osrelease too long"); 324 return (ENAMETOOLONG); 325 } 326 error = linux_map_osrel(osrelease, NULL); 327 if (error != 0) { 328 vfs_opterror(opts, "linux.osrelease format error"); 329 return (error); 330 } 331 } 332 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 333 sizeof(oss_version)); 334 335 if (error == ENOENT) 336 error = 0; 337 return (error); 338 } 339 340 static int 341 linux_prison_set(void *obj, void *data) 342 { 343 struct linux_prison *lpr; 344 struct prison *pr = obj; 345 struct vfsoptlist *opts = data; 346 char *osname, *osrelease; 347 int error, gotversion, jsys, len, oss_version; 348 349 /* Set the parameters, which should be correct. */ 350 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 351 if (error == ENOENT) 352 jsys = -1; 353 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 354 if (error == ENOENT) 355 osname = NULL; 356 else 357 jsys = JAIL_SYS_NEW; 358 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 359 if (error == ENOENT) 360 osrelease = NULL; 361 else 362 jsys = JAIL_SYS_NEW; 363 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 364 sizeof(oss_version)); 365 if (error == ENOENT) 366 gotversion = 0; 367 else { 368 gotversion = 1; 369 jsys = JAIL_SYS_NEW; 370 } 371 switch (jsys) { 372 case JAIL_SYS_INHERIT: 373 /* "linux=inherit": inherit the parent's Linux info. */ 374 mtx_lock(&pr->pr_mtx); 375 osd_jail_del(pr, linux_osd_jail_slot); 376 mtx_unlock(&pr->pr_mtx); 377 break; 378 case JAIL_SYS_NEW: 379 /* 380 * "linux=new" or "linux.*": 381 * the prison gets its own Linux info. 382 */ 383 linux_alloc_prison(pr, &lpr); 384 if (osrelease) { 385 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 386 strlcpy(lpr->pr_osrelease, osrelease, 387 LINUX_MAX_UTSNAME); 388 } 389 if (osname) 390 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 391 if (gotversion) 392 lpr->pr_oss_version = oss_version; 393 mtx_unlock(&pr->pr_mtx); 394 } 395 396 return (0); 397 } 398 399 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 400 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 401 "Jail Linux kernel OS name"); 402 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 403 "Jail Linux kernel OS release"); 404 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 405 "I", "Jail Linux OSS version"); 406 407 static int 408 linux_prison_get(void *obj, void *data) 409 { 410 struct linux_prison *lpr; 411 struct prison *ppr; 412 struct prison *pr = obj; 413 struct vfsoptlist *opts = data; 414 int error, i; 415 416 static int version0; 417 418 /* See if this prison is the one with the Linux info. */ 419 lpr = linux_find_prison(pr, &ppr); 420 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 421 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 422 if (error != 0 && error != ENOENT) 423 goto done; 424 if (i) { 425 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 426 if (error != 0 && error != ENOENT) 427 goto done; 428 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 429 if (error != 0 && error != ENOENT) 430 goto done; 431 error = vfs_setopt(opts, "linux.oss_version", 432 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 433 if (error != 0 && error != ENOENT) 434 goto done; 435 } else { 436 /* 437 * If this prison is inheriting its Linux info, report 438 * empty/zero parameters. 439 */ 440 error = vfs_setopts(opts, "linux.osname", ""); 441 if (error != 0 && error != ENOENT) 442 goto done; 443 error = vfs_setopts(opts, "linux.osrelease", ""); 444 if (error != 0 && error != ENOENT) 445 goto done; 446 error = vfs_setopt(opts, "linux.oss_version", &version0, 447 sizeof(lpr->pr_oss_version)); 448 if (error != 0 && error != ENOENT) 449 goto done; 450 } 451 error = 0; 452 453 done: 454 mtx_unlock(&ppr->pr_mtx); 455 456 return (error); 457 } 458 459 static void 460 linux_prison_destructor(void *data) 461 { 462 463 free(data, M_PRISON); 464 } 465 466 void 467 linux_osd_jail_register(void) 468 { 469 struct prison *pr; 470 osd_method_t methods[PR_MAXMETHOD] = { 471 [PR_METHOD_CREATE] = linux_prison_create, 472 [PR_METHOD_GET] = linux_prison_get, 473 [PR_METHOD_SET] = linux_prison_set, 474 [PR_METHOD_CHECK] = linux_prison_check 475 }; 476 477 linux_osd_jail_slot = 478 osd_jail_register(linux_prison_destructor, methods); 479 /* Copy the system Linux info to any current prisons. */ 480 sx_slock(&allprison_lock); 481 TAILQ_FOREACH(pr, &allprison, pr_list) 482 linux_alloc_prison(pr, NULL); 483 sx_sunlock(&allprison_lock); 484 } 485 486 void 487 linux_osd_jail_deregister(void) 488 { 489 490 osd_jail_deregister(linux_osd_jail_slot); 491 } 492 493 void 494 linux_get_osname(struct thread *td, char *dst) 495 { 496 struct prison *pr; 497 struct linux_prison *lpr; 498 499 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 500 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 501 mtx_unlock(&pr->pr_mtx); 502 } 503 504 static int 505 linux_set_osname(struct thread *td, char *osname) 506 { 507 struct prison *pr; 508 struct linux_prison *lpr; 509 510 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 511 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 512 mtx_unlock(&pr->pr_mtx); 513 514 return (0); 515 } 516 517 void 518 linux_get_osrelease(struct thread *td, char *dst) 519 { 520 struct prison *pr; 521 struct linux_prison *lpr; 522 523 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 524 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 525 mtx_unlock(&pr->pr_mtx); 526 } 527 528 int 529 linux_kernver(struct thread *td) 530 { 531 struct prison *pr; 532 struct linux_prison *lpr; 533 int osrel; 534 535 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 536 osrel = lpr->pr_osrel; 537 mtx_unlock(&pr->pr_mtx); 538 539 return (osrel); 540 } 541 542 static int 543 linux_set_osrelease(struct thread *td, char *osrelease) 544 { 545 struct prison *pr; 546 struct linux_prison *lpr; 547 int error; 548 549 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 550 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 551 if (error == 0) 552 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 553 mtx_unlock(&pr->pr_mtx); 554 555 return (error); 556 } 557 558 int 559 linux_get_oss_version(struct thread *td) 560 { 561 struct prison *pr; 562 struct linux_prison *lpr; 563 int version; 564 565 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 566 version = lpr->pr_oss_version; 567 mtx_unlock(&pr->pr_mtx); 568 569 return (version); 570 } 571 572 static int 573 linux_set_oss_version(struct thread *td, int oss_version) 574 { 575 struct prison *pr; 576 struct linux_prison *lpr; 577 578 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 579 lpr->pr_oss_version = oss_version; 580 mtx_unlock(&pr->pr_mtx); 581 582 return (0); 583 } 584