/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 1999 Marcel Moolenaar
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/jail.h>
#include <sys/proc.h>
#include <sys/sx.h>

#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>

/*
 * Linux identity information kept per prison.  A prison either owns one of
 * these records (stored in its jail OSD slot) or uses the record of the
 * nearest ancestor that has one.
 */
struct linux_prison {
	char	pr_osname[LINUX_MAX_UTSNAME];	/* OS name string */
	char	pr_osrelease[LINUX_MAX_UTSNAME]; /* OS release string */
	int	pr_oss_version;			/* OSS version number */
	int	pr_osrel;	/* integer form of pr_osrelease */
};

/* Linux info used for prison0; also the seed for newly allocated records. */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};

/* Jail OSD slot holding a prison's struct linux_prison, set at registration. */
static unsigned linux_osd_jail_slot;

/* Root of the compat.linux sysctl tree; the knobs below hang off it. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* Default is 8 MiB. */
int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
    &linux_default_stacksize, 0,
    "Default soft stack size resource limit, or -1 for unlimited");

int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
    &linux_dummy_rlimits, 0,
    "Return dummy values for unsupported Linux-specific rlimits");

int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

int linux_preserve_vstatus = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/* Exported through the map_sched_prio sysctl declared right after this. */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 91 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 92 "(not POSIX compliant)"); 93 94 int linux_use_emul_path = 1; 95 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN, 96 &linux_use_emul_path, 0, "Use linux.compat.emul_path"); 97 98 static bool linux_setid_allowed = true; 99 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN, 100 &linux_setid_allowed, 0, 101 "Allow setuid/setgid on execve of Linux binary"); 102 103 int 104 linux_setid_allowed_query(struct thread *td __unused, 105 struct image_params *imgp __unused) 106 { 107 return (linux_setid_allowed); 108 } 109 110 static int linux_set_osname(struct thread *td, char *osname); 111 static int linux_set_osrelease(struct thread *td, char *osrelease); 112 static int linux_set_oss_version(struct thread *td, int oss_version); 113 114 static int 115 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 116 { 117 char osname[LINUX_MAX_UTSNAME]; 118 int error; 119 120 linux_get_osname(req->td, osname); 121 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 122 if (error != 0 || req->newptr == NULL) 123 return (error); 124 error = linux_set_osname(req->td, osname); 125 126 return (error); 127 } 128 129 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 130 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 131 0, 0, linux_sysctl_osname, "A", 132 "Linux kernel OS name"); 133 134 static int 135 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 136 { 137 char osrelease[LINUX_MAX_UTSNAME]; 138 int error; 139 140 linux_get_osrelease(req->td, osrelease); 141 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 142 if (error != 0 || req->newptr == NULL) 143 return (error); 144 error = linux_set_osrelease(req->td, osrelease); 145 146 return (error); 147 } 148 149 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 150 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 151 0, 0, 
linux_sysctl_osrelease, "A", 152 "Linux kernel OS release"); 153 154 static int 155 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 156 { 157 int oss_version; 158 int error; 159 160 oss_version = linux_get_oss_version(req->td); 161 error = sysctl_handle_int(oidp, &oss_version, 0, req); 162 if (error != 0 || req->newptr == NULL) 163 return (error); 164 error = linux_set_oss_version(req->td, oss_version); 165 166 return (error); 167 } 168 169 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 170 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 171 0, 0, linux_sysctl_oss_version, "I", 172 "Linux OSS version"); 173 174 /* 175 * Map the osrelease into integer 176 */ 177 static int 178 linux_map_osrel(char *osrelease, int *osrel) 179 { 180 char *sep, *eosrelease; 181 int len, v0, v1, v2, v; 182 183 len = strlen(osrelease); 184 eosrelease = osrelease + len; 185 v0 = strtol(osrelease, &sep, 10); 186 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 187 return (EINVAL); 188 osrelease = sep + 1; 189 v1 = strtol(osrelease, &sep, 10); 190 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 191 return (EINVAL); 192 osrelease = sep + 1; 193 v2 = strtol(osrelease, &sep, 10); 194 if (osrelease == sep || 195 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 196 return (EINVAL); 197 198 v = LINUX_KERNVER(v0, v1, v2); 199 if (v < LINUX_KERNVER(1, 0, 0)) 200 return (EINVAL); 201 202 if (osrel != NULL) 203 *osrel = v; 204 205 return (0); 206 } 207 208 /* 209 * Find a prison with Linux info. 210 * Return the Linux info and the (locked) prison. 211 */ 212 static struct linux_prison * 213 linux_find_prison(struct prison *spr, struct prison **prp) 214 { 215 struct prison *pr; 216 struct linux_prison *lpr; 217 218 for (pr = spr;; pr = pr->pr_parent) { 219 mtx_lock(&pr->pr_mtx); 220 lpr = (pr == &prison0) 221 ? 
&lprison0 222 : osd_jail_get(pr, linux_osd_jail_slot); 223 if (lpr != NULL) 224 break; 225 mtx_unlock(&pr->pr_mtx); 226 } 227 *prp = pr; 228 229 return (lpr); 230 } 231 232 /* 233 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 234 * the Linux info and lock the prison. 235 */ 236 static void 237 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 238 { 239 struct prison *ppr; 240 struct linux_prison *lpr, *nlpr; 241 void **rsv; 242 243 /* If this prison already has Linux info, return that. */ 244 lpr = linux_find_prison(pr, &ppr); 245 if (ppr == pr) 246 goto done; 247 /* 248 * Allocate a new info record. Then check again, in case something 249 * changed during the allocation. 250 */ 251 mtx_unlock(&ppr->pr_mtx); 252 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 253 rsv = osd_reserve(linux_osd_jail_slot); 254 lpr = linux_find_prison(pr, &ppr); 255 if (ppr == pr) { 256 free(nlpr, M_PRISON); 257 osd_free_reserved(rsv); 258 goto done; 259 } 260 /* Inherit the initial values from the ancestor. */ 261 mtx_lock(&pr->pr_mtx); 262 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 263 bcopy(lpr, nlpr, sizeof(*lpr)); 264 lpr = nlpr; 265 mtx_unlock(&ppr->pr_mtx); 266 done: 267 if (lprp != NULL) 268 *lprp = lpr; 269 else 270 mtx_unlock(&pr->pr_mtx); 271 } 272 273 /* 274 * Jail OSD methods for Linux prison data. 275 */ 276 static int 277 linux_prison_create(void *obj, void *data) 278 { 279 struct prison *pr = obj; 280 struct vfsoptlist *opts = data; 281 int jsys; 282 283 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 284 jsys == JAIL_SYS_INHERIT) 285 return (0); 286 /* 287 * Inherit a prison's initial values from its parent 288 * (different from JAIL_SYS_INHERIT which also inherits changes). 
289 */ 290 linux_alloc_prison(pr, NULL); 291 return (0); 292 } 293 294 static int 295 linux_prison_check(void *obj __unused, void *data) 296 { 297 struct vfsoptlist *opts = data; 298 char *osname, *osrelease; 299 int error, jsys, len, oss_version; 300 301 /* Check that the parameters are correct. */ 302 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 303 if (error != ENOENT) { 304 if (error != 0) 305 return (error); 306 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 307 return (EINVAL); 308 } 309 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 310 if (error != ENOENT) { 311 if (error != 0) 312 return (error); 313 if (len == 0 || osname[len - 1] != '\0') 314 return (EINVAL); 315 if (len > LINUX_MAX_UTSNAME) { 316 vfs_opterror(opts, "linux.osname too long"); 317 return (ENAMETOOLONG); 318 } 319 } 320 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 321 if (error != ENOENT) { 322 if (error != 0) 323 return (error); 324 if (len == 0 || osrelease[len - 1] != '\0') 325 return (EINVAL); 326 if (len > LINUX_MAX_UTSNAME) { 327 vfs_opterror(opts, "linux.osrelease too long"); 328 return (ENAMETOOLONG); 329 } 330 error = linux_map_osrel(osrelease, NULL); 331 if (error != 0) { 332 vfs_opterror(opts, "linux.osrelease format error"); 333 return (error); 334 } 335 } 336 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 337 sizeof(oss_version)); 338 339 if (error == ENOENT) 340 error = 0; 341 return (error); 342 } 343 344 static int 345 linux_prison_set(void *obj, void *data) 346 { 347 struct linux_prison *lpr; 348 struct prison *pr = obj; 349 struct vfsoptlist *opts = data; 350 char *osname, *osrelease; 351 int error, gotversion, jsys, len, oss_version; 352 353 /* Set the parameters, which should be correct. 
*/ 354 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 355 if (error == ENOENT) 356 jsys = -1; 357 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 358 if (error == ENOENT) 359 osname = NULL; 360 else 361 jsys = JAIL_SYS_NEW; 362 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 363 if (error == ENOENT) 364 osrelease = NULL; 365 else 366 jsys = JAIL_SYS_NEW; 367 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 368 sizeof(oss_version)); 369 if (error == ENOENT) 370 gotversion = 0; 371 else { 372 gotversion = 1; 373 jsys = JAIL_SYS_NEW; 374 } 375 switch (jsys) { 376 case JAIL_SYS_INHERIT: 377 /* "linux=inherit": inherit the parent's Linux info. */ 378 mtx_lock(&pr->pr_mtx); 379 osd_jail_del(pr, linux_osd_jail_slot); 380 mtx_unlock(&pr->pr_mtx); 381 break; 382 case JAIL_SYS_NEW: 383 /* 384 * "linux=new" or "linux.*": 385 * the prison gets its own Linux info. 386 */ 387 linux_alloc_prison(pr, &lpr); 388 if (osrelease) { 389 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 390 strlcpy(lpr->pr_osrelease, osrelease, 391 LINUX_MAX_UTSNAME); 392 } 393 if (osname) 394 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 395 if (gotversion) 396 lpr->pr_oss_version = oss_version; 397 mtx_unlock(&pr->pr_mtx); 398 } 399 400 return (0); 401 } 402 403 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 404 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 405 "Jail Linux kernel OS name"); 406 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 407 "Jail Linux kernel OS release"); 408 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 409 "I", "Jail Linux OSS version"); 410 411 static int 412 linux_prison_get(void *obj, void *data) 413 { 414 struct linux_prison *lpr; 415 struct prison *ppr; 416 struct prison *pr = obj; 417 struct vfsoptlist *opts = data; 418 int error, i; 419 420 static int version0; 421 422 /* See if this prison is the one with 
the Linux info. */ 423 lpr = linux_find_prison(pr, &ppr); 424 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 425 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 426 if (error != 0 && error != ENOENT) 427 goto done; 428 if (i) { 429 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 430 if (error != 0 && error != ENOENT) 431 goto done; 432 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 433 if (error != 0 && error != ENOENT) 434 goto done; 435 error = vfs_setopt(opts, "linux.oss_version", 436 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 437 if (error != 0 && error != ENOENT) 438 goto done; 439 } else { 440 /* 441 * If this prison is inheriting its Linux info, report 442 * empty/zero parameters. 443 */ 444 error = vfs_setopts(opts, "linux.osname", ""); 445 if (error != 0 && error != ENOENT) 446 goto done; 447 error = vfs_setopts(opts, "linux.osrelease", ""); 448 if (error != 0 && error != ENOENT) 449 goto done; 450 error = vfs_setopt(opts, "linux.oss_version", &version0, 451 sizeof(lpr->pr_oss_version)); 452 if (error != 0 && error != ENOENT) 453 goto done; 454 } 455 error = 0; 456 457 done: 458 mtx_unlock(&ppr->pr_mtx); 459 460 return (error); 461 } 462 463 static void 464 linux_prison_destructor(void *data) 465 { 466 467 free(data, M_PRISON); 468 } 469 470 void 471 linux_osd_jail_register(void) 472 { 473 struct prison *pr; 474 osd_method_t methods[PR_MAXMETHOD] = { 475 [PR_METHOD_CREATE] = linux_prison_create, 476 [PR_METHOD_GET] = linux_prison_get, 477 [PR_METHOD_SET] = linux_prison_set, 478 [PR_METHOD_CHECK] = linux_prison_check 479 }; 480 481 linux_osd_jail_slot = 482 osd_jail_register(linux_prison_destructor, methods); 483 /* Copy the system Linux info to any current prisons. 
*/ 484 sx_slock(&allprison_lock); 485 TAILQ_FOREACH(pr, &allprison, pr_list) 486 linux_alloc_prison(pr, NULL); 487 sx_sunlock(&allprison_lock); 488 } 489 490 void 491 linux_osd_jail_deregister(void) 492 { 493 494 osd_jail_deregister(linux_osd_jail_slot); 495 } 496 497 void 498 linux_get_osname(struct thread *td, char *dst) 499 { 500 struct prison *pr; 501 struct linux_prison *lpr; 502 503 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 504 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 505 mtx_unlock(&pr->pr_mtx); 506 } 507 508 static int 509 linux_set_osname(struct thread *td, char *osname) 510 { 511 struct prison *pr; 512 struct linux_prison *lpr; 513 514 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 515 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 516 mtx_unlock(&pr->pr_mtx); 517 518 return (0); 519 } 520 521 void 522 linux_get_osrelease(struct thread *td, char *dst) 523 { 524 struct prison *pr; 525 struct linux_prison *lpr; 526 527 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 528 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 529 mtx_unlock(&pr->pr_mtx); 530 } 531 532 int 533 linux_kernver(struct thread *td) 534 { 535 struct prison *pr; 536 struct linux_prison *lpr; 537 int osrel; 538 539 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 540 osrel = lpr->pr_osrel; 541 mtx_unlock(&pr->pr_mtx); 542 543 return (osrel); 544 } 545 546 static int 547 linux_set_osrelease(struct thread *td, char *osrelease) 548 { 549 struct prison *pr; 550 struct linux_prison *lpr; 551 int error; 552 553 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 554 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 555 if (error == 0) 556 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 557 mtx_unlock(&pr->pr_mtx); 558 559 return (error); 560 } 561 562 int 563 linux_get_oss_version(struct thread *td) 564 { 565 struct prison *pr; 566 struct linux_prison *lpr; 567 int version; 568 569 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 
570 version = lpr->pr_oss_version; 571 mtx_unlock(&pr->pr_mtx); 572 573 return (version); 574 } 575 576 static int 577 linux_set_oss_version(struct thread *td, int oss_version) 578 { 579 struct prison *pr; 580 struct linux_prison *lpr; 581 582 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 583 lpr->pr_oss_version = oss_version; 584 mtx_unlock(&pr->pr_mtx); 585 586 return (0); 587 } 588