1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/sdt.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 #include <sys/proc.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/jail.h> 41 #include <sys/lock.h> 42 #include <sys/sx.h> 43 44 #include <compat/linux/linux_mib.h> 45 #include <compat/linux/linux_misc.h> 46 47 struct linux_prison { 48 char pr_osname[LINUX_MAX_UTSNAME]; 49 char pr_osrelease[LINUX_MAX_UTSNAME]; 50 int pr_oss_version; 51 int pr_osrel; 52 }; 53 54 static struct linux_prison lprison0 = { 55 .pr_osname = "Linux", 56 .pr_osrelease = LINUX_VERSION_STR, 57 .pr_oss_version = 0x030600, 58 .pr_osrel = LINUX_VERSION_CODE 59 }; 60 61 static unsigned linux_osd_jail_slot; 62 63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 64 "Linux mode"); 65 66 int linux_debug = 3; 67 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 68 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 69 70 int linux_default_openfiles = 1024; 71 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 72 &linux_default_openfiles, 0, 73 "Default soft openfiles resource limit, or -1 for unlimited"); 74 75 int linux_default_stacksize = 8 * 1024 * 1024; 76 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN, 77 &linux_default_stacksize, 0, 78 "Default soft stack size resource limit, or -1 for unlimited"); 79 80 int linux_dummy_rlimits = 0; 81 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN, 82 &linux_dummy_rlimits, 0, 83 "Return dummy values for unsupported Linux-specific rlimits"); 84 85 int linux_ignore_ip_recverr = 1; 86 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 87 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 88 89 int linux_preserve_vstatus = 1; 90 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 91 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 92 93 bool linux_map_sched_prio = true; 94 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 95 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 96 "(not POSIX compliant)"); 97 98 int linux_use_emul_path = 1; 99 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN, 100 &linux_use_emul_path, 0, "Use linux.compat.emul_path"); 101 102 static bool linux_setid_allowed = true; 103 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN, 104 &linux_setid_allowed, 0, 105 "Allow setuid/setgid on execve of Linux binary"); 106 107 int 108 linux_setid_allowed_query(struct thread *td __unused, 109 struct image_params *imgp __unused) 110 { 111 return (linux_setid_allowed); 112 } 113 114 static int linux_set_osname(struct thread *td, char *osname); 115 static int linux_set_osrelease(struct thread *td, char *osrelease); 116 static int linux_set_oss_version(struct thread *td, int oss_version); 117 118 static int 119 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 120 { 121 char osname[LINUX_MAX_UTSNAME]; 122 int error; 123 124 linux_get_osname(req->td, osname); 125 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 126 if (error != 0 || req->newptr == NULL) 127 return (error); 128 error = linux_set_osname(req->td, osname); 129 130 return (error); 131 } 132 133 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 134 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 135 0, 0, linux_sysctl_osname, "A", 136 "Linux kernel OS name"); 137 138 static int 139 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 140 { 141 char osrelease[LINUX_MAX_UTSNAME]; 142 int error; 143 144 linux_get_osrelease(req->td, osrelease); 145 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 146 if (error != 0 || req->newptr == NULL) 147 return (error); 148 error = linux_set_osrelease(req->td, osrelease); 149 150 return (error); 151 } 152 153 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 154 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 155 0, 0, linux_sysctl_osrelease, "A", 156 "Linux kernel OS release"); 157 158 static int 159 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 160 { 161 int oss_version; 162 int error; 163 164 oss_version = linux_get_oss_version(req->td); 165 error = sysctl_handle_int(oidp, &oss_version, 0, req); 166 if (error != 0 || req->newptr == NULL) 167 return (error); 168 error = linux_set_oss_version(req->td, oss_version); 169 170 return (error); 171 } 172 173 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 174 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 175 0, 0, linux_sysctl_oss_version, "I", 176 "Linux OSS version"); 177 178 /* 179 * Map the osrelease into integer 180 */ 181 static int 182 linux_map_osrel(char *osrelease, int *osrel) 183 { 184 char *sep, *eosrelease; 185 int len, v0, v1, v2, v; 186 187 len = strlen(osrelease); 188 eosrelease = osrelease + len; 189 v0 = strtol(osrelease, &sep, 10); 190 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 191 return (EINVAL); 192 osrelease = sep + 1; 193 v1 = strtol(osrelease, &sep, 10); 194 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 195 return (EINVAL); 196 osrelease = sep + 1; 197 v2 = strtol(osrelease, &sep, 10); 198 if (osrelease == sep || 199 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 200 return (EINVAL); 201 202 v = LINUX_KERNVER(v0, v1, v2); 203 if (v < LINUX_KERNVER(1, 0, 0)) 204 return (EINVAL); 205 206 if (osrel != NULL) 207 *osrel = v; 208 209 return (0); 210 } 211 212 /* 213 * Find a prison with Linux info. 214 * Return the Linux info and the (locked) prison. 215 */ 216 static struct linux_prison * 217 linux_find_prison(struct prison *spr, struct prison **prp) 218 { 219 struct prison *pr; 220 struct linux_prison *lpr; 221 222 for (pr = spr;; pr = pr->pr_parent) { 223 mtx_lock(&pr->pr_mtx); 224 lpr = (pr == &prison0) 225 ? &lprison0 226 : osd_jail_get(pr, linux_osd_jail_slot); 227 if (lpr != NULL) 228 break; 229 mtx_unlock(&pr->pr_mtx); 230 } 231 *prp = pr; 232 233 return (lpr); 234 } 235 236 /* 237 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 238 * the Linux info and lock the prison. 239 */ 240 static void 241 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 242 { 243 struct prison *ppr; 244 struct linux_prison *lpr, *nlpr; 245 void **rsv; 246 247 /* If this prison already has Linux info, return that. */ 248 lpr = linux_find_prison(pr, &ppr); 249 if (ppr == pr) 250 goto done; 251 /* 252 * Allocate a new info record. Then check again, in case something 253 * changed during the allocation. 254 */ 255 mtx_unlock(&ppr->pr_mtx); 256 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 257 rsv = osd_reserve(linux_osd_jail_slot); 258 lpr = linux_find_prison(pr, &ppr); 259 if (ppr == pr) { 260 free(nlpr, M_PRISON); 261 osd_free_reserved(rsv); 262 goto done; 263 } 264 /* Inherit the initial values from the ancestor. */ 265 mtx_lock(&pr->pr_mtx); 266 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 267 bcopy(lpr, nlpr, sizeof(*lpr)); 268 lpr = nlpr; 269 mtx_unlock(&ppr->pr_mtx); 270 done: 271 if (lprp != NULL) 272 *lprp = lpr; 273 else 274 mtx_unlock(&pr->pr_mtx); 275 } 276 277 /* 278 * Jail OSD methods for Linux prison data. 279 */ 280 static int 281 linux_prison_create(void *obj, void *data) 282 { 283 struct prison *pr = obj; 284 struct vfsoptlist *opts = data; 285 int jsys; 286 287 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 288 jsys == JAIL_SYS_INHERIT) 289 return (0); 290 /* 291 * Inherit a prison's initial values from its parent 292 * (different from JAIL_SYS_INHERIT which also inherits changes). 293 */ 294 linux_alloc_prison(pr, NULL); 295 return (0); 296 } 297 298 static int 299 linux_prison_check(void *obj __unused, void *data) 300 { 301 struct vfsoptlist *opts = data; 302 char *osname, *osrelease; 303 int error, jsys, len, oss_version; 304 305 /* Check that the parameters are correct. */ 306 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 307 if (error != ENOENT) { 308 if (error != 0) 309 return (error); 310 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 311 return (EINVAL); 312 } 313 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 314 if (error != ENOENT) { 315 if (error != 0) 316 return (error); 317 if (len == 0 || osname[len - 1] != '\0') 318 return (EINVAL); 319 if (len > LINUX_MAX_UTSNAME) { 320 vfs_opterror(opts, "linux.osname too long"); 321 return (ENAMETOOLONG); 322 } 323 } 324 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 325 if (error != ENOENT) { 326 if (error != 0) 327 return (error); 328 if (len == 0 || osrelease[len - 1] != '\0') 329 return (EINVAL); 330 if (len > LINUX_MAX_UTSNAME) { 331 vfs_opterror(opts, "linux.osrelease too long"); 332 return (ENAMETOOLONG); 333 } 334 error = linux_map_osrel(osrelease, NULL); 335 if (error != 0) { 336 vfs_opterror(opts, "linux.osrelease format error"); 337 return (error); 338 } 339 } 340 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 341 sizeof(oss_version)); 342 343 if (error == ENOENT) 344 error = 0; 345 return (error); 346 } 347 348 static int 349 linux_prison_set(void *obj, void *data) 350 { 351 struct linux_prison *lpr; 352 struct prison *pr = obj; 353 struct vfsoptlist *opts = data; 354 char *osname, *osrelease; 355 int error, gotversion, jsys, len, oss_version; 356 357 /* Set the parameters, which should be correct. */ 358 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 359 if (error == ENOENT) 360 jsys = -1; 361 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 362 if (error == ENOENT) 363 osname = NULL; 364 else 365 jsys = JAIL_SYS_NEW; 366 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 367 if (error == ENOENT) 368 osrelease = NULL; 369 else 370 jsys = JAIL_SYS_NEW; 371 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 372 sizeof(oss_version)); 373 if (error == ENOENT) 374 gotversion = 0; 375 else { 376 gotversion = 1; 377 jsys = JAIL_SYS_NEW; 378 } 379 switch (jsys) { 380 case JAIL_SYS_INHERIT: 381 /* "linux=inherit": inherit the parent's Linux info. */ 382 mtx_lock(&pr->pr_mtx); 383 osd_jail_del(pr, linux_osd_jail_slot); 384 mtx_unlock(&pr->pr_mtx); 385 break; 386 case JAIL_SYS_NEW: 387 /* 388 * "linux=new" or "linux.*": 389 * the prison gets its own Linux info. 390 */ 391 linux_alloc_prison(pr, &lpr); 392 if (osrelease) { 393 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 394 strlcpy(lpr->pr_osrelease, osrelease, 395 LINUX_MAX_UTSNAME); 396 } 397 if (osname) 398 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 399 if (gotversion) 400 lpr->pr_oss_version = oss_version; 401 mtx_unlock(&pr->pr_mtx); 402 } 403 404 return (0); 405 } 406 407 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 408 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 409 "Jail Linux kernel OS name"); 410 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 411 "Jail Linux kernel OS release"); 412 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 413 "I", "Jail Linux OSS version"); 414 415 static int 416 linux_prison_get(void *obj, void *data) 417 { 418 struct linux_prison *lpr; 419 struct prison *ppr; 420 struct prison *pr = obj; 421 struct vfsoptlist *opts = data; 422 int error, i; 423 424 static int version0; 425 426 /* See if this prison is the one with the Linux info. */ 427 lpr = linux_find_prison(pr, &ppr); 428 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 429 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 430 if (error != 0 && error != ENOENT) 431 goto done; 432 if (i) { 433 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 434 if (error != 0 && error != ENOENT) 435 goto done; 436 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 437 if (error != 0 && error != ENOENT) 438 goto done; 439 error = vfs_setopt(opts, "linux.oss_version", 440 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 441 if (error != 0 && error != ENOENT) 442 goto done; 443 } else { 444 /* 445 * If this prison is inheriting its Linux info, report 446 * empty/zero parameters. 447 */ 448 error = vfs_setopts(opts, "linux.osname", ""); 449 if (error != 0 && error != ENOENT) 450 goto done; 451 error = vfs_setopts(opts, "linux.osrelease", ""); 452 if (error != 0 && error != ENOENT) 453 goto done; 454 error = vfs_setopt(opts, "linux.oss_version", &version0, 455 sizeof(lpr->pr_oss_version)); 456 if (error != 0 && error != ENOENT) 457 goto done; 458 } 459 error = 0; 460 461 done: 462 mtx_unlock(&ppr->pr_mtx); 463 464 return (error); 465 } 466 467 static void 468 linux_prison_destructor(void *data) 469 { 470 471 free(data, M_PRISON); 472 } 473 474 void 475 linux_osd_jail_register(void) 476 { 477 struct prison *pr; 478 osd_method_t methods[PR_MAXMETHOD] = { 479 [PR_METHOD_CREATE] = linux_prison_create, 480 [PR_METHOD_GET] = linux_prison_get, 481 [PR_METHOD_SET] = linux_prison_set, 482 [PR_METHOD_CHECK] = linux_prison_check 483 }; 484 485 linux_osd_jail_slot = 486 osd_jail_register(linux_prison_destructor, methods); 487 /* Copy the system Linux info to any current prisons. */ 488 sx_slock(&allprison_lock); 489 TAILQ_FOREACH(pr, &allprison, pr_list) 490 linux_alloc_prison(pr, NULL); 491 sx_sunlock(&allprison_lock); 492 } 493 494 void 495 linux_osd_jail_deregister(void) 496 { 497 498 osd_jail_deregister(linux_osd_jail_slot); 499 } 500 501 void 502 linux_get_osname(struct thread *td, char *dst) 503 { 504 struct prison *pr; 505 struct linux_prison *lpr; 506 507 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 508 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 509 mtx_unlock(&pr->pr_mtx); 510 } 511 512 static int 513 linux_set_osname(struct thread *td, char *osname) 514 { 515 struct prison *pr; 516 struct linux_prison *lpr; 517 518 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 519 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 520 mtx_unlock(&pr->pr_mtx); 521 522 return (0); 523 } 524 525 void 526 linux_get_osrelease(struct thread *td, char *dst) 527 { 528 struct prison *pr; 529 struct linux_prison *lpr; 530 531 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 532 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 533 mtx_unlock(&pr->pr_mtx); 534 } 535 536 int 537 linux_kernver(struct thread *td) 538 { 539 struct prison *pr; 540 struct linux_prison *lpr; 541 int osrel; 542 543 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 544 osrel = lpr->pr_osrel; 545 mtx_unlock(&pr->pr_mtx); 546 547 return (osrel); 548 } 549 550 static int 551 linux_set_osrelease(struct thread *td, char *osrelease) 552 { 553 struct prison *pr; 554 struct linux_prison *lpr; 555 int error; 556 557 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 558 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 559 if (error == 0) 560 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 561 mtx_unlock(&pr->pr_mtx); 562 563 return (error); 564 } 565 566 int 567 linux_get_oss_version(struct thread *td) 568 { 569 struct prison *pr; 570 struct linux_prison *lpr; 571 int version; 572 573 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 574 version = lpr->pr_oss_version; 575 mtx_unlock(&pr->pr_mtx); 576 577 return (version); 578 } 579 580 static int 581 linux_set_oss_version(struct thread *td, int oss_version) 582 { 583 struct prison *pr; 584 struct linux_prison *lpr; 585 586 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 587 lpr->pr_oss_version = oss_version; 588 mtx_unlock(&pr->pr_mtx); 589 590 return (0); 591 } 592