1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/lock.h> 31 #include <sys/malloc.h> 32 #include <sys/mount.h> 33 #include <sys/jail.h> 34 #include <sys/proc.h> 35 #include <sys/sx.h> 36 37 #include <compat/linux/linux_mib.h> 38 #include <compat/linux/linux_misc.h> 39 40 struct linux_prison { 41 char pr_osname[LINUX_MAX_UTSNAME]; 42 char pr_osrelease[LINUX_MAX_UTSNAME]; 43 int pr_oss_version; 44 int pr_osrel; 45 }; 46 47 static struct linux_prison lprison0 = { 48 .pr_osname = "Linux", 49 .pr_osrelease = LINUX_VERSION_STR, 50 .pr_oss_version = 0x030600, 51 .pr_osrel = LINUX_VERSION_CODE 52 }; 53 54 static unsigned linux_osd_jail_slot; 55 56 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 57 "Linux mode"); 58 59 int linux_debug = 3; 60 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 61 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 62 63 int linux_default_openfiles = 1024; 64 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 65 &linux_default_openfiles, 0, 66 "Default soft openfiles resource limit, or -1 for unlimited"); 67 68 int linux_default_stacksize = 8 * 1024 * 1024; 69 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN, 70 &linux_default_stacksize, 0, 71 "Default soft stack size resource limit, or -1 for unlimited"); 72 73 int linux_dummy_rlimits = 0; 74 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN, 75 &linux_dummy_rlimits, 0, 76 "Return dummy values for unsupported Linux-specific rlimits"); 77 78 int linux_ignore_ip_recverr = 1; 79 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 80 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 81 82 int linux_preserve_vstatus = 1; 83 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 84 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 85 86 bool linux_map_sched_prio = true; 87 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 88 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 89 "(not POSIX compliant)"); 90 91 static bool linux_setid_allowed = true; 92 SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN, 93 &linux_setid_allowed, 0, 94 "Allow setuid/setgid on execve of Linux binary"); 95 96 int 97 linux_setid_allowed_query(struct thread *td __unused, 98 struct image_params *imgp __unused) 99 { 100 return (linux_setid_allowed); 101 } 102 103 static int linux_set_osname(struct thread *td, char *osname); 104 static int linux_set_osrelease(struct thread *td, char *osrelease); 105 static int linux_set_oss_version(struct thread *td, int oss_version); 106 107 static int 108 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 109 { 110 char osname[LINUX_MAX_UTSNAME]; 111 int error; 112 113 linux_get_osname(req->td, osname); 114 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 115 if (error != 0 || req->newptr == NULL) 116 return (error); 117 error = linux_set_osname(req->td, osname); 118 119 return (error); 120 } 121 122 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 123 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 124 0, 0, linux_sysctl_osname, "A", 125 "Linux kernel OS name"); 126 127 static int 128 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 129 { 130 char osrelease[LINUX_MAX_UTSNAME]; 131 int error; 132 133 linux_get_osrelease(req->td, osrelease); 134 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 135 if (error != 0 || req->newptr == NULL) 136 return (error); 137 error = linux_set_osrelease(req->td, osrelease); 138 139 return (error); 140 } 141 142 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 143 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 144 0, 0, linux_sysctl_osrelease, "A", 145 "Linux kernel OS release"); 146 147 static int 148 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 149 { 150 int oss_version; 151 int error; 152 153 oss_version = linux_get_oss_version(req->td); 154 error = sysctl_handle_int(oidp, &oss_version, 0, req); 155 if (error != 0 || req->newptr == NULL) 156 return (error); 157 error = linux_set_oss_version(req->td, oss_version); 158 159 return (error); 160 } 161 162 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 163 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 164 0, 0, linux_sysctl_oss_version, "I", 165 "Linux OSS version"); 166 167 /* 168 * Map the osrelease into integer 169 */ 170 static int 171 linux_map_osrel(char *osrelease, int *osrel) 172 { 173 char *sep, *eosrelease; 174 int len, v0, v1, v2, v; 175 176 len = strlen(osrelease); 177 eosrelease = osrelease + len; 178 v0 = strtol(osrelease, &sep, 10); 179 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 180 return (EINVAL); 181 osrelease = sep + 1; 182 v1 = strtol(osrelease, &sep, 10); 183 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 184 return (EINVAL); 185 osrelease = sep + 1; 186 v2 = strtol(osrelease, &sep, 10); 187 if (osrelease == sep || 188 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 189 return (EINVAL); 190 191 v = LINUX_KERNVER(v0, v1, v2); 192 if (v < LINUX_KERNVER(1, 0, 0)) 193 return (EINVAL); 194 195 if (osrel != NULL) 196 *osrel = v; 197 198 return (0); 199 } 200 201 /* 202 * Find a prison with Linux info. 203 * Return the Linux info and the (locked) prison. 204 */ 205 static struct linux_prison * 206 linux_find_prison(struct prison *spr, struct prison **prp) 207 { 208 struct prison *pr; 209 struct linux_prison *lpr; 210 211 for (pr = spr;; pr = pr->pr_parent) { 212 mtx_lock(&pr->pr_mtx); 213 lpr = (pr == &prison0) 214 ? &lprison0 215 : osd_jail_get(pr, linux_osd_jail_slot); 216 if (lpr != NULL) 217 break; 218 mtx_unlock(&pr->pr_mtx); 219 } 220 *prp = pr; 221 222 return (lpr); 223 } 224 225 /* 226 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 227 * the Linux info and lock the prison. 228 */ 229 static void 230 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 231 { 232 struct prison *ppr; 233 struct linux_prison *lpr, *nlpr; 234 void **rsv; 235 236 /* If this prison already has Linux info, return that. */ 237 lpr = linux_find_prison(pr, &ppr); 238 if (ppr == pr) 239 goto done; 240 /* 241 * Allocate a new info record. Then check again, in case something 242 * changed during the allocation. 243 */ 244 mtx_unlock(&ppr->pr_mtx); 245 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 246 rsv = osd_reserve(linux_osd_jail_slot); 247 lpr = linux_find_prison(pr, &ppr); 248 if (ppr == pr) { 249 free(nlpr, M_PRISON); 250 osd_free_reserved(rsv); 251 goto done; 252 } 253 /* Inherit the initial values from the ancestor. */ 254 mtx_lock(&pr->pr_mtx); 255 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 256 bcopy(lpr, nlpr, sizeof(*lpr)); 257 lpr = nlpr; 258 mtx_unlock(&ppr->pr_mtx); 259 done: 260 if (lprp != NULL) 261 *lprp = lpr; 262 else 263 mtx_unlock(&pr->pr_mtx); 264 } 265 266 /* 267 * Jail OSD methods for Linux prison data. 268 */ 269 static int 270 linux_prison_create(void *obj, void *data) 271 { 272 struct prison *pr = obj; 273 struct vfsoptlist *opts = data; 274 int jsys; 275 276 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 277 jsys == JAIL_SYS_INHERIT) 278 return (0); 279 /* 280 * Inherit a prison's initial values from its parent 281 * (different from JAIL_SYS_INHERIT which also inherits changes). 282 */ 283 linux_alloc_prison(pr, NULL); 284 return (0); 285 } 286 287 static int 288 linux_prison_check(void *obj __unused, void *data) 289 { 290 struct vfsoptlist *opts = data; 291 char *osname, *osrelease; 292 int error, jsys, len, oss_version; 293 294 /* Check that the parameters are correct. */ 295 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 296 if (error != ENOENT) { 297 if (error != 0) 298 return (error); 299 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 300 return (EINVAL); 301 } 302 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 303 if (error != ENOENT) { 304 if (error != 0) 305 return (error); 306 if (len == 0 || osname[len - 1] != '\0') 307 return (EINVAL); 308 if (len > LINUX_MAX_UTSNAME) { 309 vfs_opterror(opts, "linux.osname too long"); 310 return (ENAMETOOLONG); 311 } 312 } 313 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 314 if (error != ENOENT) { 315 if (error != 0) 316 return (error); 317 if (len == 0 || osrelease[len - 1] != '\0') 318 return (EINVAL); 319 if (len > LINUX_MAX_UTSNAME) { 320 vfs_opterror(opts, "linux.osrelease too long"); 321 return (ENAMETOOLONG); 322 } 323 error = linux_map_osrel(osrelease, NULL); 324 if (error != 0) { 325 vfs_opterror(opts, "linux.osrelease format error"); 326 return (error); 327 } 328 } 329 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 330 sizeof(oss_version)); 331 332 if (error == ENOENT) 333 error = 0; 334 return (error); 335 } 336 337 static int 338 linux_prison_set(void *obj, void *data) 339 { 340 struct linux_prison *lpr; 341 struct prison *pr = obj; 342 struct vfsoptlist *opts = data; 343 char *osname, *osrelease; 344 int error, gotversion, jsys, len, oss_version; 345 346 /* Set the parameters, which should be correct. */ 347 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 348 if (error == ENOENT) 349 jsys = -1; 350 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 351 if (error == ENOENT) 352 osname = NULL; 353 else 354 jsys = JAIL_SYS_NEW; 355 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 356 if (error == ENOENT) 357 osrelease = NULL; 358 else 359 jsys = JAIL_SYS_NEW; 360 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 361 sizeof(oss_version)); 362 if (error == ENOENT) 363 gotversion = 0; 364 else { 365 gotversion = 1; 366 jsys = JAIL_SYS_NEW; 367 } 368 switch (jsys) { 369 case JAIL_SYS_INHERIT: 370 /* "linux=inherit": inherit the parent's Linux info. */ 371 mtx_lock(&pr->pr_mtx); 372 osd_jail_del(pr, linux_osd_jail_slot); 373 mtx_unlock(&pr->pr_mtx); 374 break; 375 case JAIL_SYS_NEW: 376 /* 377 * "linux=new" or "linux.*": 378 * the prison gets its own Linux info. 379 */ 380 linux_alloc_prison(pr, &lpr); 381 if (osrelease) { 382 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 383 strlcpy(lpr->pr_osrelease, osrelease, 384 LINUX_MAX_UTSNAME); 385 } 386 if (osname) 387 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 388 if (gotversion) 389 lpr->pr_oss_version = oss_version; 390 mtx_unlock(&pr->pr_mtx); 391 } 392 393 return (0); 394 } 395 396 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 397 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 398 "Jail Linux kernel OS name"); 399 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 400 "Jail Linux kernel OS release"); 401 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 402 "I", "Jail Linux OSS version"); 403 404 static int 405 linux_prison_get(void *obj, void *data) 406 { 407 struct linux_prison *lpr; 408 struct prison *ppr; 409 struct prison *pr = obj; 410 struct vfsoptlist *opts = data; 411 int error, i; 412 413 static int version0; 414 415 /* See if this prison is the one with the Linux info. */ 416 lpr = linux_find_prison(pr, &ppr); 417 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 418 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 419 if (error != 0 && error != ENOENT) 420 goto done; 421 if (i) { 422 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 423 if (error != 0 && error != ENOENT) 424 goto done; 425 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 426 if (error != 0 && error != ENOENT) 427 goto done; 428 error = vfs_setopt(opts, "linux.oss_version", 429 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 430 if (error != 0 && error != ENOENT) 431 goto done; 432 } else { 433 /* 434 * If this prison is inheriting its Linux info, report 435 * empty/zero parameters. 436 */ 437 error = vfs_setopts(opts, "linux.osname", ""); 438 if (error != 0 && error != ENOENT) 439 goto done; 440 error = vfs_setopts(opts, "linux.osrelease", ""); 441 if (error != 0 && error != ENOENT) 442 goto done; 443 error = vfs_setopt(opts, "linux.oss_version", &version0, 444 sizeof(lpr->pr_oss_version)); 445 if (error != 0 && error != ENOENT) 446 goto done; 447 } 448 error = 0; 449 450 done: 451 mtx_unlock(&ppr->pr_mtx); 452 453 return (error); 454 } 455 456 static void 457 linux_prison_destructor(void *data) 458 { 459 460 free(data, M_PRISON); 461 } 462 463 void 464 linux_osd_jail_register(void) 465 { 466 struct prison *pr; 467 osd_method_t methods[PR_MAXMETHOD] = { 468 [PR_METHOD_CREATE] = linux_prison_create, 469 [PR_METHOD_GET] = linux_prison_get, 470 [PR_METHOD_SET] = linux_prison_set, 471 [PR_METHOD_CHECK] = linux_prison_check 472 }; 473 474 linux_osd_jail_slot = 475 osd_jail_register(linux_prison_destructor, methods); 476 /* Copy the system Linux info to any current prisons. */ 477 sx_slock(&allprison_lock); 478 TAILQ_FOREACH(pr, &allprison, pr_list) 479 linux_alloc_prison(pr, NULL); 480 sx_sunlock(&allprison_lock); 481 } 482 483 void 484 linux_osd_jail_deregister(void) 485 { 486 487 osd_jail_deregister(linux_osd_jail_slot); 488 } 489 490 void 491 linux_get_osname(struct thread *td, char *dst) 492 { 493 struct prison *pr; 494 struct linux_prison *lpr; 495 496 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 497 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 498 mtx_unlock(&pr->pr_mtx); 499 } 500 501 static int 502 linux_set_osname(struct thread *td, char *osname) 503 { 504 struct prison *pr; 505 struct linux_prison *lpr; 506 507 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 508 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 509 mtx_unlock(&pr->pr_mtx); 510 511 return (0); 512 } 513 514 void 515 linux_get_osrelease(struct thread *td, char *dst) 516 { 517 struct prison *pr; 518 struct linux_prison *lpr; 519 520 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 521 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 522 mtx_unlock(&pr->pr_mtx); 523 } 524 525 int 526 linux_kernver(struct thread *td) 527 { 528 struct prison *pr; 529 struct linux_prison *lpr; 530 int osrel; 531 532 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 533 osrel = lpr->pr_osrel; 534 mtx_unlock(&pr->pr_mtx); 535 536 return (osrel); 537 } 538 539 static int 540 linux_set_osrelease(struct thread *td, char *osrelease) 541 { 542 struct prison *pr; 543 struct linux_prison *lpr; 544 int error; 545 546 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 547 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 548 if (error == 0) 549 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 550 mtx_unlock(&pr->pr_mtx); 551 552 return (error); 553 } 554 555 int 556 linux_get_oss_version(struct thread *td) 557 { 558 struct prison *pr; 559 struct linux_prison *lpr; 560 int version; 561 562 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 563 version = lpr->pr_oss_version; 564 mtx_unlock(&pr->pr_mtx); 565 566 return (version); 567 } 568 569 static int 570 linux_set_oss_version(struct thread *td, int oss_version) 571 { 572 struct prison *pr; 573 struct linux_prison *lpr; 574 575 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 576 lpr->pr_oss_version = oss_version; 577 mtx_unlock(&pr->pr_mtx); 578 579 return (0); 580 } 581