1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/sdt.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 #include <sys/proc.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/jail.h> 41 #include <sys/lock.h> 42 #include <sys/sx.h> 43 44 #include <compat/linux/linux_mib.h> 45 #include <compat/linux/linux_misc.h> 46 47 struct linux_prison { 48 char pr_osname[LINUX_MAX_UTSNAME]; 49 char pr_osrelease[LINUX_MAX_UTSNAME]; 50 int pr_oss_version; 51 int pr_osrel; 52 }; 53 54 static struct linux_prison lprison0 = { 55 .pr_osname = "Linux", 56 .pr_osrelease = LINUX_VERSION_STR, 57 .pr_oss_version = 0x030600, 58 .pr_osrel = LINUX_VERSION_CODE 59 }; 60 61 static unsigned linux_osd_jail_slot; 62 63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 64 "Linux mode"); 65 66 int linux_debug = 3; 67 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 68 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 69 70 int linux_default_openfiles = 1024; 71 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 72 &linux_default_openfiles, 0, 73 "Default soft openfiles resource limit, or -1 for unlimited"); 74 75 int linux_default_stacksize = 8 * 1024 * 1024; 76 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN, 77 &linux_default_stacksize, 0, 78 "Default soft stack size resource limit, or -1 for unlimited"); 79 80 int linux_dummy_rlimits = 0; 81 SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN, 82 &linux_dummy_rlimits, 0, 83 "Return dummy values for unsupported Linux-specific rlimits"); 84 85 int linux_ignore_ip_recverr = 1; 86 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 87 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 88 89 int linux_preserve_vstatus = 0; 90 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 91 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 92 93 bool linux_map_sched_prio = true; 94 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 95 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 96 "(not POSIX compliant)"); 97 98 int linux_use_emul_path = 1; 99 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN, 100 &linux_use_emul_path, 0, "Use linux.compat.emul_path"); 101 102 static int linux_set_osname(struct thread *td, char *osname); 103 static int linux_set_osrelease(struct thread *td, char *osrelease); 104 static int linux_set_oss_version(struct thread *td, int oss_version); 105 106 static int 107 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 108 { 109 char osname[LINUX_MAX_UTSNAME]; 110 int error; 111 112 linux_get_osname(req->td, osname); 113 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 114 if (error != 0 || req->newptr == NULL) 115 return (error); 116 error = linux_set_osname(req->td, osname); 117 118 return (error); 119 } 120 121 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 122 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 123 0, 0, linux_sysctl_osname, "A", 124 "Linux kernel OS name"); 125 126 static int 127 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 128 { 129 char osrelease[LINUX_MAX_UTSNAME]; 130 int error; 131 132 linux_get_osrelease(req->td, osrelease); 133 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 134 if (error != 0 || req->newptr == NULL) 135 return (error); 136 error = linux_set_osrelease(req->td, osrelease); 137 138 return (error); 139 } 140 141 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 142 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 143 0, 0, linux_sysctl_osrelease, "A", 144 "Linux kernel OS release"); 145 146 static int 147 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 148 { 149 int oss_version; 150 int error; 151 152 oss_version = linux_get_oss_version(req->td); 153 error = sysctl_handle_int(oidp, &oss_version, 0, req); 154 if (error != 0 || req->newptr == NULL) 155 return (error); 156 error = linux_set_oss_version(req->td, oss_version); 157 158 return (error); 159 } 160 161 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 162 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 163 0, 0, linux_sysctl_oss_version, "I", 164 "Linux OSS version"); 165 166 /* 167 * Map the osrelease into integer 168 */ 169 static int 170 linux_map_osrel(char *osrelease, int *osrel) 171 { 172 char *sep, *eosrelease; 173 int len, v0, v1, v2, v; 174 175 len = strlen(osrelease); 176 eosrelease = osrelease + len; 177 v0 = strtol(osrelease, &sep, 10); 178 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 179 return (EINVAL); 180 osrelease = sep + 1; 181 v1 = strtol(osrelease, &sep, 10); 182 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 183 return (EINVAL); 184 osrelease = sep + 1; 185 v2 = strtol(osrelease, &sep, 10); 186 if (osrelease == sep || 187 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 188 return (EINVAL); 189 190 v = LINUX_KERNVER(v0, v1, v2); 191 if (v < LINUX_KERNVER(1, 0, 0)) 192 return (EINVAL); 193 194 if (osrel != NULL) 195 *osrel = v; 196 197 return (0); 198 } 199 200 /* 201 * Find a prison with Linux info. 202 * Return the Linux info and the (locked) prison. 203 */ 204 static struct linux_prison * 205 linux_find_prison(struct prison *spr, struct prison **prp) 206 { 207 struct prison *pr; 208 struct linux_prison *lpr; 209 210 for (pr = spr;; pr = pr->pr_parent) { 211 mtx_lock(&pr->pr_mtx); 212 lpr = (pr == &prison0) 213 ? &lprison0 214 : osd_jail_get(pr, linux_osd_jail_slot); 215 if (lpr != NULL) 216 break; 217 mtx_unlock(&pr->pr_mtx); 218 } 219 *prp = pr; 220 221 return (lpr); 222 } 223 224 /* 225 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 226 * the Linux info and lock the prison. 227 */ 228 static void 229 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 230 { 231 struct prison *ppr; 232 struct linux_prison *lpr, *nlpr; 233 void **rsv; 234 235 /* If this prison already has Linux info, return that. */ 236 lpr = linux_find_prison(pr, &ppr); 237 if (ppr == pr) 238 goto done; 239 /* 240 * Allocate a new info record. Then check again, in case something 241 * changed during the allocation. 242 */ 243 mtx_unlock(&ppr->pr_mtx); 244 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 245 rsv = osd_reserve(linux_osd_jail_slot); 246 lpr = linux_find_prison(pr, &ppr); 247 if (ppr == pr) { 248 free(nlpr, M_PRISON); 249 osd_free_reserved(rsv); 250 goto done; 251 } 252 /* Inherit the initial values from the ancestor. */ 253 mtx_lock(&pr->pr_mtx); 254 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 255 bcopy(lpr, nlpr, sizeof(*lpr)); 256 lpr = nlpr; 257 mtx_unlock(&ppr->pr_mtx); 258 done: 259 if (lprp != NULL) 260 *lprp = lpr; 261 else 262 mtx_unlock(&pr->pr_mtx); 263 } 264 265 /* 266 * Jail OSD methods for Linux prison data. 267 */ 268 static int 269 linux_prison_create(void *obj, void *data) 270 { 271 struct prison *pr = obj; 272 struct vfsoptlist *opts = data; 273 int jsys; 274 275 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 276 jsys == JAIL_SYS_INHERIT) 277 return (0); 278 /* 279 * Inherit a prison's initial values from its parent 280 * (different from JAIL_SYS_INHERIT which also inherits changes). 281 */ 282 linux_alloc_prison(pr, NULL); 283 return (0); 284 } 285 286 static int 287 linux_prison_check(void *obj __unused, void *data) 288 { 289 struct vfsoptlist *opts = data; 290 char *osname, *osrelease; 291 int error, jsys, len, oss_version; 292 293 /* Check that the parameters are correct. */ 294 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 295 if (error != ENOENT) { 296 if (error != 0) 297 return (error); 298 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 299 return (EINVAL); 300 } 301 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 302 if (error != ENOENT) { 303 if (error != 0) 304 return (error); 305 if (len == 0 || osname[len - 1] != '\0') 306 return (EINVAL); 307 if (len > LINUX_MAX_UTSNAME) { 308 vfs_opterror(opts, "linux.osname too long"); 309 return (ENAMETOOLONG); 310 } 311 } 312 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 313 if (error != ENOENT) { 314 if (error != 0) 315 return (error); 316 if (len == 0 || osrelease[len - 1] != '\0') 317 return (EINVAL); 318 if (len > LINUX_MAX_UTSNAME) { 319 vfs_opterror(opts, "linux.osrelease too long"); 320 return (ENAMETOOLONG); 321 } 322 error = linux_map_osrel(osrelease, NULL); 323 if (error != 0) { 324 vfs_opterror(opts, "linux.osrelease format error"); 325 return (error); 326 } 327 } 328 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 329 sizeof(oss_version)); 330 331 if (error == ENOENT) 332 error = 0; 333 return (error); 334 } 335 336 static int 337 linux_prison_set(void *obj, void *data) 338 { 339 struct linux_prison *lpr; 340 struct prison *pr = obj; 341 struct vfsoptlist *opts = data; 342 char *osname, *osrelease; 343 int error, gotversion, jsys, len, oss_version; 344 345 /* Set the parameters, which should be correct. */ 346 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 347 if (error == ENOENT) 348 jsys = -1; 349 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 350 if (error == ENOENT) 351 osname = NULL; 352 else 353 jsys = JAIL_SYS_NEW; 354 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 355 if (error == ENOENT) 356 osrelease = NULL; 357 else 358 jsys = JAIL_SYS_NEW; 359 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 360 sizeof(oss_version)); 361 if (error == ENOENT) 362 gotversion = 0; 363 else { 364 gotversion = 1; 365 jsys = JAIL_SYS_NEW; 366 } 367 switch (jsys) { 368 case JAIL_SYS_INHERIT: 369 /* "linux=inherit": inherit the parent's Linux info. */ 370 mtx_lock(&pr->pr_mtx); 371 osd_jail_del(pr, linux_osd_jail_slot); 372 mtx_unlock(&pr->pr_mtx); 373 break; 374 case JAIL_SYS_NEW: 375 /* 376 * "linux=new" or "linux.*": 377 * the prison gets its own Linux info. 378 */ 379 linux_alloc_prison(pr, &lpr); 380 if (osrelease) { 381 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 382 strlcpy(lpr->pr_osrelease, osrelease, 383 LINUX_MAX_UTSNAME); 384 } 385 if (osname) 386 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 387 if (gotversion) 388 lpr->pr_oss_version = oss_version; 389 mtx_unlock(&pr->pr_mtx); 390 } 391 392 return (0); 393 } 394 395 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 396 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 397 "Jail Linux kernel OS name"); 398 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 399 "Jail Linux kernel OS release"); 400 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 401 "I", "Jail Linux OSS version"); 402 403 static int 404 linux_prison_get(void *obj, void *data) 405 { 406 struct linux_prison *lpr; 407 struct prison *ppr; 408 struct prison *pr = obj; 409 struct vfsoptlist *opts = data; 410 int error, i; 411 412 static int version0; 413 414 /* See if this prison is the one with the Linux info. */ 415 lpr = linux_find_prison(pr, &ppr); 416 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 417 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 418 if (error != 0 && error != ENOENT) 419 goto done; 420 if (i) { 421 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 422 if (error != 0 && error != ENOENT) 423 goto done; 424 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 425 if (error != 0 && error != ENOENT) 426 goto done; 427 error = vfs_setopt(opts, "linux.oss_version", 428 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 429 if (error != 0 && error != ENOENT) 430 goto done; 431 } else { 432 /* 433 * If this prison is inheriting its Linux info, report 434 * empty/zero parameters. 435 */ 436 error = vfs_setopts(opts, "linux.osname", ""); 437 if (error != 0 && error != ENOENT) 438 goto done; 439 error = vfs_setopts(opts, "linux.osrelease", ""); 440 if (error != 0 && error != ENOENT) 441 goto done; 442 error = vfs_setopt(opts, "linux.oss_version", &version0, 443 sizeof(lpr->pr_oss_version)); 444 if (error != 0 && error != ENOENT) 445 goto done; 446 } 447 error = 0; 448 449 done: 450 mtx_unlock(&ppr->pr_mtx); 451 452 return (error); 453 } 454 455 static void 456 linux_prison_destructor(void *data) 457 { 458 459 free(data, M_PRISON); 460 } 461 462 void 463 linux_osd_jail_register(void) 464 { 465 struct prison *pr; 466 osd_method_t methods[PR_MAXMETHOD] = { 467 [PR_METHOD_CREATE] = linux_prison_create, 468 [PR_METHOD_GET] = linux_prison_get, 469 [PR_METHOD_SET] = linux_prison_set, 470 [PR_METHOD_CHECK] = linux_prison_check 471 }; 472 473 linux_osd_jail_slot = 474 osd_jail_register(linux_prison_destructor, methods); 475 /* Copy the system Linux info to any current prisons. */ 476 sx_slock(&allprison_lock); 477 TAILQ_FOREACH(pr, &allprison, pr_list) 478 linux_alloc_prison(pr, NULL); 479 sx_sunlock(&allprison_lock); 480 } 481 482 void 483 linux_osd_jail_deregister(void) 484 { 485 486 osd_jail_deregister(linux_osd_jail_slot); 487 } 488 489 void 490 linux_get_osname(struct thread *td, char *dst) 491 { 492 struct prison *pr; 493 struct linux_prison *lpr; 494 495 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 496 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 497 mtx_unlock(&pr->pr_mtx); 498 } 499 500 static int 501 linux_set_osname(struct thread *td, char *osname) 502 { 503 struct prison *pr; 504 struct linux_prison *lpr; 505 506 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 507 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 508 mtx_unlock(&pr->pr_mtx); 509 510 return (0); 511 } 512 513 void 514 linux_get_osrelease(struct thread *td, char *dst) 515 { 516 struct prison *pr; 517 struct linux_prison *lpr; 518 519 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 520 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 521 mtx_unlock(&pr->pr_mtx); 522 } 523 524 int 525 linux_kernver(struct thread *td) 526 { 527 struct prison *pr; 528 struct linux_prison *lpr; 529 int osrel; 530 531 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 532 osrel = lpr->pr_osrel; 533 mtx_unlock(&pr->pr_mtx); 534 535 return (osrel); 536 } 537 538 static int 539 linux_set_osrelease(struct thread *td, char *osrelease) 540 { 541 struct prison *pr; 542 struct linux_prison *lpr; 543 int error; 544 545 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 546 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 547 if (error == 0) 548 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 549 mtx_unlock(&pr->pr_mtx); 550 551 return (error); 552 } 553 554 int 555 linux_get_oss_version(struct thread *td) 556 { 557 struct prison *pr; 558 struct linux_prison *lpr; 559 int version; 560 561 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 562 version = lpr->pr_oss_version; 563 mtx_unlock(&pr->pr_mtx); 564 565 return (version); 566 } 567 568 static int 569 linux_set_oss_version(struct thread *td, int oss_version) 570 { 571 struct prison *pr; 572 struct linux_prison *lpr; 573 574 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 575 lpr->pr_oss_version = oss_version; 576 mtx_unlock(&pr->pr_mtx); 577 578 return (0); 579 } 580