1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/sdt.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 #include <sys/proc.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/jail.h> 41 #include <sys/lock.h> 42 #include <sys/sx.h> 43 44 #include <compat/linux/linux_mib.h> 45 #include <compat/linux/linux_misc.h> 46 47 struct linux_prison { 48 char pr_osname[LINUX_MAX_UTSNAME]; 49 char pr_osrelease[LINUX_MAX_UTSNAME]; 50 int pr_oss_version; 51 int pr_osrel; 52 }; 53 54 static struct linux_prison lprison0 = { 55 .pr_osname = "Linux", 56 .pr_osrelease = LINUX_VERSION_STR, 57 .pr_oss_version = 0x030600, 58 .pr_osrel = LINUX_VERSION_CODE 59 }; 60 61 static unsigned linux_osd_jail_slot; 62 63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 64 "Linux mode"); 65 66 int linux_debug = 1; 67 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 68 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 69 70 int linux_default_openfiles = 1024; 71 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 72 &linux_default_openfiles, 0, 73 "Default soft openfiles resource limit, or -1 for unlimited"); 74 75 int linux_default_stacksize = 8 * 1024 * 1024; 76 SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN, 77 &linux_default_stacksize, 0, 78 "Default soft stack size resource limit, or -1 for unlimited"); 79 80 int linux_ignore_ip_recverr = 1; 81 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 82 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 83 84 int linux_preserve_vstatus = 0; 85 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 86 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 87 88 bool linux_map_sched_prio = true; 89 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 90 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 91 "(not POSIX compliant)"); 92 93 int linux_use_emul_path = 1; 94 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN, 95 &linux_use_emul_path, 0, "Use linux.compat.emul_path"); 96 97 static int linux_set_osname(struct thread *td, char *osname); 98 static int linux_set_osrelease(struct thread *td, char *osrelease); 99 static int linux_set_oss_version(struct thread *td, int oss_version); 100 101 static int 102 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 103 { 104 char osname[LINUX_MAX_UTSNAME]; 105 int error; 106 107 linux_get_osname(req->td, osname); 108 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 109 if (error != 0 || req->newptr == NULL) 110 return (error); 111 error = linux_set_osname(req->td, osname); 112 113 return (error); 114 } 115 116 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 117 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 118 0, 0, linux_sysctl_osname, "A", 119 "Linux kernel OS name"); 120 121 static int 122 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 123 { 124 char osrelease[LINUX_MAX_UTSNAME]; 125 int error; 126 127 linux_get_osrelease(req->td, osrelease); 128 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 129 if (error != 0 || req->newptr == NULL) 130 return (error); 131 error = linux_set_osrelease(req->td, osrelease); 132 133 return (error); 134 } 135 136 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 137 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 138 0, 0, linux_sysctl_osrelease, "A", 139 "Linux kernel OS release"); 140 141 static int 142 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 143 { 144 int oss_version; 145 int error; 146 147 oss_version = linux_get_oss_version(req->td); 148 error = sysctl_handle_int(oidp, &oss_version, 0, req); 149 if (error != 0 || req->newptr == NULL) 150 return (error); 151 error = linux_set_oss_version(req->td, oss_version); 152 153 return (error); 154 } 155 156 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 157 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 158 0, 0, linux_sysctl_oss_version, "I", 159 "Linux OSS version"); 160 161 /* 162 * Map the osrelease into integer 163 */ 164 static int 165 linux_map_osrel(char *osrelease, int *osrel) 166 { 167 char *sep, *eosrelease; 168 int len, v0, v1, v2, v; 169 170 len = strlen(osrelease); 171 eosrelease = osrelease + len; 172 v0 = strtol(osrelease, &sep, 10); 173 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 174 return (EINVAL); 175 osrelease = sep + 1; 176 v1 = strtol(osrelease, &sep, 10); 177 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 178 return (EINVAL); 179 osrelease = sep + 1; 180 v2 = strtol(osrelease, &sep, 10); 181 if (osrelease == sep || 182 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 183 return (EINVAL); 184 185 v = LINUX_KERNVER(v0, v1, v2); 186 if (v < LINUX_KERNVER(1, 0, 0)) 187 return (EINVAL); 188 189 if (osrel != NULL) 190 *osrel = v; 191 192 return (0); 193 } 194 195 /* 196 * Find a prison with Linux info. 197 * Return the Linux info and the (locked) prison. 198 */ 199 static struct linux_prison * 200 linux_find_prison(struct prison *spr, struct prison **prp) 201 { 202 struct prison *pr; 203 struct linux_prison *lpr; 204 205 for (pr = spr;; pr = pr->pr_parent) { 206 mtx_lock(&pr->pr_mtx); 207 lpr = (pr == &prison0) 208 ? &lprison0 209 : osd_jail_get(pr, linux_osd_jail_slot); 210 if (lpr != NULL) 211 break; 212 mtx_unlock(&pr->pr_mtx); 213 } 214 *prp = pr; 215 216 return (lpr); 217 } 218 219 /* 220 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 221 * the Linux info and lock the prison. 222 */ 223 static void 224 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 225 { 226 struct prison *ppr; 227 struct linux_prison *lpr, *nlpr; 228 void **rsv; 229 230 /* If this prison already has Linux info, return that. */ 231 lpr = linux_find_prison(pr, &ppr); 232 if (ppr == pr) 233 goto done; 234 /* 235 * Allocate a new info record. Then check again, in case something 236 * changed during the allocation. 237 */ 238 mtx_unlock(&ppr->pr_mtx); 239 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 240 rsv = osd_reserve(linux_osd_jail_slot); 241 lpr = linux_find_prison(pr, &ppr); 242 if (ppr == pr) { 243 free(nlpr, M_PRISON); 244 osd_free_reserved(rsv); 245 goto done; 246 } 247 /* Inherit the initial values from the ancestor. */ 248 mtx_lock(&pr->pr_mtx); 249 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 250 bcopy(lpr, nlpr, sizeof(*lpr)); 251 lpr = nlpr; 252 mtx_unlock(&ppr->pr_mtx); 253 done: 254 if (lprp != NULL) 255 *lprp = lpr; 256 else 257 mtx_unlock(&pr->pr_mtx); 258 } 259 260 /* 261 * Jail OSD methods for Linux prison data. 262 */ 263 static int 264 linux_prison_create(void *obj, void *data) 265 { 266 struct prison *pr = obj; 267 struct vfsoptlist *opts = data; 268 int jsys; 269 270 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 271 jsys == JAIL_SYS_INHERIT) 272 return (0); 273 /* 274 * Inherit a prison's initial values from its parent 275 * (different from JAIL_SYS_INHERIT which also inherits changes). 276 */ 277 linux_alloc_prison(pr, NULL); 278 return (0); 279 } 280 281 static int 282 linux_prison_check(void *obj __unused, void *data) 283 { 284 struct vfsoptlist *opts = data; 285 char *osname, *osrelease; 286 int error, jsys, len, oss_version; 287 288 /* Check that the parameters are correct. */ 289 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 290 if (error != ENOENT) { 291 if (error != 0) 292 return (error); 293 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 294 return (EINVAL); 295 } 296 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 297 if (error != ENOENT) { 298 if (error != 0) 299 return (error); 300 if (len == 0 || osname[len - 1] != '\0') 301 return (EINVAL); 302 if (len > LINUX_MAX_UTSNAME) { 303 vfs_opterror(opts, "linux.osname too long"); 304 return (ENAMETOOLONG); 305 } 306 } 307 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 308 if (error != ENOENT) { 309 if (error != 0) 310 return (error); 311 if (len == 0 || osrelease[len - 1] != '\0') 312 return (EINVAL); 313 if (len > LINUX_MAX_UTSNAME) { 314 vfs_opterror(opts, "linux.osrelease too long"); 315 return (ENAMETOOLONG); 316 } 317 error = linux_map_osrel(osrelease, NULL); 318 if (error != 0) { 319 vfs_opterror(opts, "linux.osrelease format error"); 320 return (error); 321 } 322 } 323 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 324 sizeof(oss_version)); 325 326 if (error == ENOENT) 327 error = 0; 328 return (error); 329 } 330 331 static int 332 linux_prison_set(void *obj, void *data) 333 { 334 struct linux_prison *lpr; 335 struct prison *pr = obj; 336 struct vfsoptlist *opts = data; 337 char *osname, *osrelease; 338 int error, gotversion, jsys, len, oss_version; 339 340 /* Set the parameters, which should be correct. */ 341 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 342 if (error == ENOENT) 343 jsys = -1; 344 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 345 if (error == ENOENT) 346 osname = NULL; 347 else 348 jsys = JAIL_SYS_NEW; 349 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 350 if (error == ENOENT) 351 osrelease = NULL; 352 else 353 jsys = JAIL_SYS_NEW; 354 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 355 sizeof(oss_version)); 356 if (error == ENOENT) 357 gotversion = 0; 358 else { 359 gotversion = 1; 360 jsys = JAIL_SYS_NEW; 361 } 362 switch (jsys) { 363 case JAIL_SYS_INHERIT: 364 /* "linux=inherit": inherit the parent's Linux info. */ 365 mtx_lock(&pr->pr_mtx); 366 osd_jail_del(pr, linux_osd_jail_slot); 367 mtx_unlock(&pr->pr_mtx); 368 break; 369 case JAIL_SYS_NEW: 370 /* 371 * "linux=new" or "linux.*": 372 * the prison gets its own Linux info. 373 */ 374 linux_alloc_prison(pr, &lpr); 375 if (osrelease) { 376 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 377 strlcpy(lpr->pr_osrelease, osrelease, 378 LINUX_MAX_UTSNAME); 379 } 380 if (osname) 381 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 382 if (gotversion) 383 lpr->pr_oss_version = oss_version; 384 mtx_unlock(&pr->pr_mtx); 385 } 386 387 return (0); 388 } 389 390 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 391 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 392 "Jail Linux kernel OS name"); 393 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 394 "Jail Linux kernel OS release"); 395 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 396 "I", "Jail Linux OSS version"); 397 398 static int 399 linux_prison_get(void *obj, void *data) 400 { 401 struct linux_prison *lpr; 402 struct prison *ppr; 403 struct prison *pr = obj; 404 struct vfsoptlist *opts = data; 405 int error, i; 406 407 static int version0; 408 409 /* See if this prison is the one with the Linux info. */ 410 lpr = linux_find_prison(pr, &ppr); 411 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 412 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 413 if (error != 0 && error != ENOENT) 414 goto done; 415 if (i) { 416 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 417 if (error != 0 && error != ENOENT) 418 goto done; 419 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 420 if (error != 0 && error != ENOENT) 421 goto done; 422 error = vfs_setopt(opts, "linux.oss_version", 423 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 424 if (error != 0 && error != ENOENT) 425 goto done; 426 } else { 427 /* 428 * If this prison is inheriting its Linux info, report 429 * empty/zero parameters. 430 */ 431 error = vfs_setopts(opts, "linux.osname", ""); 432 if (error != 0 && error != ENOENT) 433 goto done; 434 error = vfs_setopts(opts, "linux.osrelease", ""); 435 if (error != 0 && error != ENOENT) 436 goto done; 437 error = vfs_setopt(opts, "linux.oss_version", &version0, 438 sizeof(lpr->pr_oss_version)); 439 if (error != 0 && error != ENOENT) 440 goto done; 441 } 442 error = 0; 443 444 done: 445 mtx_unlock(&ppr->pr_mtx); 446 447 return (error); 448 } 449 450 static void 451 linux_prison_destructor(void *data) 452 { 453 454 free(data, M_PRISON); 455 } 456 457 void 458 linux_osd_jail_register(void) 459 { 460 struct prison *pr; 461 osd_method_t methods[PR_MAXMETHOD] = { 462 [PR_METHOD_CREATE] = linux_prison_create, 463 [PR_METHOD_GET] = linux_prison_get, 464 [PR_METHOD_SET] = linux_prison_set, 465 [PR_METHOD_CHECK] = linux_prison_check 466 }; 467 468 linux_osd_jail_slot = 469 osd_jail_register(linux_prison_destructor, methods); 470 /* Copy the system Linux info to any current prisons. */ 471 sx_slock(&allprison_lock); 472 TAILQ_FOREACH(pr, &allprison, pr_list) 473 linux_alloc_prison(pr, NULL); 474 sx_sunlock(&allprison_lock); 475 } 476 477 void 478 linux_osd_jail_deregister(void) 479 { 480 481 osd_jail_deregister(linux_osd_jail_slot); 482 } 483 484 void 485 linux_get_osname(struct thread *td, char *dst) 486 { 487 struct prison *pr; 488 struct linux_prison *lpr; 489 490 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 491 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 492 mtx_unlock(&pr->pr_mtx); 493 } 494 495 static int 496 linux_set_osname(struct thread *td, char *osname) 497 { 498 struct prison *pr; 499 struct linux_prison *lpr; 500 501 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 502 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 503 mtx_unlock(&pr->pr_mtx); 504 505 return (0); 506 } 507 508 void 509 linux_get_osrelease(struct thread *td, char *dst) 510 { 511 struct prison *pr; 512 struct linux_prison *lpr; 513 514 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 515 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 516 mtx_unlock(&pr->pr_mtx); 517 } 518 519 int 520 linux_kernver(struct thread *td) 521 { 522 struct prison *pr; 523 struct linux_prison *lpr; 524 int osrel; 525 526 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 527 osrel = lpr->pr_osrel; 528 mtx_unlock(&pr->pr_mtx); 529 530 return (osrel); 531 } 532 533 static int 534 linux_set_osrelease(struct thread *td, char *osrelease) 535 { 536 struct prison *pr; 537 struct linux_prison *lpr; 538 int error; 539 540 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 541 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 542 if (error == 0) 543 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 544 mtx_unlock(&pr->pr_mtx); 545 546 return (error); 547 } 548 549 int 550 linux_get_oss_version(struct thread *td) 551 { 552 struct prison *pr; 553 struct linux_prison *lpr; 554 int version; 555 556 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 557 version = lpr->pr_oss_version; 558 mtx_unlock(&pr->pr_mtx); 559 560 return (version); 561 } 562 563 static int 564 linux_set_oss_version(struct thread *td, int oss_version) 565 { 566 struct prison *pr; 567 struct linux_prison *lpr; 568 569 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 570 lpr->pr_oss_version = oss_version; 571 mtx_unlock(&pr->pr_mtx); 572 573 return (0); 574 } 575