1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/sdt.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 #include <sys/proc.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/jail.h> 41 #include <sys/lock.h> 42 #include <sys/sx.h> 43 44 #include <compat/linux/linux_mib.h> 45 #include <compat/linux/linux_misc.h> 46 47 struct linux_prison { 48 char pr_osname[LINUX_MAX_UTSNAME]; 49 char pr_osrelease[LINUX_MAX_UTSNAME]; 50 int pr_oss_version; 51 int pr_osrel; 52 }; 53 54 static struct linux_prison lprison0 = { 55 .pr_osname = "Linux", 56 .pr_osrelease = LINUX_VERSION_STR, 57 .pr_oss_version = 0x030600, 58 .pr_osrel = LINUX_VERSION_CODE 59 }; 60 61 static unsigned linux_osd_jail_slot; 62 63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 64 "Linux mode"); 65 66 int linux_debug = 1; 67 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 68 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 69 70 int linux_default_openfiles = 1024; 71 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 72 &linux_default_openfiles, 0, 73 "Default soft openfiles resource limit, or -1 for unlimited"); 74 75 int linux_ignore_ip_recverr = 1; 76 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 77 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 78 79 int linux_preserve_vstatus = 0; 80 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 81 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 82 83 bool linux_map_sched_prio = true; 84 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 85 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 86 "(not POSIX compliant)"); 87 88 int linux_use_emul_path = 1; 89 SYSCTL_INT(_compat_linux, OID_AUTO, use_emul_path, CTLFLAG_RWTUN, 90 &linux_use_emul_path, 0, "Use linux.compat.emul_path"); 91 92 static int linux_set_osname(struct thread *td, char *osname); 93 static int linux_set_osrelease(struct thread *td, char *osrelease); 94 static int linux_set_oss_version(struct thread *td, int oss_version); 95 96 static int 97 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 98 { 99 char osname[LINUX_MAX_UTSNAME]; 100 int error; 101 102 linux_get_osname(req->td, osname); 103 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 104 if (error != 0 || req->newptr == NULL) 105 return (error); 106 error = linux_set_osname(req->td, osname); 107 108 return (error); 109 } 110 111 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 112 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 113 0, 0, linux_sysctl_osname, "A", 114 "Linux kernel OS name"); 115 116 static int 117 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 118 { 119 char osrelease[LINUX_MAX_UTSNAME]; 120 int error; 121 122 linux_get_osrelease(req->td, osrelease); 123 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 124 if (error != 0 || req->newptr == NULL) 125 return (error); 126 error = linux_set_osrelease(req->td, osrelease); 127 128 return (error); 129 } 130 131 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 132 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 133 0, 0, linux_sysctl_osrelease, "A", 134 "Linux kernel OS release"); 135 136 static int 137 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 138 { 139 int oss_version; 140 int error; 141 142 oss_version = linux_get_oss_version(req->td); 143 error = sysctl_handle_int(oidp, &oss_version, 0, req); 144 if (error != 0 || req->newptr == NULL) 145 return (error); 146 error = linux_set_oss_version(req->td, oss_version); 147 148 return (error); 149 } 150 151 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 152 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 153 0, 0, linux_sysctl_oss_version, "I", 154 "Linux OSS version"); 155 156 /* 157 * Map the osrelease into integer 158 */ 159 static int 160 linux_map_osrel(char *osrelease, int *osrel) 161 { 162 char *sep, *eosrelease; 163 int len, v0, v1, v2, v; 164 165 len = strlen(osrelease); 166 eosrelease = osrelease + len; 167 v0 = strtol(osrelease, &sep, 10); 168 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 169 return (EINVAL); 170 osrelease = sep + 1; 171 v1 = strtol(osrelease, &sep, 10); 172 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 173 return (EINVAL); 174 osrelease = sep + 1; 175 v2 = strtol(osrelease, &sep, 10); 176 if (osrelease == sep || 177 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 178 return (EINVAL); 179 180 v = LINUX_KERNVER(v0, v1, v2); 181 if (v < LINUX_KERNVER(1, 0, 0)) 182 return (EINVAL); 183 184 if (osrel != NULL) 185 *osrel = v; 186 187 return (0); 188 } 189 190 /* 191 * Find a prison with Linux info. 192 * Return the Linux info and the (locked) prison. 193 */ 194 static struct linux_prison * 195 linux_find_prison(struct prison *spr, struct prison **prp) 196 { 197 struct prison *pr; 198 struct linux_prison *lpr; 199 200 for (pr = spr;; pr = pr->pr_parent) { 201 mtx_lock(&pr->pr_mtx); 202 lpr = (pr == &prison0) 203 ? &lprison0 204 : osd_jail_get(pr, linux_osd_jail_slot); 205 if (lpr != NULL) 206 break; 207 mtx_unlock(&pr->pr_mtx); 208 } 209 *prp = pr; 210 211 return (lpr); 212 } 213 214 /* 215 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 216 * the Linux info and lock the prison. 217 */ 218 static void 219 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 220 { 221 struct prison *ppr; 222 struct linux_prison *lpr, *nlpr; 223 void **rsv; 224 225 /* If this prison already has Linux info, return that. */ 226 lpr = linux_find_prison(pr, &ppr); 227 if (ppr == pr) 228 goto done; 229 /* 230 * Allocate a new info record. Then check again, in case something 231 * changed during the allocation. 232 */ 233 mtx_unlock(&ppr->pr_mtx); 234 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 235 rsv = osd_reserve(linux_osd_jail_slot); 236 lpr = linux_find_prison(pr, &ppr); 237 if (ppr == pr) { 238 free(nlpr, M_PRISON); 239 osd_free_reserved(rsv); 240 goto done; 241 } 242 /* Inherit the initial values from the ancestor. */ 243 mtx_lock(&pr->pr_mtx); 244 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 245 bcopy(lpr, nlpr, sizeof(*lpr)); 246 lpr = nlpr; 247 mtx_unlock(&ppr->pr_mtx); 248 done: 249 if (lprp != NULL) 250 *lprp = lpr; 251 else 252 mtx_unlock(&pr->pr_mtx); 253 } 254 255 /* 256 * Jail OSD methods for Linux prison data. 257 */ 258 static int 259 linux_prison_create(void *obj, void *data) 260 { 261 struct prison *pr = obj; 262 struct vfsoptlist *opts = data; 263 int jsys; 264 265 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 266 jsys == JAIL_SYS_INHERIT) 267 return (0); 268 /* 269 * Inherit a prison's initial values from its parent 270 * (different from JAIL_SYS_INHERIT which also inherits changes). 271 */ 272 linux_alloc_prison(pr, NULL); 273 return (0); 274 } 275 276 static int 277 linux_prison_check(void *obj __unused, void *data) 278 { 279 struct vfsoptlist *opts = data; 280 char *osname, *osrelease; 281 int error, jsys, len, oss_version; 282 283 /* Check that the parameters are correct. */ 284 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 285 if (error != ENOENT) { 286 if (error != 0) 287 return (error); 288 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 289 return (EINVAL); 290 } 291 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 292 if (error != ENOENT) { 293 if (error != 0) 294 return (error); 295 if (len == 0 || osname[len - 1] != '\0') 296 return (EINVAL); 297 if (len > LINUX_MAX_UTSNAME) { 298 vfs_opterror(opts, "linux.osname too long"); 299 return (ENAMETOOLONG); 300 } 301 } 302 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 303 if (error != ENOENT) { 304 if (error != 0) 305 return (error); 306 if (len == 0 || osrelease[len - 1] != '\0') 307 return (EINVAL); 308 if (len > LINUX_MAX_UTSNAME) { 309 vfs_opterror(opts, "linux.osrelease too long"); 310 return (ENAMETOOLONG); 311 } 312 error = linux_map_osrel(osrelease, NULL); 313 if (error != 0) { 314 vfs_opterror(opts, "linux.osrelease format error"); 315 return (error); 316 } 317 } 318 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 319 sizeof(oss_version)); 320 321 if (error == ENOENT) 322 error = 0; 323 return (error); 324 } 325 326 static int 327 linux_prison_set(void *obj, void *data) 328 { 329 struct linux_prison *lpr; 330 struct prison *pr = obj; 331 struct vfsoptlist *opts = data; 332 char *osname, *osrelease; 333 int error, gotversion, jsys, len, oss_version; 334 335 /* Set the parameters, which should be correct. */ 336 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 337 if (error == ENOENT) 338 jsys = -1; 339 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 340 if (error == ENOENT) 341 osname = NULL; 342 else 343 jsys = JAIL_SYS_NEW; 344 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 345 if (error == ENOENT) 346 osrelease = NULL; 347 else 348 jsys = JAIL_SYS_NEW; 349 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 350 sizeof(oss_version)); 351 if (error == ENOENT) 352 gotversion = 0; 353 else { 354 gotversion = 1; 355 jsys = JAIL_SYS_NEW; 356 } 357 switch (jsys) { 358 case JAIL_SYS_INHERIT: 359 /* "linux=inherit": inherit the parent's Linux info. */ 360 mtx_lock(&pr->pr_mtx); 361 osd_jail_del(pr, linux_osd_jail_slot); 362 mtx_unlock(&pr->pr_mtx); 363 break; 364 case JAIL_SYS_NEW: 365 /* 366 * "linux=new" or "linux.*": 367 * the prison gets its own Linux info. 368 */ 369 linux_alloc_prison(pr, &lpr); 370 if (osrelease) { 371 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 372 strlcpy(lpr->pr_osrelease, osrelease, 373 LINUX_MAX_UTSNAME); 374 } 375 if (osname) 376 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 377 if (gotversion) 378 lpr->pr_oss_version = oss_version; 379 mtx_unlock(&pr->pr_mtx); 380 } 381 382 return (0); 383 } 384 385 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 386 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 387 "Jail Linux kernel OS name"); 388 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 389 "Jail Linux kernel OS release"); 390 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 391 "I", "Jail Linux OSS version"); 392 393 static int 394 linux_prison_get(void *obj, void *data) 395 { 396 struct linux_prison *lpr; 397 struct prison *ppr; 398 struct prison *pr = obj; 399 struct vfsoptlist *opts = data; 400 int error, i; 401 402 static int version0; 403 404 /* See if this prison is the one with the Linux info. */ 405 lpr = linux_find_prison(pr, &ppr); 406 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 407 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 408 if (error != 0 && error != ENOENT) 409 goto done; 410 if (i) { 411 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 412 if (error != 0 && error != ENOENT) 413 goto done; 414 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 415 if (error != 0 && error != ENOENT) 416 goto done; 417 error = vfs_setopt(opts, "linux.oss_version", 418 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 419 if (error != 0 && error != ENOENT) 420 goto done; 421 } else { 422 /* 423 * If this prison is inheriting its Linux info, report 424 * empty/zero parameters. 425 */ 426 error = vfs_setopts(opts, "linux.osname", ""); 427 if (error != 0 && error != ENOENT) 428 goto done; 429 error = vfs_setopts(opts, "linux.osrelease", ""); 430 if (error != 0 && error != ENOENT) 431 goto done; 432 error = vfs_setopt(opts, "linux.oss_version", &version0, 433 sizeof(lpr->pr_oss_version)); 434 if (error != 0 && error != ENOENT) 435 goto done; 436 } 437 error = 0; 438 439 done: 440 mtx_unlock(&ppr->pr_mtx); 441 442 return (error); 443 } 444 445 static void 446 linux_prison_destructor(void *data) 447 { 448 449 free(data, M_PRISON); 450 } 451 452 void 453 linux_osd_jail_register(void) 454 { 455 struct prison *pr; 456 osd_method_t methods[PR_MAXMETHOD] = { 457 [PR_METHOD_CREATE] = linux_prison_create, 458 [PR_METHOD_GET] = linux_prison_get, 459 [PR_METHOD_SET] = linux_prison_set, 460 [PR_METHOD_CHECK] = linux_prison_check 461 }; 462 463 linux_osd_jail_slot = 464 osd_jail_register(linux_prison_destructor, methods); 465 /* Copy the system Linux info to any current prisons. */ 466 sx_slock(&allprison_lock); 467 TAILQ_FOREACH(pr, &allprison, pr_list) 468 linux_alloc_prison(pr, NULL); 469 sx_sunlock(&allprison_lock); 470 } 471 472 void 473 linux_osd_jail_deregister(void) 474 { 475 476 osd_jail_deregister(linux_osd_jail_slot); 477 } 478 479 void 480 linux_get_osname(struct thread *td, char *dst) 481 { 482 struct prison *pr; 483 struct linux_prison *lpr; 484 485 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 486 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 487 mtx_unlock(&pr->pr_mtx); 488 } 489 490 static int 491 linux_set_osname(struct thread *td, char *osname) 492 { 493 struct prison *pr; 494 struct linux_prison *lpr; 495 496 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 497 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 498 mtx_unlock(&pr->pr_mtx); 499 500 return (0); 501 } 502 503 void 504 linux_get_osrelease(struct thread *td, char *dst) 505 { 506 struct prison *pr; 507 struct linux_prison *lpr; 508 509 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 510 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 511 mtx_unlock(&pr->pr_mtx); 512 } 513 514 int 515 linux_kernver(struct thread *td) 516 { 517 struct prison *pr; 518 struct linux_prison *lpr; 519 int osrel; 520 521 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 522 osrel = lpr->pr_osrel; 523 mtx_unlock(&pr->pr_mtx); 524 525 return (osrel); 526 } 527 528 static int 529 linux_set_osrelease(struct thread *td, char *osrelease) 530 { 531 struct prison *pr; 532 struct linux_prison *lpr; 533 int error; 534 535 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 536 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 537 if (error == 0) 538 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 539 mtx_unlock(&pr->pr_mtx); 540 541 return (error); 542 } 543 544 int 545 linux_get_oss_version(struct thread *td) 546 { 547 struct prison *pr; 548 struct linux_prison *lpr; 549 int version; 550 551 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 552 version = lpr->pr_oss_version; 553 mtx_unlock(&pr->pr_mtx); 554 555 return (version); 556 } 557 558 static int 559 linux_set_oss_version(struct thread *td, int oss_version) 560 { 561 struct prison *pr; 562 struct linux_prison *lpr; 563 564 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 565 lpr->pr_oss_version = oss_version; 566 mtx_unlock(&pr->pr_mtx); 567 568 return (0); 569 } 570