1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/sdt.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 #include <sys/proc.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/jail.h> 41 #include <sys/lock.h> 42 #include <sys/sx.h> 43 44 #include <compat/linux/linux_mib.h> 45 #include <compat/linux/linux_misc.h> 46 47 struct linux_prison { 48 char pr_osname[LINUX_MAX_UTSNAME]; 49 char pr_osrelease[LINUX_MAX_UTSNAME]; 50 int pr_oss_version; 51 int pr_osrel; 52 }; 53 54 static struct linux_prison lprison0 = { 55 .pr_osname = "Linux", 56 .pr_osrelease = LINUX_VERSION_STR, 57 .pr_oss_version = 0x030600, 58 .pr_osrel = LINUX_VERSION_CODE 59 }; 60 61 static unsigned linux_osd_jail_slot; 62 63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 64 "Linux mode"); 65 66 int linux_default_openfiles = 1024; 67 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 68 &linux_default_openfiles, 0, 69 "Default soft openfiles resource limit, or -1 for unlimited"); 70 71 int linux_ignore_ip_recverr = 1; 72 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 73 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 74 75 int linux_preserve_vstatus = 0; 76 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 77 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 78 79 bool linux_map_sched_prio = true; 80 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 81 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 82 "(not POSIX compliant)"); 83 84 static int linux_set_osname(struct thread *td, char *osname); 85 static int linux_set_osrelease(struct thread *td, char *osrelease); 86 static int linux_set_oss_version(struct thread *td, int oss_version); 87 88 static int 89 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 90 { 91 char osname[LINUX_MAX_UTSNAME]; 92 int error; 93 94 linux_get_osname(req->td, osname); 95 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 96 if (error != 0 || req->newptr == NULL) 97 return (error); 98 error = linux_set_osname(req->td, osname); 99 100 return (error); 101 } 102 103 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 104 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 105 0, 0, linux_sysctl_osname, "A", 106 "Linux kernel OS name"); 107 108 static int 109 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 110 { 111 char osrelease[LINUX_MAX_UTSNAME]; 112 int error; 113 114 linux_get_osrelease(req->td, osrelease); 115 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 116 if (error != 0 || req->newptr == NULL) 117 return (error); 118 error = linux_set_osrelease(req->td, osrelease); 119 120 return (error); 121 } 122 123 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 124 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 125 0, 0, linux_sysctl_osrelease, "A", 126 "Linux kernel OS release"); 127 128 static int 129 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 130 { 131 int oss_version; 132 int error; 133 134 oss_version = linux_get_oss_version(req->td); 135 error = sysctl_handle_int(oidp, &oss_version, 0, req); 136 if (error != 0 || req->newptr == NULL) 137 return (error); 138 error = linux_set_oss_version(req->td, oss_version); 139 140 return (error); 141 } 142 143 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 144 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 145 0, 0, linux_sysctl_oss_version, "I", 146 "Linux OSS version"); 147 148 /* 149 * Map the osrelease into integer 150 */ 151 static int 152 linux_map_osrel(char *osrelease, int *osrel) 153 { 154 char *sep, *eosrelease; 155 int len, v0, v1, v2, v; 156 157 len = strlen(osrelease); 158 eosrelease = osrelease + len; 159 v0 = strtol(osrelease, &sep, 10); 160 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 161 return (EINVAL); 162 osrelease = sep + 1; 163 v1 = strtol(osrelease, &sep, 10); 164 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 165 return (EINVAL); 166 osrelease = sep + 1; 167 v2 = strtol(osrelease, &sep, 10); 168 if (osrelease == sep || 169 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 170 return (EINVAL); 171 172 v = LINUX_KERNVER(v0, v1, v2); 173 if (v < LINUX_KERNVER(1, 0, 0)) 174 return (EINVAL); 175 176 if (osrel != NULL) 177 *osrel = v; 178 179 return (0); 180 } 181 182 /* 183 * Find a prison with Linux info. 184 * Return the Linux info and the (locked) prison. 185 */ 186 static struct linux_prison * 187 linux_find_prison(struct prison *spr, struct prison **prp) 188 { 189 struct prison *pr; 190 struct linux_prison *lpr; 191 192 for (pr = spr;; pr = pr->pr_parent) { 193 mtx_lock(&pr->pr_mtx); 194 lpr = (pr == &prison0) 195 ? &lprison0 196 : osd_jail_get(pr, linux_osd_jail_slot); 197 if (lpr != NULL) 198 break; 199 mtx_unlock(&pr->pr_mtx); 200 } 201 *prp = pr; 202 203 return (lpr); 204 } 205 206 /* 207 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 208 * the Linux info and lock the prison. 209 */ 210 static void 211 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 212 { 213 struct prison *ppr; 214 struct linux_prison *lpr, *nlpr; 215 void **rsv; 216 217 /* If this prison already has Linux info, return that. */ 218 lpr = linux_find_prison(pr, &ppr); 219 if (ppr == pr) 220 goto done; 221 /* 222 * Allocate a new info record. Then check again, in case something 223 * changed during the allocation. 224 */ 225 mtx_unlock(&ppr->pr_mtx); 226 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 227 rsv = osd_reserve(linux_osd_jail_slot); 228 lpr = linux_find_prison(pr, &ppr); 229 if (ppr == pr) { 230 free(nlpr, M_PRISON); 231 osd_free_reserved(rsv); 232 goto done; 233 } 234 /* Inherit the initial values from the ancestor. */ 235 mtx_lock(&pr->pr_mtx); 236 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 237 bcopy(lpr, nlpr, sizeof(*lpr)); 238 lpr = nlpr; 239 mtx_unlock(&ppr->pr_mtx); 240 done: 241 if (lprp != NULL) 242 *lprp = lpr; 243 else 244 mtx_unlock(&pr->pr_mtx); 245 } 246 247 /* 248 * Jail OSD methods for Linux prison data. 249 */ 250 static int 251 linux_prison_create(void *obj, void *data) 252 { 253 struct prison *pr = obj; 254 struct vfsoptlist *opts = data; 255 int jsys; 256 257 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 258 jsys == JAIL_SYS_INHERIT) 259 return (0); 260 /* 261 * Inherit a prison's initial values from its parent 262 * (different from JAIL_SYS_INHERIT which also inherits changes). 263 */ 264 linux_alloc_prison(pr, NULL); 265 return (0); 266 } 267 268 static int 269 linux_prison_check(void *obj __unused, void *data) 270 { 271 struct vfsoptlist *opts = data; 272 char *osname, *osrelease; 273 int error, jsys, len, oss_version; 274 275 /* Check that the parameters are correct. */ 276 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 277 if (error != ENOENT) { 278 if (error != 0) 279 return (error); 280 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 281 return (EINVAL); 282 } 283 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 284 if (error != ENOENT) { 285 if (error != 0) 286 return (error); 287 if (len == 0 || osname[len - 1] != '\0') 288 return (EINVAL); 289 if (len > LINUX_MAX_UTSNAME) { 290 vfs_opterror(opts, "linux.osname too long"); 291 return (ENAMETOOLONG); 292 } 293 } 294 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 295 if (error != ENOENT) { 296 if (error != 0) 297 return (error); 298 if (len == 0 || osrelease[len - 1] != '\0') 299 return (EINVAL); 300 if (len > LINUX_MAX_UTSNAME) { 301 vfs_opterror(opts, "linux.osrelease too long"); 302 return (ENAMETOOLONG); 303 } 304 error = linux_map_osrel(osrelease, NULL); 305 if (error != 0) { 306 vfs_opterror(opts, "linux.osrelease format error"); 307 return (error); 308 } 309 } 310 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 311 sizeof(oss_version)); 312 313 if (error == ENOENT) 314 error = 0; 315 return (error); 316 } 317 318 static int 319 linux_prison_set(void *obj, void *data) 320 { 321 struct linux_prison *lpr; 322 struct prison *pr = obj; 323 struct vfsoptlist *opts = data; 324 char *osname, *osrelease; 325 int error, gotversion, jsys, len, oss_version; 326 327 /* Set the parameters, which should be correct. */ 328 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 329 if (error == ENOENT) 330 jsys = -1; 331 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 332 if (error == ENOENT) 333 osname = NULL; 334 else 335 jsys = JAIL_SYS_NEW; 336 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 337 if (error == ENOENT) 338 osrelease = NULL; 339 else 340 jsys = JAIL_SYS_NEW; 341 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 342 sizeof(oss_version)); 343 if (error == ENOENT) 344 gotversion = 0; 345 else { 346 gotversion = 1; 347 jsys = JAIL_SYS_NEW; 348 } 349 switch (jsys) { 350 case JAIL_SYS_INHERIT: 351 /* "linux=inherit": inherit the parent's Linux info. */ 352 mtx_lock(&pr->pr_mtx); 353 osd_jail_del(pr, linux_osd_jail_slot); 354 mtx_unlock(&pr->pr_mtx); 355 break; 356 case JAIL_SYS_NEW: 357 /* 358 * "linux=new" or "linux.*": 359 * the prison gets its own Linux info. 360 */ 361 linux_alloc_prison(pr, &lpr); 362 if (osrelease) { 363 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 364 strlcpy(lpr->pr_osrelease, osrelease, 365 LINUX_MAX_UTSNAME); 366 } 367 if (osname) 368 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 369 if (gotversion) 370 lpr->pr_oss_version = oss_version; 371 mtx_unlock(&pr->pr_mtx); 372 } 373 374 return (0); 375 } 376 377 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 378 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 379 "Jail Linux kernel OS name"); 380 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 381 "Jail Linux kernel OS release"); 382 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 383 "I", "Jail Linux OSS version"); 384 385 static int 386 linux_prison_get(void *obj, void *data) 387 { 388 struct linux_prison *lpr; 389 struct prison *ppr; 390 struct prison *pr = obj; 391 struct vfsoptlist *opts = data; 392 int error, i; 393 394 static int version0; 395 396 /* See if this prison is the one with the Linux info. */ 397 lpr = linux_find_prison(pr, &ppr); 398 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 399 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 400 if (error != 0 && error != ENOENT) 401 goto done; 402 if (i) { 403 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 404 if (error != 0 && error != ENOENT) 405 goto done; 406 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 407 if (error != 0 && error != ENOENT) 408 goto done; 409 error = vfs_setopt(opts, "linux.oss_version", 410 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 411 if (error != 0 && error != ENOENT) 412 goto done; 413 } else { 414 /* 415 * If this prison is inheriting its Linux info, report 416 * empty/zero parameters. 417 */ 418 error = vfs_setopts(opts, "linux.osname", ""); 419 if (error != 0 && error != ENOENT) 420 goto done; 421 error = vfs_setopts(opts, "linux.osrelease", ""); 422 if (error != 0 && error != ENOENT) 423 goto done; 424 error = vfs_setopt(opts, "linux.oss_version", &version0, 425 sizeof(lpr->pr_oss_version)); 426 if (error != 0 && error != ENOENT) 427 goto done; 428 } 429 error = 0; 430 431 done: 432 mtx_unlock(&ppr->pr_mtx); 433 434 return (error); 435 } 436 437 static void 438 linux_prison_destructor(void *data) 439 { 440 441 free(data, M_PRISON); 442 } 443 444 void 445 linux_osd_jail_register(void) 446 { 447 struct prison *pr; 448 osd_method_t methods[PR_MAXMETHOD] = { 449 [PR_METHOD_CREATE] = linux_prison_create, 450 [PR_METHOD_GET] = linux_prison_get, 451 [PR_METHOD_SET] = linux_prison_set, 452 [PR_METHOD_CHECK] = linux_prison_check 453 }; 454 455 linux_osd_jail_slot = 456 osd_jail_register(linux_prison_destructor, methods); 457 /* Copy the system Linux info to any current prisons. */ 458 sx_slock(&allprison_lock); 459 TAILQ_FOREACH(pr, &allprison, pr_list) 460 linux_alloc_prison(pr, NULL); 461 sx_sunlock(&allprison_lock); 462 } 463 464 void 465 linux_osd_jail_deregister(void) 466 { 467 468 osd_jail_deregister(linux_osd_jail_slot); 469 } 470 471 void 472 linux_get_osname(struct thread *td, char *dst) 473 { 474 struct prison *pr; 475 struct linux_prison *lpr; 476 477 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 478 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 479 mtx_unlock(&pr->pr_mtx); 480 } 481 482 static int 483 linux_set_osname(struct thread *td, char *osname) 484 { 485 struct prison *pr; 486 struct linux_prison *lpr; 487 488 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 489 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 490 mtx_unlock(&pr->pr_mtx); 491 492 return (0); 493 } 494 495 void 496 linux_get_osrelease(struct thread *td, char *dst) 497 { 498 struct prison *pr; 499 struct linux_prison *lpr; 500 501 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 502 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 503 mtx_unlock(&pr->pr_mtx); 504 } 505 506 int 507 linux_kernver(struct thread *td) 508 { 509 struct prison *pr; 510 struct linux_prison *lpr; 511 int osrel; 512 513 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 514 osrel = lpr->pr_osrel; 515 mtx_unlock(&pr->pr_mtx); 516 517 return (osrel); 518 } 519 520 static int 521 linux_set_osrelease(struct thread *td, char *osrelease) 522 { 523 struct prison *pr; 524 struct linux_prison *lpr; 525 int error; 526 527 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 528 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 529 if (error == 0) 530 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 531 mtx_unlock(&pr->pr_mtx); 532 533 return (error); 534 } 535 536 int 537 linux_get_oss_version(struct thread *td) 538 { 539 struct prison *pr; 540 struct linux_prison *lpr; 541 int version; 542 543 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 544 version = lpr->pr_oss_version; 545 mtx_unlock(&pr->pr_mtx); 546 547 return (version); 548 } 549 550 static int 551 linux_set_oss_version(struct thread *td, int oss_version) 552 { 553 struct prison *pr; 554 struct linux_prison *lpr; 555 556 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 557 lpr->pr_oss_version = oss_version; 558 mtx_unlock(&pr->pr_mtx); 559 560 return (0); 561 } 562