1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1999 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/sdt.h> 35 #include <sys/systm.h> 36 #include <sys/sysctl.h> 37 #include <sys/proc.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/jail.h> 41 #include <sys/lock.h> 42 #include <sys/sx.h> 43 44 #include <compat/linux/linux_mib.h> 45 #include <compat/linux/linux_misc.h> 46 47 struct linux_prison { 48 char pr_osname[LINUX_MAX_UTSNAME]; 49 char pr_osrelease[LINUX_MAX_UTSNAME]; 50 int pr_oss_version; 51 int pr_osrel; 52 }; 53 54 static struct linux_prison lprison0 = { 55 .pr_osname = "Linux", 56 .pr_osrelease = LINUX_VERSION_STR, 57 .pr_oss_version = 0x030600, 58 .pr_osrel = LINUX_VERSION_CODE 59 }; 60 61 static unsigned linux_osd_jail_slot; 62 63 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 64 "Linux mode"); 65 66 int linux_debug = 1; 67 SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN, 68 &linux_debug, 0, "Log warnings from linux(4); or 0 to disable"); 69 70 int linux_default_openfiles = 1024; 71 SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN, 72 &linux_default_openfiles, 0, 73 "Default soft openfiles resource limit, or -1 for unlimited"); 74 75 int linux_ignore_ip_recverr = 1; 76 SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN, 77 &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR"); 78 79 int linux_preserve_vstatus = 0; 80 SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN, 81 &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag"); 82 83 bool linux_map_sched_prio = true; 84 SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN, 85 &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities " 86 "(not POSIX compliant)"); 87 88 static int linux_set_osname(struct thread *td, char *osname); 89 static int linux_set_osrelease(struct thread *td, char *osrelease); 90 static int linux_set_oss_version(struct thread *td, int oss_version); 91 92 static int 93 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 94 { 95 char osname[LINUX_MAX_UTSNAME]; 96 int error; 97 98 linux_get_osname(req->td, osname); 99 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 100 if (error != 0 || req->newptr == NULL) 101 return (error); 102 error = linux_set_osname(req->td, osname); 103 104 return (error); 105 } 106 107 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 108 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 109 0, 0, linux_sysctl_osname, "A", 110 "Linux kernel OS name"); 111 112 static int 113 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 114 { 115 char osrelease[LINUX_MAX_UTSNAME]; 116 int error; 117 118 linux_get_osrelease(req->td, osrelease); 119 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 120 if (error != 0 || req->newptr == NULL) 121 return (error); 122 error = linux_set_osrelease(req->td, osrelease); 123 124 return (error); 125 } 126 127 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 128 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 129 0, 0, linux_sysctl_osrelease, "A", 130 "Linux kernel OS release"); 131 132 static int 133 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 134 { 135 int oss_version; 136 int error; 137 138 oss_version = linux_get_oss_version(req->td); 139 error = sysctl_handle_int(oidp, &oss_version, 0, req); 140 if (error != 0 || req->newptr == NULL) 141 return (error); 142 error = linux_set_oss_version(req->td, oss_version); 143 144 return (error); 145 } 146 147 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 148 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 149 0, 0, linux_sysctl_oss_version, "I", 150 "Linux OSS version"); 151 152 /* 153 * Map the osrelease into integer 154 */ 155 static int 156 linux_map_osrel(char *osrelease, int *osrel) 157 { 158 char *sep, *eosrelease; 159 int len, v0, v1, v2, v; 160 161 len = strlen(osrelease); 162 eosrelease = osrelease + len; 163 v0 = strtol(osrelease, &sep, 10); 164 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 165 return (EINVAL); 166 osrelease = sep + 1; 167 v1 = strtol(osrelease, &sep, 10); 168 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 169 return (EINVAL); 170 osrelease = sep + 1; 171 v2 = strtol(osrelease, &sep, 10); 172 if (osrelease == sep || 173 (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-'))) 174 return (EINVAL); 175 176 v = LINUX_KERNVER(v0, v1, v2); 177 if (v < LINUX_KERNVER(1, 0, 0)) 178 return (EINVAL); 179 180 if (osrel != NULL) 181 *osrel = v; 182 183 return (0); 184 } 185 186 /* 187 * Find a prison with Linux info. 188 * Return the Linux info and the (locked) prison. 189 */ 190 static struct linux_prison * 191 linux_find_prison(struct prison *spr, struct prison **prp) 192 { 193 struct prison *pr; 194 struct linux_prison *lpr; 195 196 for (pr = spr;; pr = pr->pr_parent) { 197 mtx_lock(&pr->pr_mtx); 198 lpr = (pr == &prison0) 199 ? &lprison0 200 : osd_jail_get(pr, linux_osd_jail_slot); 201 if (lpr != NULL) 202 break; 203 mtx_unlock(&pr->pr_mtx); 204 } 205 *prp = pr; 206 207 return (lpr); 208 } 209 210 /* 211 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 212 * the Linux info and lock the prison. 213 */ 214 static void 215 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 216 { 217 struct prison *ppr; 218 struct linux_prison *lpr, *nlpr; 219 void **rsv; 220 221 /* If this prison already has Linux info, return that. */ 222 lpr = linux_find_prison(pr, &ppr); 223 if (ppr == pr) 224 goto done; 225 /* 226 * Allocate a new info record. Then check again, in case something 227 * changed during the allocation. 228 */ 229 mtx_unlock(&ppr->pr_mtx); 230 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 231 rsv = osd_reserve(linux_osd_jail_slot); 232 lpr = linux_find_prison(pr, &ppr); 233 if (ppr == pr) { 234 free(nlpr, M_PRISON); 235 osd_free_reserved(rsv); 236 goto done; 237 } 238 /* Inherit the initial values from the ancestor. */ 239 mtx_lock(&pr->pr_mtx); 240 (void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr); 241 bcopy(lpr, nlpr, sizeof(*lpr)); 242 lpr = nlpr; 243 mtx_unlock(&ppr->pr_mtx); 244 done: 245 if (lprp != NULL) 246 *lprp = lpr; 247 else 248 mtx_unlock(&pr->pr_mtx); 249 } 250 251 /* 252 * Jail OSD methods for Linux prison data. 253 */ 254 static int 255 linux_prison_create(void *obj, void *data) 256 { 257 struct prison *pr = obj; 258 struct vfsoptlist *opts = data; 259 int jsys; 260 261 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 262 jsys == JAIL_SYS_INHERIT) 263 return (0); 264 /* 265 * Inherit a prison's initial values from its parent 266 * (different from JAIL_SYS_INHERIT which also inherits changes). 267 */ 268 linux_alloc_prison(pr, NULL); 269 return (0); 270 } 271 272 static int 273 linux_prison_check(void *obj __unused, void *data) 274 { 275 struct vfsoptlist *opts = data; 276 char *osname, *osrelease; 277 int error, jsys, len, oss_version; 278 279 /* Check that the parameters are correct. */ 280 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 281 if (error != ENOENT) { 282 if (error != 0) 283 return (error); 284 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 285 return (EINVAL); 286 } 287 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 288 if (error != ENOENT) { 289 if (error != 0) 290 return (error); 291 if (len == 0 || osname[len - 1] != '\0') 292 return (EINVAL); 293 if (len > LINUX_MAX_UTSNAME) { 294 vfs_opterror(opts, "linux.osname too long"); 295 return (ENAMETOOLONG); 296 } 297 } 298 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 299 if (error != ENOENT) { 300 if (error != 0) 301 return (error); 302 if (len == 0 || osrelease[len - 1] != '\0') 303 return (EINVAL); 304 if (len > LINUX_MAX_UTSNAME) { 305 vfs_opterror(opts, "linux.osrelease too long"); 306 return (ENAMETOOLONG); 307 } 308 error = linux_map_osrel(osrelease, NULL); 309 if (error != 0) { 310 vfs_opterror(opts, "linux.osrelease format error"); 311 return (error); 312 } 313 } 314 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 315 sizeof(oss_version)); 316 317 if (error == ENOENT) 318 error = 0; 319 return (error); 320 } 321 322 static int 323 linux_prison_set(void *obj, void *data) 324 { 325 struct linux_prison *lpr; 326 struct prison *pr = obj; 327 struct vfsoptlist *opts = data; 328 char *osname, *osrelease; 329 int error, gotversion, jsys, len, oss_version; 330 331 /* Set the parameters, which should be correct. */ 332 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 333 if (error == ENOENT) 334 jsys = -1; 335 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 336 if (error == ENOENT) 337 osname = NULL; 338 else 339 jsys = JAIL_SYS_NEW; 340 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 341 if (error == ENOENT) 342 osrelease = NULL; 343 else 344 jsys = JAIL_SYS_NEW; 345 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 346 sizeof(oss_version)); 347 if (error == ENOENT) 348 gotversion = 0; 349 else { 350 gotversion = 1; 351 jsys = JAIL_SYS_NEW; 352 } 353 switch (jsys) { 354 case JAIL_SYS_INHERIT: 355 /* "linux=inherit": inherit the parent's Linux info. */ 356 mtx_lock(&pr->pr_mtx); 357 osd_jail_del(pr, linux_osd_jail_slot); 358 mtx_unlock(&pr->pr_mtx); 359 break; 360 case JAIL_SYS_NEW: 361 /* 362 * "linux=new" or "linux.*": 363 * the prison gets its own Linux info. 364 */ 365 linux_alloc_prison(pr, &lpr); 366 if (osrelease) { 367 (void)linux_map_osrel(osrelease, &lpr->pr_osrel); 368 strlcpy(lpr->pr_osrelease, osrelease, 369 LINUX_MAX_UTSNAME); 370 } 371 if (osname) 372 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 373 if (gotversion) 374 lpr->pr_oss_version = oss_version; 375 mtx_unlock(&pr->pr_mtx); 376 } 377 378 return (0); 379 } 380 381 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 382 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 383 "Jail Linux kernel OS name"); 384 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 385 "Jail Linux kernel OS release"); 386 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 387 "I", "Jail Linux OSS version"); 388 389 static int 390 linux_prison_get(void *obj, void *data) 391 { 392 struct linux_prison *lpr; 393 struct prison *ppr; 394 struct prison *pr = obj; 395 struct vfsoptlist *opts = data; 396 int error, i; 397 398 static int version0; 399 400 /* See if this prison is the one with the Linux info. */ 401 lpr = linux_find_prison(pr, &ppr); 402 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 403 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 404 if (error != 0 && error != ENOENT) 405 goto done; 406 if (i) { 407 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 408 if (error != 0 && error != ENOENT) 409 goto done; 410 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 411 if (error != 0 && error != ENOENT) 412 goto done; 413 error = vfs_setopt(opts, "linux.oss_version", 414 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 415 if (error != 0 && error != ENOENT) 416 goto done; 417 } else { 418 /* 419 * If this prison is inheriting its Linux info, report 420 * empty/zero parameters. 421 */ 422 error = vfs_setopts(opts, "linux.osname", ""); 423 if (error != 0 && error != ENOENT) 424 goto done; 425 error = vfs_setopts(opts, "linux.osrelease", ""); 426 if (error != 0 && error != ENOENT) 427 goto done; 428 error = vfs_setopt(opts, "linux.oss_version", &version0, 429 sizeof(lpr->pr_oss_version)); 430 if (error != 0 && error != ENOENT) 431 goto done; 432 } 433 error = 0; 434 435 done: 436 mtx_unlock(&ppr->pr_mtx); 437 438 return (error); 439 } 440 441 static void 442 linux_prison_destructor(void *data) 443 { 444 445 free(data, M_PRISON); 446 } 447 448 void 449 linux_osd_jail_register(void) 450 { 451 struct prison *pr; 452 osd_method_t methods[PR_MAXMETHOD] = { 453 [PR_METHOD_CREATE] = linux_prison_create, 454 [PR_METHOD_GET] = linux_prison_get, 455 [PR_METHOD_SET] = linux_prison_set, 456 [PR_METHOD_CHECK] = linux_prison_check 457 }; 458 459 linux_osd_jail_slot = 460 osd_jail_register(linux_prison_destructor, methods); 461 /* Copy the system Linux info to any current prisons. */ 462 sx_slock(&allprison_lock); 463 TAILQ_FOREACH(pr, &allprison, pr_list) 464 linux_alloc_prison(pr, NULL); 465 sx_sunlock(&allprison_lock); 466 } 467 468 void 469 linux_osd_jail_deregister(void) 470 { 471 472 osd_jail_deregister(linux_osd_jail_slot); 473 } 474 475 void 476 linux_get_osname(struct thread *td, char *dst) 477 { 478 struct prison *pr; 479 struct linux_prison *lpr; 480 481 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 482 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 483 mtx_unlock(&pr->pr_mtx); 484 } 485 486 static int 487 linux_set_osname(struct thread *td, char *osname) 488 { 489 struct prison *pr; 490 struct linux_prison *lpr; 491 492 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 493 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 494 mtx_unlock(&pr->pr_mtx); 495 496 return (0); 497 } 498 499 void 500 linux_get_osrelease(struct thread *td, char *dst) 501 { 502 struct prison *pr; 503 struct linux_prison *lpr; 504 505 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 506 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 507 mtx_unlock(&pr->pr_mtx); 508 } 509 510 int 511 linux_kernver(struct thread *td) 512 { 513 struct prison *pr; 514 struct linux_prison *lpr; 515 int osrel; 516 517 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 518 osrel = lpr->pr_osrel; 519 mtx_unlock(&pr->pr_mtx); 520 521 return (osrel); 522 } 523 524 static int 525 linux_set_osrelease(struct thread *td, char *osrelease) 526 { 527 struct prison *pr; 528 struct linux_prison *lpr; 529 int error; 530 531 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 532 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 533 if (error == 0) 534 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 535 mtx_unlock(&pr->pr_mtx); 536 537 return (error); 538 } 539 540 int 541 linux_get_oss_version(struct thread *td) 542 { 543 struct prison *pr; 544 struct linux_prison *lpr; 545 int version; 546 547 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 548 version = lpr->pr_oss_version; 549 mtx_unlock(&pr->pr_mtx); 550 551 return (version); 552 } 553 554 static int 555 linux_set_oss_version(struct thread *td, int oss_version) 556 { 557 struct prison *pr; 558 struct linux_prison *lpr; 559 560 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 561 lpr->pr_oss_version = oss_version; 562 mtx_unlock(&pr->pr_mtx); 563 564 return (0); 565 } 566