1 /*- 2 * Copyright (c) 1999 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/systm.h> 35 #include <sys/sysctl.h> 36 #include <sys/proc.h> 37 #include <sys/malloc.h> 38 #include <sys/mount.h> 39 #include <sys/jail.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/sx.h> 43 44 #include "opt_compat.h" 45 46 #ifdef COMPAT_LINUX32 47 #include <machine/../linux32/linux.h> 48 #else 49 #include <machine/../linux/linux.h> 50 #endif 51 #include <compat/linux/linux_mib.h> 52 53 struct linux_prison { 54 char pr_osname[LINUX_MAX_UTSNAME]; 55 char pr_osrelease[LINUX_MAX_UTSNAME]; 56 int pr_oss_version; 57 int pr_osrel; 58 }; 59 60 static unsigned linux_osd_jail_slot; 61 62 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, 63 "Linux mode"); 64 65 static struct mtx osname_lock; 66 MTX_SYSINIT(linux_osname, &osname_lock, "linux osname", MTX_DEF); 67 68 static char linux_osname[LINUX_MAX_UTSNAME] = "Linux"; 69 70 static int 71 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 72 { 73 char osname[LINUX_MAX_UTSNAME]; 74 int error; 75 76 linux_get_osname(req->td, osname); 77 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 78 if (error || req->newptr == NULL) 79 return (error); 80 error = linux_set_osname(req->td, osname); 81 return (error); 82 } 83 84 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 85 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 86 0, 0, linux_sysctl_osname, "A", 87 "Linux kernel OS name"); 88 89 static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.6.16"; 90 static int linux_osrel = 2006016; 91 92 static int 93 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 94 { 95 char osrelease[LINUX_MAX_UTSNAME]; 96 int error; 97 98 linux_get_osrelease(req->td, osrelease); 99 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 100 if (error || req->newptr == NULL) 101 return (error); 102 error = linux_set_osrelease(req->td, osrelease); 103 return (error); 104 } 105 106 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 107 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 108 0, 0, linux_sysctl_osrelease, "A", 109 "Linux kernel OS release"); 110 111 static int linux_oss_version = 0x030600; 112 113 static int 114 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 115 { 116 int oss_version; 117 int error; 118 119 oss_version = linux_get_oss_version(req->td); 120 error = sysctl_handle_int(oidp, &oss_version, 0, req); 121 if (error || req->newptr == NULL) 122 return (error); 123 error = linux_set_oss_version(req->td, oss_version); 124 return (error); 125 } 126 127 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 128 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 129 0, 0, linux_sysctl_oss_version, "I", 130 "Linux OSS version"); 131 132 /* 133 * Map the osrelease into integer 134 */ 135 static int 136 linux_map_osrel(char *osrelease, int *osrel) 137 { 138 char *sep, *eosrelease; 139 int len, v0, v1, v2, v; 140 141 len = strlen(osrelease); 142 eosrelease = osrelease + len; 143 v0 = strtol(osrelease, &sep, 10); 144 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 145 return (EINVAL); 146 osrelease = sep + 1; 147 v1 = strtol(osrelease, &sep, 10); 148 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 149 return (EINVAL); 150 osrelease = sep + 1; 151 v2 = strtol(osrelease, &sep, 10); 152 if (osrelease == sep || sep != eosrelease) 153 return (EINVAL); 154 155 v = v0 * 1000000 + v1 * 1000 + v2; 156 if (v < 1000000) 157 return (EINVAL); 158 159 *osrel = v; 160 return (0); 161 } 162 163 /* 164 * Returns holding the prison mutex if return non-NULL. 165 */ 166 static struct linux_prison * 167 linux_get_prison(struct thread *td, struct prison **prp) 168 { 169 struct prison *pr; 170 struct linux_prison *lpr; 171 172 KASSERT(td == curthread, ("linux_get_prison() called on !curthread")); 173 *prp = pr = td->td_ucred->cr_prison; 174 if (pr == NULL || !linux_osd_jail_slot) 175 return (NULL); 176 mtx_lock(&pr->pr_mtx); 177 lpr = osd_jail_get(pr, linux_osd_jail_slot); 178 if (lpr == NULL) 179 mtx_unlock(&pr->pr_mtx); 180 return (lpr); 181 } 182 183 /* 184 * Ensure a prison has its own Linux info. The prison should be locked on 185 * entrance and will be locked on exit (though it may get unlocked in the 186 * interrim). 187 */ 188 static int 189 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 190 { 191 struct linux_prison *lpr, *nlpr; 192 int error; 193 194 /* If this prison already has Linux info, return that. */ 195 error = 0; 196 mtx_assert(&pr->pr_mtx, MA_OWNED); 197 lpr = osd_jail_get(pr, linux_osd_jail_slot); 198 if (lpr != NULL) 199 goto done; 200 /* 201 * Allocate a new info record. Then check again, in case something 202 * changed during the allocation. 203 */ 204 mtx_unlock(&pr->pr_mtx); 205 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 206 mtx_lock(&pr->pr_mtx); 207 lpr = osd_jail_get(pr, linux_osd_jail_slot); 208 if (lpr != NULL) { 209 free(nlpr, M_PRISON); 210 goto done; 211 } 212 error = osd_jail_set(pr, linux_osd_jail_slot, nlpr); 213 if (error) 214 free(nlpr, M_PRISON); 215 else { 216 lpr = nlpr; 217 mtx_lock(&osname_lock); 218 strncpy(lpr->pr_osname, linux_osname, LINUX_MAX_UTSNAME); 219 strncpy(lpr->pr_osrelease, linux_osrelease, LINUX_MAX_UTSNAME); 220 lpr->pr_oss_version = linux_oss_version; 221 lpr->pr_osrel = linux_osrel; 222 mtx_unlock(&osname_lock); 223 } 224 done: 225 if (lprp != NULL) 226 *lprp = lpr; 227 return (error); 228 } 229 230 /* 231 * Jail OSD methods for Linux prison data. 232 */ 233 static int 234 linux_prison_create(void *obj, void *data) 235 { 236 int error; 237 struct prison *pr = obj; 238 struct vfsoptlist *opts = data; 239 240 if (vfs_flagopt(opts, "nolinux", NULL, 0)) 241 return (0); 242 /* 243 * Inherit a prison's initial values from its parent 244 * (different from NULL which also inherits changes). 245 */ 246 mtx_lock(&pr->pr_mtx); 247 error = linux_alloc_prison(pr, NULL); 248 mtx_unlock(&pr->pr_mtx); 249 return (error); 250 } 251 252 static int 253 linux_prison_check(void *obj __unused, void *data) 254 { 255 struct vfsoptlist *opts = data; 256 char *osname, *osrelease; 257 int error, len, oss_version; 258 259 /* Check that the parameters are correct. */ 260 (void)vfs_flagopt(opts, "linux", NULL, 0); 261 (void)vfs_flagopt(opts, "nolinux", NULL, 0); 262 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 263 if (error != ENOENT) { 264 if (error != 0) 265 return (error); 266 if (len == 0 || osname[len - 1] != '\0') 267 return (EINVAL); 268 if (len > LINUX_MAX_UTSNAME) { 269 vfs_opterror(opts, "linux.osname too long"); 270 return (ENAMETOOLONG); 271 } 272 } 273 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 274 if (error != ENOENT) { 275 if (error != 0) 276 return (error); 277 if (len == 0 || osrelease[len - 1] != '\0') 278 return (EINVAL); 279 if (len > LINUX_MAX_UTSNAME) { 280 vfs_opterror(opts, "linux.osrelease too long"); 281 return (ENAMETOOLONG); 282 } 283 } 284 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 285 sizeof(oss_version)); 286 return (error == ENOENT ? 0 : error); 287 } 288 289 static int 290 linux_prison_set(void *obj, void *data) 291 { 292 struct linux_prison *lpr; 293 struct prison *pr = obj; 294 struct vfsoptlist *opts = data; 295 char *osname, *osrelease; 296 int error, gotversion, len, nolinux, oss_version, yeslinux; 297 298 /* Set the parameters, which should be correct. */ 299 yeslinux = vfs_flagopt(opts, "linux", NULL, 0); 300 nolinux = vfs_flagopt(opts, "nolinux", NULL, 0); 301 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 302 if (error == ENOENT) 303 osname = NULL; 304 else 305 yeslinux = 1; 306 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 307 if (error == ENOENT) 308 osrelease = NULL; 309 else 310 yeslinux = 1; 311 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 312 sizeof(oss_version)); 313 gotversion = error == 0; 314 yeslinux |= gotversion; 315 if (nolinux) { 316 /* "nolinux": inherit the parent's Linux info. */ 317 mtx_lock(&pr->pr_mtx); 318 osd_jail_del(pr, linux_osd_jail_slot); 319 mtx_unlock(&pr->pr_mtx); 320 } else if (yeslinux) { 321 /* 322 * "linux" or "linux.*": 323 * the prison gets its own Linux info. 324 */ 325 mtx_lock(&pr->pr_mtx); 326 error = linux_alloc_prison(pr, &lpr); 327 if (error) { 328 mtx_unlock(&pr->pr_mtx); 329 return (error); 330 } 331 if (osrelease) { 332 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 333 if (error) { 334 mtx_unlock(&pr->pr_mtx); 335 return (error); 336 } 337 strlcpy(lpr->pr_osrelease, osrelease, 338 LINUX_MAX_UTSNAME); 339 } 340 if (osname) 341 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 342 if (gotversion) 343 lpr->pr_oss_version = oss_version; 344 mtx_unlock(&pr->pr_mtx); 345 } 346 return (0); 347 } 348 349 SYSCTL_JAIL_PARAM_NODE(linux, "Jail Linux parameters"); 350 SYSCTL_JAIL_PARAM(, nolinux, CTLTYPE_INT | CTLFLAG_RW, 351 "BN", "Jail w/ no Linux parameters"); 352 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 353 "Jail Linux kernel OS name"); 354 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 355 "Jail Linux kernel OS release"); 356 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 357 "I", "Jail Linux OSS version"); 358 359 static int 360 linux_prison_get(void *obj, void *data) 361 { 362 struct linux_prison *lpr; 363 struct prison *pr = obj; 364 struct vfsoptlist *opts = data; 365 int error, i; 366 367 mtx_lock(&pr->pr_mtx); 368 /* Tell whether this prison has its own Linux info. */ 369 lpr = osd_jail_get(pr, linux_osd_jail_slot); 370 i = lpr != NULL; 371 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 372 if (error != 0 && error != ENOENT) 373 goto done; 374 i = !i; 375 error = vfs_setopt(opts, "nolinux", &i, sizeof(i)); 376 if (error != 0 && error != ENOENT) 377 goto done; 378 /* 379 * It's kind of bogus to give the root info, but leave it to the caller 380 * to check the above flag. 381 */ 382 if (lpr != NULL) { 383 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 384 if (error != 0 && error != ENOENT) 385 goto done; 386 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 387 if (error != 0 && error != ENOENT) 388 goto done; 389 error = vfs_setopt(opts, "linux.oss_version", 390 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 391 if (error != 0 && error != ENOENT) 392 goto done; 393 } else { 394 mtx_lock(&osname_lock); 395 error = vfs_setopts(opts, "linux.osname", linux_osname); 396 if (error != 0 && error != ENOENT) 397 goto done; 398 error = vfs_setopts(opts, "linux.osrelease", linux_osrelease); 399 if (error != 0 && error != ENOENT) 400 goto done; 401 error = vfs_setopt(opts, "linux.oss_version", 402 &linux_oss_version, sizeof(linux_oss_version)); 403 if (error != 0 && error != ENOENT) 404 goto done; 405 mtx_unlock(&osname_lock); 406 } 407 error = 0; 408 409 done: 410 mtx_unlock(&pr->pr_mtx); 411 return (error); 412 } 413 414 static void 415 linux_prison_destructor(void *data) 416 { 417 418 free(data, M_PRISON); 419 } 420 421 void 422 linux_osd_jail_register(void) 423 { 424 struct prison *pr; 425 osd_method_t methods[PR_MAXMETHOD] = { 426 [PR_METHOD_CREATE] = linux_prison_create, 427 [PR_METHOD_GET] = linux_prison_get, 428 [PR_METHOD_SET] = linux_prison_set, 429 [PR_METHOD_CHECK] = linux_prison_check 430 }; 431 432 linux_osd_jail_slot = 433 osd_jail_register(linux_prison_destructor, methods); 434 if (linux_osd_jail_slot > 0) { 435 /* Copy the system linux info to any current prisons. */ 436 sx_xlock(&allprison_lock); 437 TAILQ_FOREACH(pr, &allprison, pr_list) { 438 mtx_lock(&pr->pr_mtx); 439 (void)linux_alloc_prison(pr, NULL); 440 mtx_unlock(&pr->pr_mtx); 441 } 442 sx_xunlock(&allprison_lock); 443 } 444 } 445 446 void 447 linux_osd_jail_deregister(void) 448 { 449 450 if (linux_osd_jail_slot) 451 osd_jail_deregister(linux_osd_jail_slot); 452 } 453 454 void 455 linux_get_osname(struct thread *td, char *dst) 456 { 457 struct prison *pr; 458 struct linux_prison *lpr; 459 460 lpr = linux_get_prison(td, &pr); 461 if (lpr != NULL) { 462 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 463 mtx_unlock(&pr->pr_mtx); 464 } else { 465 mtx_lock(&osname_lock); 466 bcopy(linux_osname, dst, LINUX_MAX_UTSNAME); 467 mtx_unlock(&osname_lock); 468 } 469 } 470 471 int 472 linux_set_osname(struct thread *td, char *osname) 473 { 474 struct prison *pr; 475 struct linux_prison *lpr; 476 477 lpr = linux_get_prison(td, &pr); 478 if (lpr != NULL) { 479 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 480 mtx_unlock(&pr->pr_mtx); 481 } else { 482 mtx_lock(&osname_lock); 483 strcpy(linux_osname, osname); 484 mtx_unlock(&osname_lock); 485 } 486 487 return (0); 488 } 489 490 void 491 linux_get_osrelease(struct thread *td, char *dst) 492 { 493 struct prison *pr; 494 struct linux_prison *lpr; 495 496 lpr = linux_get_prison(td, &pr); 497 if (lpr != NULL) { 498 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 499 mtx_unlock(&pr->pr_mtx); 500 } else { 501 mtx_lock(&osname_lock); 502 bcopy(linux_osrelease, dst, LINUX_MAX_UTSNAME); 503 mtx_unlock(&osname_lock); 504 } 505 } 506 507 int 508 linux_kernver(struct thread *td) 509 { 510 struct prison *pr; 511 struct linux_prison *lpr; 512 int osrel; 513 514 lpr = linux_get_prison(td, &pr); 515 if (lpr != NULL) { 516 osrel = lpr->pr_osrel; 517 mtx_unlock(&pr->pr_mtx); 518 } else 519 osrel = linux_osrel; 520 return (osrel); 521 } 522 523 int 524 linux_set_osrelease(struct thread *td, char *osrelease) 525 { 526 struct prison *pr; 527 struct linux_prison *lpr; 528 int error; 529 530 lpr = linux_get_prison(td, &pr); 531 if (lpr != NULL) { 532 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 533 if (error) { 534 mtx_unlock(&pr->pr_mtx); 535 return (error); 536 } 537 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 538 mtx_unlock(&pr->pr_mtx); 539 } else { 540 mtx_lock(&osname_lock); 541 error = linux_map_osrel(osrelease, &linux_osrel); 542 if (error) { 543 mtx_unlock(&osname_lock); 544 return (error); 545 } 546 strcpy(linux_osrelease, osrelease); 547 mtx_unlock(&osname_lock); 548 } 549 550 return (0); 551 } 552 553 int 554 linux_get_oss_version(struct thread *td) 555 { 556 struct prison *pr; 557 struct linux_prison *lpr; 558 int version; 559 560 lpr = linux_get_prison(td, &pr); 561 if (lpr != NULL) { 562 version = lpr->pr_oss_version; 563 mtx_unlock(&pr->pr_mtx); 564 } else 565 version = linux_oss_version; 566 return (version); 567 } 568 569 int 570 linux_set_oss_version(struct thread *td, int oss_version) 571 { 572 struct prison *pr; 573 struct linux_prison *lpr; 574 575 lpr = linux_get_prison(td, &pr); 576 if (lpr != NULL) { 577 lpr->pr_oss_version = oss_version; 578 mtx_unlock(&pr->pr_mtx); 579 } else { 580 mtx_lock(&osname_lock); 581 linux_oss_version = oss_version; 582 mtx_unlock(&osname_lock); 583 } 584 585 return (0); 586 } 587 588 #if defined(DEBUG) || defined(KTR) 589 590 u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))]; 591 592 static int 593 linux_debug(int syscall, int toggle, int global) 594 { 595 596 if (global) { 597 char c = toggle ? 0 : 0xff; 598 599 memset(linux_debug_map, c, sizeof(linux_debug_map)); 600 return (0); 601 } 602 if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL) 603 return (EINVAL); 604 if (toggle) 605 clrbit(linux_debug_map, syscall); 606 else 607 setbit(linux_debug_map, syscall); 608 return (0); 609 } 610 611 /* 612 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 613 * 614 * E.g.: sysctl linux.debug=21.0 615 * 616 * As a special case, syscall "all" will apply to all syscalls globally. 617 */ 618 #define LINUX_MAX_DEBUGSTR 16 619 static int 620 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 621 { 622 char value[LINUX_MAX_DEBUGSTR], *p; 623 int error, sysc, toggle; 624 int global = 0; 625 626 value[0] = '\0'; 627 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 628 if (error || req->newptr == NULL) 629 return (error); 630 for (p = value; *p != '\0' && *p != '.'; p++); 631 if (*p == '\0') 632 return (EINVAL); 633 *p++ = '\0'; 634 sysc = strtol(value, NULL, 0); 635 toggle = strtol(p, NULL, 0); 636 if (strcmp(value, "all") == 0) 637 global = 1; 638 error = linux_debug(sysc, toggle, global); 639 return (error); 640 } 641 642 SYSCTL_PROC(_compat_linux, OID_AUTO, debug, 643 CTLTYPE_STRING | CTLFLAG_RW, 644 0, 0, linux_sysctl_debug, "A", 645 "Linux debugging control"); 646 647 #endif /* DEBUG || KTR */ 648