1 /*- 2 * Copyright (c) 1999 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/systm.h> 35 #include <sys/sysctl.h> 36 #include <sys/proc.h> 37 #include <sys/malloc.h> 38 #include <sys/mount.h> 39 #include <sys/jail.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/sx.h> 43 44 #include "opt_compat.h" 45 46 #ifdef COMPAT_LINUX32 47 #include <machine/../linux32/linux.h> 48 #else 49 #include <machine/../linux/linux.h> 50 #endif 51 #include <compat/linux/linux_mib.h> 52 53 struct linux_prison { 54 char pr_osname[LINUX_MAX_UTSNAME]; 55 char pr_osrelease[LINUX_MAX_UTSNAME]; 56 int pr_oss_version; 57 int pr_use_linux26; /* flag to determine whether to use 2.6 emulation */ 58 }; 59 60 static unsigned linux_osd_jail_slot; 61 62 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, 63 "Linux mode"); 64 65 static struct mtx osname_lock; 66 MTX_SYSINIT(linux_osname, &osname_lock, "linux osname", MTX_DEF); 67 68 static char linux_osname[LINUX_MAX_UTSNAME] = "Linux"; 69 70 static int 71 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 72 { 73 char osname[LINUX_MAX_UTSNAME]; 74 int error; 75 76 linux_get_osname(req->td, osname); 77 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 78 if (error || req->newptr == NULL) 79 return (error); 80 error = linux_set_osname(req->td, osname); 81 return (error); 82 } 83 84 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 85 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 86 0, 0, linux_sysctl_osname, "A", 87 "Linux kernel OS name"); 88 89 static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.6.16"; 90 static int linux_use_linux26 = 1; 91 92 static int 93 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 94 { 95 char osrelease[LINUX_MAX_UTSNAME]; 96 int error; 97 98 linux_get_osrelease(req->td, osrelease); 99 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 100 if (error || req->newptr == NULL) 101 return (error); 102 error = linux_set_osrelease(req->td, osrelease); 103 return (error); 104 } 105 106 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 107 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 108 0, 0, linux_sysctl_osrelease, "A", 109 "Linux kernel OS release"); 110 111 static int linux_oss_version = 0x030600; 112 113 static int 114 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 115 { 116 int oss_version; 117 int error; 118 119 oss_version = linux_get_oss_version(req->td); 120 error = sysctl_handle_int(oidp, &oss_version, 0, req); 121 if (error || req->newptr == NULL) 122 return (error); 123 error = linux_set_oss_version(req->td, oss_version); 124 return (error); 125 } 126 127 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 128 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 129 0, 0, linux_sysctl_oss_version, "I", 130 "Linux OSS version"); 131 132 /* 133 * Returns holding the prison mutex if return non-NULL. 134 */ 135 static struct linux_prison * 136 linux_get_prison(struct thread *td, struct prison **prp) 137 { 138 struct prison *pr; 139 struct linux_prison *lpr; 140 141 KASSERT(td == curthread, ("linux_get_prison() called on !curthread")); 142 *prp = pr = td->td_ucred->cr_prison; 143 if (pr == NULL || !linux_osd_jail_slot) 144 return (NULL); 145 mtx_lock(&pr->pr_mtx); 146 lpr = osd_jail_get(pr, linux_osd_jail_slot); 147 if (lpr == NULL) 148 mtx_unlock(&pr->pr_mtx); 149 return (lpr); 150 } 151 152 /* 153 * Ensure a prison has its own Linux info. The prison should be locked on 154 * entrance and will be locked on exit (though it may get unlocked in the 155 * interrim). 156 */ 157 static int 158 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 159 { 160 struct linux_prison *lpr, *nlpr; 161 int error; 162 163 /* If this prison already has Linux info, return that. */ 164 error = 0; 165 mtx_assert(&pr->pr_mtx, MA_OWNED); 166 lpr = osd_jail_get(pr, linux_osd_jail_slot); 167 if (lpr != NULL) 168 goto done; 169 /* 170 * Allocate a new info record. Then check again, in case something 171 * changed during the allocation. 172 */ 173 mtx_unlock(&pr->pr_mtx); 174 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 175 mtx_lock(&pr->pr_mtx); 176 lpr = osd_jail_get(pr, linux_osd_jail_slot); 177 if (lpr != NULL) { 178 free(nlpr, M_PRISON); 179 goto done; 180 } 181 error = osd_jail_set(pr, linux_osd_jail_slot, nlpr); 182 if (error) 183 free(nlpr, M_PRISON); 184 else { 185 lpr = nlpr; 186 mtx_lock(&osname_lock); 187 strncpy(lpr->pr_osname, linux_osname, LINUX_MAX_UTSNAME); 188 strncpy(lpr->pr_osrelease, linux_osrelease, LINUX_MAX_UTSNAME); 189 lpr->pr_oss_version = linux_oss_version; 190 lpr->pr_use_linux26 = linux_use_linux26; 191 mtx_unlock(&osname_lock); 192 } 193 done: 194 if (lprp != NULL) 195 *lprp = lpr; 196 return (error); 197 } 198 199 /* 200 * Jail OSD methods for Linux prison data. 201 */ 202 static int 203 linux_prison_create(void *obj, void *data) 204 { 205 int error; 206 struct prison *pr = obj; 207 struct vfsoptlist *opts = data; 208 209 if (vfs_flagopt(opts, "nolinux", NULL, 0)) 210 return (0); 211 /* 212 * Inherit a prison's initial values from its parent 213 * (different from NULL which also inherits changes). 214 */ 215 mtx_lock(&pr->pr_mtx); 216 error = linux_alloc_prison(pr, NULL); 217 mtx_unlock(&pr->pr_mtx); 218 return (error); 219 } 220 221 static int 222 linux_prison_check(void *obj __unused, void *data) 223 { 224 struct vfsoptlist *opts = data; 225 char *osname, *osrelease; 226 int error, len, oss_version; 227 228 /* Check that the parameters are correct. */ 229 (void)vfs_flagopt(opts, "linux", NULL, 0); 230 (void)vfs_flagopt(opts, "nolinux", NULL, 0); 231 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 232 if (error != ENOENT) { 233 if (error != 0) 234 return (error); 235 if (len == 0 || osname[len - 1] != '\0') 236 return (EINVAL); 237 if (len > LINUX_MAX_UTSNAME) { 238 vfs_opterror(opts, "linux.osname too long"); 239 return (ENAMETOOLONG); 240 } 241 } 242 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 243 if (error != ENOENT) { 244 if (error != 0) 245 return (error); 246 if (len == 0 || osrelease[len - 1] != '\0') 247 return (EINVAL); 248 if (len > LINUX_MAX_UTSNAME) { 249 vfs_opterror(opts, "linux.osrelease too long"); 250 return (ENAMETOOLONG); 251 } 252 } 253 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 254 sizeof(oss_version)); 255 return (error == ENOENT ? 0 : error); 256 } 257 258 static int 259 linux_prison_set(void *obj, void *data) 260 { 261 struct linux_prison *lpr; 262 struct prison *pr = obj; 263 struct vfsoptlist *opts = data; 264 char *osname, *osrelease; 265 int error, gotversion, len, nolinux, oss_version, yeslinux; 266 267 /* Set the parameters, which should be correct. */ 268 yeslinux = vfs_flagopt(opts, "linux", NULL, 0); 269 nolinux = vfs_flagopt(opts, "nolinux", NULL, 0); 270 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 271 if (error == ENOENT) 272 osname = NULL; 273 else 274 yeslinux = 1; 275 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 276 if (error == ENOENT) 277 osrelease = NULL; 278 else 279 yeslinux = 1; 280 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 281 sizeof(oss_version)); 282 gotversion = error == 0; 283 yeslinux |= gotversion; 284 if (nolinux) { 285 /* "nolinux": inherit the parent's Linux info. */ 286 mtx_lock(&pr->pr_mtx); 287 osd_jail_del(pr, linux_osd_jail_slot); 288 mtx_unlock(&pr->pr_mtx); 289 } else if (yeslinux) { 290 /* 291 * "linux" or "linux.*": 292 * the prison gets its own Linux info. 293 */ 294 mtx_lock(&pr->pr_mtx); 295 error = linux_alloc_prison(pr, &lpr); 296 if (error) { 297 mtx_unlock(&pr->pr_mtx); 298 return (error); 299 } 300 if (osname) 301 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 302 if (osrelease) { 303 strlcpy(lpr->pr_osrelease, osrelease, 304 LINUX_MAX_UTSNAME); 305 lpr->pr_use_linux26 = strlen(osrelease) >= 3 && 306 osrelease[2] == '6'; 307 } 308 if (gotversion) 309 lpr->pr_oss_version = oss_version; 310 mtx_unlock(&pr->pr_mtx); 311 } 312 return (0); 313 } 314 315 SYSCTL_JAIL_PARAM_NODE(linux, "Jail Linux parameters"); 316 SYSCTL_JAIL_PARAM(, nolinux, CTLTYPE_INT | CTLFLAG_RW, 317 "BN", "Jail w/ no Linux parameters"); 318 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 319 "Jail Linux kernel OS name"); 320 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 321 "Jail Linux kernel OS release"); 322 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 323 "I", "Jail Linux OSS version"); 324 325 static int 326 linux_prison_get(void *obj, void *data) 327 { 328 struct linux_prison *lpr; 329 struct prison *pr = obj; 330 struct vfsoptlist *opts = data; 331 int error, i; 332 333 mtx_lock(&pr->pr_mtx); 334 /* Tell whether this prison has its own Linux info. */ 335 lpr = osd_jail_get(pr, linux_osd_jail_slot); 336 i = lpr != NULL; 337 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 338 if (error != 0 && error != ENOENT) 339 goto done; 340 i = !i; 341 error = vfs_setopt(opts, "nolinux", &i, sizeof(i)); 342 if (error != 0 && error != ENOENT) 343 goto done; 344 /* 345 * It's kind of bogus to give the root info, but leave it to the caller 346 * to check the above flag. 347 */ 348 if (lpr != NULL) { 349 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 350 if (error != 0 && error != ENOENT) 351 goto done; 352 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 353 if (error != 0 && error != ENOENT) 354 goto done; 355 error = vfs_setopt(opts, "linux.oss_version", 356 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 357 if (error != 0 && error != ENOENT) 358 goto done; 359 } else { 360 mtx_lock(&osname_lock); 361 error = vfs_setopts(opts, "linux.osname", linux_osname); 362 if (error != 0 && error != ENOENT) 363 goto done; 364 error = vfs_setopts(opts, "linux.osrelease", linux_osrelease); 365 if (error != 0 && error != ENOENT) 366 goto done; 367 error = vfs_setopt(opts, "linux.oss_version", 368 &linux_oss_version, sizeof(linux_oss_version)); 369 if (error != 0 && error != ENOENT) 370 goto done; 371 mtx_unlock(&osname_lock); 372 } 373 error = 0; 374 375 done: 376 mtx_unlock(&pr->pr_mtx); 377 return (error); 378 } 379 380 static void 381 linux_prison_destructor(void *data) 382 { 383 384 free(data, M_PRISON); 385 } 386 387 void 388 linux_osd_jail_register(void) 389 { 390 struct prison *pr; 391 osd_method_t methods[PR_MAXMETHOD] = { 392 [PR_METHOD_CREATE] = linux_prison_create, 393 [PR_METHOD_GET] = linux_prison_get, 394 [PR_METHOD_SET] = linux_prison_set, 395 [PR_METHOD_CHECK] = linux_prison_check 396 }; 397 398 linux_osd_jail_slot = 399 osd_jail_register(linux_prison_destructor, methods); 400 if (linux_osd_jail_slot > 0) { 401 /* Copy the system linux info to any current prisons. */ 402 sx_xlock(&allprison_lock); 403 TAILQ_FOREACH(pr, &allprison, pr_list) { 404 mtx_lock(&pr->pr_mtx); 405 (void)linux_alloc_prison(pr, NULL); 406 mtx_unlock(&pr->pr_mtx); 407 } 408 sx_xunlock(&allprison_lock); 409 } 410 } 411 412 void 413 linux_osd_jail_deregister(void) 414 { 415 416 if (linux_osd_jail_slot) 417 osd_jail_deregister(linux_osd_jail_slot); 418 } 419 420 void 421 linux_get_osname(struct thread *td, char *dst) 422 { 423 struct prison *pr; 424 struct linux_prison *lpr; 425 426 lpr = linux_get_prison(td, &pr); 427 if (lpr != NULL) { 428 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 429 mtx_unlock(&pr->pr_mtx); 430 } else { 431 mtx_lock(&osname_lock); 432 bcopy(linux_osname, dst, LINUX_MAX_UTSNAME); 433 mtx_unlock(&osname_lock); 434 } 435 } 436 437 int 438 linux_set_osname(struct thread *td, char *osname) 439 { 440 struct prison *pr; 441 struct linux_prison *lpr; 442 443 lpr = linux_get_prison(td, &pr); 444 if (lpr != NULL) { 445 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 446 mtx_unlock(&pr->pr_mtx); 447 } else { 448 mtx_lock(&osname_lock); 449 strcpy(linux_osname, osname); 450 mtx_unlock(&osname_lock); 451 } 452 453 return (0); 454 } 455 456 void 457 linux_get_osrelease(struct thread *td, char *dst) 458 { 459 struct prison *pr; 460 struct linux_prison *lpr; 461 462 lpr = linux_get_prison(td, &pr); 463 if (lpr != NULL) { 464 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 465 mtx_unlock(&pr->pr_mtx); 466 } else { 467 mtx_lock(&osname_lock); 468 bcopy(linux_osrelease, dst, LINUX_MAX_UTSNAME); 469 mtx_unlock(&osname_lock); 470 } 471 } 472 473 int 474 linux_use26(struct thread *td) 475 { 476 struct prison *pr; 477 struct linux_prison *lpr; 478 int use26; 479 480 lpr = linux_get_prison(td, &pr); 481 if (lpr != NULL) { 482 use26 = lpr->pr_use_linux26; 483 mtx_unlock(&pr->pr_mtx); 484 } else 485 use26 = linux_use_linux26; 486 return (use26); 487 } 488 489 int 490 linux_set_osrelease(struct thread *td, char *osrelease) 491 { 492 struct prison *pr; 493 struct linux_prison *lpr; 494 495 lpr = linux_get_prison(td, &pr); 496 if (lpr != NULL) { 497 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 498 lpr->pr_use_linux26 = 499 strlen(osrelease) >= 3 && osrelease[2] == '6'; 500 mtx_unlock(&pr->pr_mtx); 501 } else { 502 mtx_lock(&osname_lock); 503 strcpy(linux_osrelease, osrelease); 504 linux_use_linux26 = 505 strlen(osrelease) >= 3 && osrelease[2] == '6'; 506 mtx_unlock(&osname_lock); 507 } 508 509 return (0); 510 } 511 512 int 513 linux_get_oss_version(struct thread *td) 514 { 515 struct prison *pr; 516 struct linux_prison *lpr; 517 int version; 518 519 lpr = linux_get_prison(td, &pr); 520 if (lpr != NULL) { 521 version = lpr->pr_oss_version; 522 mtx_unlock(&pr->pr_mtx); 523 } else 524 version = linux_oss_version; 525 return (version); 526 } 527 528 int 529 linux_set_oss_version(struct thread *td, int oss_version) 530 { 531 struct prison *pr; 532 struct linux_prison *lpr; 533 534 lpr = linux_get_prison(td, &pr); 535 if (lpr != NULL) { 536 lpr->pr_oss_version = oss_version; 537 mtx_unlock(&pr->pr_mtx); 538 } else { 539 mtx_lock(&osname_lock); 540 linux_oss_version = oss_version; 541 mtx_unlock(&osname_lock); 542 } 543 544 return (0); 545 } 546 547 #if defined(DEBUG) || defined(KTR) 548 549 u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))]; 550 551 static int 552 linux_debug(int syscall, int toggle, int global) 553 { 554 555 if (global) { 556 char c = toggle ? 0 : 0xff; 557 558 memset(linux_debug_map, c, sizeof(linux_debug_map)); 559 return (0); 560 } 561 if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL) 562 return (EINVAL); 563 if (toggle) 564 clrbit(linux_debug_map, syscall); 565 else 566 setbit(linux_debug_map, syscall); 567 return (0); 568 } 569 570 /* 571 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 572 * 573 * E.g.: sysctl linux.debug=21.0 574 * 575 * As a special case, syscall "all" will apply to all syscalls globally. 576 */ 577 #define LINUX_MAX_DEBUGSTR 16 578 static int 579 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 580 { 581 char value[LINUX_MAX_DEBUGSTR], *p; 582 int error, sysc, toggle; 583 int global = 0; 584 585 value[0] = '\0'; 586 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 587 if (error || req->newptr == NULL) 588 return (error); 589 for (p = value; *p != '\0' && *p != '.'; p++); 590 if (*p == '\0') 591 return (EINVAL); 592 *p++ = '\0'; 593 sysc = strtol(value, NULL, 0); 594 toggle = strtol(p, NULL, 0); 595 if (strcmp(value, "all") == 0) 596 global = 1; 597 error = linux_debug(sysc, toggle, global); 598 return (error); 599 } 600 601 SYSCTL_PROC(_compat_linux, OID_AUTO, debug, 602 CTLTYPE_STRING | CTLFLAG_RW, 603 0, 0, linux_sysctl_debug, "A", 604 "Linux debugging control"); 605 606 #endif /* DEBUG || KTR */ 607