1 /*- 2 * Copyright (c) 1999 Marcel Moolenaar 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/kernel.h> 34 #include <sys/systm.h> 35 #include <sys/sysctl.h> 36 #include <sys/proc.h> 37 #include <sys/malloc.h> 38 #include <sys/mount.h> 39 #include <sys/jail.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/sx.h> 43 44 #include "opt_compat.h" 45 46 #ifdef COMPAT_LINUX32 47 #include <machine/../linux32/linux.h> 48 #else 49 #include <machine/../linux/linux.h> 50 #endif 51 #include <compat/linux/linux_mib.h> 52 53 struct linux_prison { 54 char pr_osname[LINUX_MAX_UTSNAME]; 55 char pr_osrelease[LINUX_MAX_UTSNAME]; 56 int pr_oss_version; 57 int pr_osrel; 58 }; 59 60 static struct linux_prison lprison0 = { 61 .pr_osname = "Linux", 62 .pr_osrelease = "2.6.16", 63 .pr_oss_version = 0x030600, 64 .pr_osrel = 2006016 65 }; 66 67 static unsigned linux_osd_jail_slot; 68 69 SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, 70 "Linux mode"); 71 72 static int 73 linux_sysctl_osname(SYSCTL_HANDLER_ARGS) 74 { 75 char osname[LINUX_MAX_UTSNAME]; 76 int error; 77 78 linux_get_osname(req->td, osname); 79 error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); 80 if (error || req->newptr == NULL) 81 return (error); 82 error = linux_set_osname(req->td, osname); 83 return (error); 84 } 85 86 SYSCTL_PROC(_compat_linux, OID_AUTO, osname, 87 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 88 0, 0, linux_sysctl_osname, "A", 89 "Linux kernel OS name"); 90 91 static int 92 linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) 93 { 94 char osrelease[LINUX_MAX_UTSNAME]; 95 int error; 96 97 linux_get_osrelease(req->td, osrelease); 98 error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); 99 if (error || req->newptr == NULL) 100 return (error); 101 error = linux_set_osrelease(req->td, osrelease); 102 return (error); 103 } 104 105 SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease, 106 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 107 0, 0, linux_sysctl_osrelease, "A", 108 "Linux kernel OS release"); 109 110 static int 111 linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) 112 { 113 int oss_version; 114 int error; 115 116 oss_version = linux_get_oss_version(req->td); 117 error = sysctl_handle_int(oidp, &oss_version, 0, req); 118 if (error || req->newptr == NULL) 119 return (error); 120 error = linux_set_oss_version(req->td, oss_version); 121 return (error); 122 } 123 124 SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version, 125 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, 126 0, 0, linux_sysctl_oss_version, "I", 127 "Linux OSS version"); 128 129 /* 130 * Map the osrelease into integer 131 */ 132 static int 133 linux_map_osrel(char *osrelease, int *osrel) 134 { 135 char *sep, *eosrelease; 136 int len, v0, v1, v2, v; 137 138 len = strlen(osrelease); 139 eosrelease = osrelease + len; 140 v0 = strtol(osrelease, &sep, 10); 141 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 142 return (EINVAL); 143 osrelease = sep + 1; 144 v1 = strtol(osrelease, &sep, 10); 145 if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') 146 return (EINVAL); 147 osrelease = sep + 1; 148 v2 = strtol(osrelease, &sep, 10); 149 if (osrelease == sep || sep != eosrelease) 150 return (EINVAL); 151 152 v = v0 * 1000000 + v1 * 1000 + v2; 153 if (v < 1000000) 154 return (EINVAL); 155 156 *osrel = v; 157 return (0); 158 } 159 160 /* 161 * Find a prison with Linux info. 162 * Return the Linux info and the (locked) prison. 163 */ 164 static struct linux_prison * 165 linux_find_prison(struct prison *spr, struct prison **prp) 166 { 167 struct prison *pr; 168 struct linux_prison *lpr; 169 170 if (!linux_osd_jail_slot) 171 /* In case osd_register failed. */ 172 spr = &prison0; 173 for (pr = spr;; pr = pr->pr_parent) { 174 mtx_lock(&pr->pr_mtx); 175 lpr = (pr == &prison0) 176 ? &lprison0 177 : osd_jail_get(pr, linux_osd_jail_slot); 178 if (lpr != NULL) 179 break; 180 mtx_unlock(&pr->pr_mtx); 181 } 182 *prp = pr; 183 return (lpr); 184 } 185 186 /* 187 * Ensure a prison has its own Linux info. If lprp is non-null, point it to 188 * the Linux info and lock the prison. 189 */ 190 static int 191 linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) 192 { 193 struct prison *ppr; 194 struct linux_prison *lpr, *nlpr; 195 int error; 196 197 /* If this prison already has Linux info, return that. */ 198 error = 0; 199 lpr = linux_find_prison(pr, &ppr); 200 if (ppr == pr) 201 goto done; 202 /* 203 * Allocate a new info record. Then check again, in case something 204 * changed during the allocation. 205 */ 206 mtx_unlock(&ppr->pr_mtx); 207 nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK); 208 lpr = linux_find_prison(pr, &ppr); 209 if (ppr == pr) { 210 free(nlpr, M_PRISON); 211 goto done; 212 } 213 /* Inherit the initial values from the ancestor. */ 214 mtx_lock(&pr->pr_mtx); 215 error = osd_jail_set(pr, linux_osd_jail_slot, nlpr); 216 if (error == 0) { 217 bcopy(lpr, nlpr, sizeof(*lpr)); 218 lpr = nlpr; 219 } else { 220 free(nlpr, M_PRISON); 221 lpr = NULL; 222 } 223 mtx_unlock(&ppr->pr_mtx); 224 done: 225 if (lprp != NULL) 226 *lprp = lpr; 227 else 228 mtx_unlock(&pr->pr_mtx); 229 return (error); 230 } 231 232 /* 233 * Jail OSD methods for Linux prison data. 234 */ 235 static int 236 linux_prison_create(void *obj, void *data) 237 { 238 struct prison *pr = obj; 239 struct vfsoptlist *opts = data; 240 int jsys; 241 242 if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && 243 jsys == JAIL_SYS_INHERIT) 244 return (0); 245 /* 246 * Inherit a prison's initial values from its parent 247 * (different from JAIL_SYS_INHERIT which also inherits changes). 248 */ 249 return linux_alloc_prison(pr, NULL); 250 } 251 252 static int 253 linux_prison_check(void *obj __unused, void *data) 254 { 255 struct vfsoptlist *opts = data; 256 char *osname, *osrelease; 257 int error, jsys, len, osrel, oss_version; 258 259 /* Check that the parameters are correct. */ 260 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 261 if (error != ENOENT) { 262 if (error != 0) 263 return (error); 264 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 265 return (EINVAL); 266 } 267 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 268 if (error != ENOENT) { 269 if (error != 0) 270 return (error); 271 if (len == 0 || osname[len - 1] != '\0') 272 return (EINVAL); 273 if (len > LINUX_MAX_UTSNAME) { 274 vfs_opterror(opts, "linux.osname too long"); 275 return (ENAMETOOLONG); 276 } 277 } 278 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 279 if (error != ENOENT) { 280 if (error != 0) 281 return (error); 282 if (len == 0 || osrelease[len - 1] != '\0') 283 return (EINVAL); 284 if (len > LINUX_MAX_UTSNAME) { 285 vfs_opterror(opts, "linux.osrelease too long"); 286 return (ENAMETOOLONG); 287 } 288 error = linux_map_osrel(osrelease, &osrel); 289 if (error != 0) { 290 vfs_opterror(opts, "linux.osrelease format error"); 291 return (error); 292 } 293 } 294 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 295 sizeof(oss_version)); 296 return (error == ENOENT ? 0 : error); 297 } 298 299 static int 300 linux_prison_set(void *obj, void *data) 301 { 302 struct linux_prison *lpr; 303 struct prison *pr = obj; 304 struct vfsoptlist *opts = data; 305 char *osname, *osrelease; 306 int error, gotversion, jsys, len, oss_version; 307 308 /* Set the parameters, which should be correct. */ 309 error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); 310 if (error == ENOENT) 311 jsys = -1; 312 error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); 313 if (error == ENOENT) 314 osname = NULL; 315 else 316 jsys = JAIL_SYS_NEW; 317 error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); 318 if (error == ENOENT) 319 osrelease = NULL; 320 else 321 jsys = JAIL_SYS_NEW; 322 error = vfs_copyopt(opts, "linux.oss_version", &oss_version, 323 sizeof(oss_version)); 324 if (error == ENOENT) 325 gotversion = 0; 326 else { 327 gotversion = 1; 328 jsys = JAIL_SYS_NEW; 329 } 330 switch (jsys) { 331 case JAIL_SYS_INHERIT: 332 /* "linux=inherit": inherit the parent's Linux info. */ 333 mtx_lock(&pr->pr_mtx); 334 osd_jail_del(pr, linux_osd_jail_slot); 335 mtx_unlock(&pr->pr_mtx); 336 break; 337 case JAIL_SYS_NEW: 338 /* 339 * "linux=new" or "linux.*": 340 * the prison gets its own Linux info. 341 */ 342 error = linux_alloc_prison(pr, &lpr); 343 if (error) { 344 mtx_unlock(&pr->pr_mtx); 345 return (error); 346 } 347 if (osrelease) { 348 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 349 if (error) { 350 mtx_unlock(&pr->pr_mtx); 351 return (error); 352 } 353 strlcpy(lpr->pr_osrelease, osrelease, 354 LINUX_MAX_UTSNAME); 355 } 356 if (osname) 357 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 358 if (gotversion) 359 lpr->pr_oss_version = oss_version; 360 mtx_unlock(&pr->pr_mtx); 361 } 362 return (0); 363 } 364 365 SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters"); 366 SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME, 367 "Jail Linux kernel OS name"); 368 SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME, 369 "Jail Linux kernel OS release"); 370 SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW, 371 "I", "Jail Linux OSS version"); 372 373 static int 374 linux_prison_get(void *obj, void *data) 375 { 376 struct linux_prison *lpr; 377 struct prison *ppr; 378 struct prison *pr = obj; 379 struct vfsoptlist *opts = data; 380 int error, i; 381 382 static int version0; 383 384 /* See if this prison is the one with the Linux info. */ 385 lpr = linux_find_prison(pr, &ppr); 386 i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 387 error = vfs_setopt(opts, "linux", &i, sizeof(i)); 388 if (error != 0 && error != ENOENT) 389 goto done; 390 if (i) { 391 error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); 392 if (error != 0 && error != ENOENT) 393 goto done; 394 error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); 395 if (error != 0 && error != ENOENT) 396 goto done; 397 error = vfs_setopt(opts, "linux.oss_version", 398 &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); 399 if (error != 0 && error != ENOENT) 400 goto done; 401 } else { 402 /* 403 * If this prison is inheriting its Linux info, report 404 * empty/zero parameters. 405 */ 406 error = vfs_setopts(opts, "linux.osname", ""); 407 if (error != 0 && error != ENOENT) 408 goto done; 409 error = vfs_setopts(opts, "linux.osrelease", ""); 410 if (error != 0 && error != ENOENT) 411 goto done; 412 error = vfs_setopt(opts, "linux.oss_version", &version0, 413 sizeof(lpr->pr_oss_version)); 414 if (error != 0 && error != ENOENT) 415 goto done; 416 } 417 error = 0; 418 419 done: 420 mtx_unlock(&ppr->pr_mtx); 421 return (error); 422 } 423 424 static void 425 linux_prison_destructor(void *data) 426 { 427 428 free(data, M_PRISON); 429 } 430 431 void 432 linux_osd_jail_register(void) 433 { 434 struct prison *pr; 435 osd_method_t methods[PR_MAXMETHOD] = { 436 [PR_METHOD_CREATE] = linux_prison_create, 437 [PR_METHOD_GET] = linux_prison_get, 438 [PR_METHOD_SET] = linux_prison_set, 439 [PR_METHOD_CHECK] = linux_prison_check 440 }; 441 442 linux_osd_jail_slot = 443 osd_jail_register(linux_prison_destructor, methods); 444 if (linux_osd_jail_slot > 0) { 445 /* Copy the system linux info to any current prisons. */ 446 sx_xlock(&allprison_lock); 447 TAILQ_FOREACH(pr, &allprison, pr_list) 448 (void)linux_alloc_prison(pr, NULL); 449 sx_xunlock(&allprison_lock); 450 } 451 } 452 453 void 454 linux_osd_jail_deregister(void) 455 { 456 457 if (linux_osd_jail_slot) 458 osd_jail_deregister(linux_osd_jail_slot); 459 } 460 461 void 462 linux_get_osname(struct thread *td, char *dst) 463 { 464 struct prison *pr; 465 struct linux_prison *lpr; 466 467 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 468 bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); 469 mtx_unlock(&pr->pr_mtx); 470 } 471 472 int 473 linux_set_osname(struct thread *td, char *osname) 474 { 475 struct prison *pr; 476 struct linux_prison *lpr; 477 478 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 479 strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); 480 mtx_unlock(&pr->pr_mtx); 481 return (0); 482 } 483 484 void 485 linux_get_osrelease(struct thread *td, char *dst) 486 { 487 struct prison *pr; 488 struct linux_prison *lpr; 489 490 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 491 bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); 492 mtx_unlock(&pr->pr_mtx); 493 } 494 495 int 496 linux_kernver(struct thread *td) 497 { 498 struct prison *pr; 499 struct linux_prison *lpr; 500 int osrel; 501 502 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 503 osrel = lpr->pr_osrel; 504 mtx_unlock(&pr->pr_mtx); 505 return (osrel); 506 } 507 508 int 509 linux_set_osrelease(struct thread *td, char *osrelease) 510 { 511 struct prison *pr; 512 struct linux_prison *lpr; 513 int error; 514 515 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 516 error = linux_map_osrel(osrelease, &lpr->pr_osrel); 517 if (error == 0) 518 strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); 519 mtx_unlock(&pr->pr_mtx); 520 return (error); 521 } 522 523 int 524 linux_get_oss_version(struct thread *td) 525 { 526 struct prison *pr; 527 struct linux_prison *lpr; 528 int version; 529 530 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 531 version = lpr->pr_oss_version; 532 mtx_unlock(&pr->pr_mtx); 533 return (version); 534 } 535 536 int 537 linux_set_oss_version(struct thread *td, int oss_version) 538 { 539 struct prison *pr; 540 struct linux_prison *lpr; 541 542 lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); 543 lpr->pr_oss_version = oss_version; 544 mtx_unlock(&pr->pr_mtx); 545 return (0); 546 } 547 548 #if defined(DEBUG) || defined(KTR) 549 550 u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))]; 551 552 static int 553 linux_debug(int syscall, int toggle, int global) 554 { 555 556 if (global) { 557 char c = toggle ? 0 : 0xff; 558 559 memset(linux_debug_map, c, sizeof(linux_debug_map)); 560 return (0); 561 } 562 if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL) 563 return (EINVAL); 564 if (toggle) 565 clrbit(linux_debug_map, syscall); 566 else 567 setbit(linux_debug_map, syscall); 568 return (0); 569 } 570 571 /* 572 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 573 * 574 * E.g.: sysctl linux.debug=21.0 575 * 576 * As a special case, syscall "all" will apply to all syscalls globally. 577 */ 578 #define LINUX_MAX_DEBUGSTR 16 579 static int 580 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 581 { 582 char value[LINUX_MAX_DEBUGSTR], *p; 583 int error, sysc, toggle; 584 int global = 0; 585 586 value[0] = '\0'; 587 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 588 if (error || req->newptr == NULL) 589 return (error); 590 for (p = value; *p != '\0' && *p != '.'; p++); 591 if (*p == '\0') 592 return (EINVAL); 593 *p++ = '\0'; 594 sysc = strtol(value, NULL, 0); 595 toggle = strtol(p, NULL, 0); 596 if (strcmp(value, "all") == 0) 597 global = 1; 598 error = linux_debug(sysc, toggle, global); 599 return (error); 600 } 601 602 SYSCTL_PROC(_compat_linux, OID_AUTO, debug, 603 CTLTYPE_STRING | CTLFLAG_RW, 604 0, 0, linux_sysctl_debug, "A", 605 "Linux debugging control"); 606 607 #endif /* DEBUG || KTR */ 608