/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <net/if.h>
#include <netinet/in.h>

#include <security/mac/mac_framework.h>

/* Malloc type used for struct prison, its slot arrays and jail services. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Parent node of the security.jail.* sysctl knobs declared below. */
SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jail rules");

/*
 * security.jail.set_hostname_allowed (default 1).  Not consulted in this
 * file; presumably read by the hostname-setting code -- confirm at callers.
 */
int	jail_set_hostname_allowed = 1;
SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

/*
 * security.jail.socket_unixiproute_only (default 1).  Read by prison_if()
 * to decide its result for non-AF_INET addresses.
 */
int	jail_socket_unixiproute_only = 1;
SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");

/*
 * security.jail.sysvipc_allowed (default 0).  Not consulted in this file;
 * presumably read by the System V IPC code -- confirm at callers.
 */
int	jail_sysvipc_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

/*
 * security.jail.enforce_statfs (default 2).  As used by
 * prison_canseemount() and prison_enforce_statfs():
 *   0 - no restriction is applied;
 *   2 - only the mount that holds the jail's root vnode is visible;
 *   any other value - mounts located at or below the jail's path are
 *       visible as well.
 */
static int	jail_enforce_statfs = 2;
SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
    &jail_enforce_statfs, 0,
    "Processes in jail cannot see all mounted file systems");

/*
 * security.jail.allow_raw_sockets (default 0).  Gates PRIV_NETINET_RAW in
 * prison_priv_check().
 */
int	jail_allow_raw_sockets = 0;
SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &jail_allow_raw_sockets, 0,
    "Prison root can create raw sockets");

/*
 * security.jail.chflags_allowed (default 0).  Gates PRIV_VFS_SYSFLAGS in
 * prison_priv_check().
 */
int	jail_chflags_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

/*
 * security.jail.mount_allowed (default 0).  Gates the PRIV_VFS_MOUNT family
 * of privileges in prison_priv_check().
 */
int	jail_mount_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
    &jail_mount_allowed, 0,
    "Processes in jail can mount/unmount jail-friendly file systems");

/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
struct	prisonlist allprison;	/* every prison currently linked */
struct	sx allprison_lock;	/* initialised by init_prison() */
int	lastprid = 0;		/* most recently assigned prison id */
int	prisoncount = 0;	/* number of entries on allprison */

/*
 * List of jail services. Protected by allprison_lock.
90 */ 91 TAILQ_HEAD(prison_services_head, prison_service); 92 static struct prison_services_head prison_services = 93 TAILQ_HEAD_INITIALIZER(prison_services); 94 static int prison_service_slots = 0; 95 96 struct prison_service { 97 prison_create_t ps_create; 98 prison_destroy_t ps_destroy; 99 int ps_slotno; 100 TAILQ_ENTRY(prison_service) ps_next; 101 char ps_name[0]; 102 }; 103 104 static void init_prison(void *); 105 static void prison_complete(void *context, int pending); 106 static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 107 108 static void 109 init_prison(void *data __unused) 110 { 111 112 sx_init(&allprison_lock, "allprison"); 113 LIST_INIT(&allprison); 114 } 115 116 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 117 118 /* 119 * struct jail_args { 120 * struct jail *jail; 121 * }; 122 */ 123 int 124 jail(struct thread *td, struct jail_args *uap) 125 { 126 struct nameidata nd; 127 struct prison *pr, *tpr; 128 struct prison_service *psrv; 129 struct jail j; 130 struct jail_attach_args jaa; 131 int vfslocked, error, tryprid; 132 133 error = copyin(uap->jail, &j, sizeof(j)); 134 if (error) 135 return (error); 136 if (j.version != 0) 137 return (EINVAL); 138 139 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 140 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 141 pr->pr_ref = 1; 142 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 143 if (error) 144 goto e_killmtx; 145 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 146 pr->pr_path, td); 147 error = namei(&nd); 148 if (error) 149 goto e_killmtx; 150 vfslocked = NDHASGIANT(&nd); 151 pr->pr_root = nd.ni_vp; 152 VOP_UNLOCK(nd.ni_vp, 0); 153 NDFREE(&nd, NDF_ONLY_PNBUF); 154 VFS_UNLOCK_GIANT(vfslocked); 155 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 156 if (error) 157 goto e_dropvnref; 158 pr->pr_ip = j.ip_number; 159 pr->pr_linux = NULL; 160 pr->pr_securelevel = securelevel; 161 if (prison_service_slots == 0) 
162 pr->pr_slots = NULL; 163 else { 164 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 165 M_PRISON, M_ZERO | M_WAITOK); 166 } 167 168 /* Determine next pr_id and add prison to allprison list. */ 169 sx_xlock(&allprison_lock); 170 tryprid = lastprid + 1; 171 if (tryprid == JAIL_MAX) 172 tryprid = 1; 173 next: 174 LIST_FOREACH(tpr, &allprison, pr_list) { 175 if (tpr->pr_id == tryprid) { 176 tryprid++; 177 if (tryprid == JAIL_MAX) { 178 sx_xunlock(&allprison_lock); 179 error = EAGAIN; 180 goto e_dropvnref; 181 } 182 goto next; 183 } 184 } 185 pr->pr_id = jaa.jid = lastprid = tryprid; 186 LIST_INSERT_HEAD(&allprison, pr, pr_list); 187 prisoncount++; 188 sx_downgrade(&allprison_lock); 189 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 190 psrv->ps_create(psrv, pr); 191 } 192 sx_sunlock(&allprison_lock); 193 194 error = jail_attach(td, &jaa); 195 if (error) 196 goto e_dropprref; 197 mtx_lock(&pr->pr_mtx); 198 pr->pr_ref--; 199 mtx_unlock(&pr->pr_mtx); 200 td->td_retval[0] = jaa.jid; 201 return (0); 202 e_dropprref: 203 sx_xlock(&allprison_lock); 204 LIST_REMOVE(pr, pr_list); 205 prisoncount--; 206 sx_downgrade(&allprison_lock); 207 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 208 psrv->ps_destroy(psrv, pr); 209 } 210 sx_sunlock(&allprison_lock); 211 e_dropvnref: 212 if (pr->pr_slots != NULL) 213 FREE(pr->pr_slots, M_PRISON); 214 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 215 vrele(pr->pr_root); 216 VFS_UNLOCK_GIANT(vfslocked); 217 e_killmtx: 218 mtx_destroy(&pr->pr_mtx); 219 FREE(pr, M_PRISON); 220 return (error); 221 } 222 223 /* 224 * struct jail_attach_args { 225 * int jid; 226 * }; 227 */ 228 int 229 jail_attach(struct thread *td, struct jail_attach_args *uap) 230 { 231 struct proc *p; 232 struct ucred *newcred, *oldcred; 233 struct prison *pr; 234 int vfslocked, error; 235 236 /* 237 * XXX: Note that there is a slight race here if two threads 238 * in the same privileged process attempt to attach to two 239 * different jails at the 
same time. It is important for 240 * user processes not to do this, or they might end up with 241 * a process root from one prison, but attached to the jail 242 * of another. 243 */ 244 error = priv_check(td, PRIV_JAIL_ATTACH); 245 if (error) 246 return (error); 247 248 p = td->td_proc; 249 sx_slock(&allprison_lock); 250 pr = prison_find(uap->jid); 251 if (pr == NULL) { 252 sx_sunlock(&allprison_lock); 253 return (EINVAL); 254 } 255 pr->pr_ref++; 256 mtx_unlock(&pr->pr_mtx); 257 sx_sunlock(&allprison_lock); 258 259 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 260 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 261 if ((error = change_dir(pr->pr_root, td)) != 0) 262 goto e_unlock; 263 #ifdef MAC 264 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 265 goto e_unlock; 266 #endif 267 VOP_UNLOCK(pr->pr_root, 0); 268 change_root(pr->pr_root, td); 269 VFS_UNLOCK_GIANT(vfslocked); 270 271 newcred = crget(); 272 PROC_LOCK(p); 273 oldcred = p->p_ucred; 274 setsugid(p); 275 crcopy(newcred, oldcred); 276 newcred->cr_prison = pr; 277 p->p_ucred = newcred; 278 PROC_UNLOCK(p); 279 crfree(oldcred); 280 return (0); 281 e_unlock: 282 VOP_UNLOCK(pr->pr_root, 0); 283 VFS_UNLOCK_GIANT(vfslocked); 284 mtx_lock(&pr->pr_mtx); 285 pr->pr_ref--; 286 mtx_unlock(&pr->pr_mtx); 287 return (error); 288 } 289 290 /* 291 * Returns a locked prison instance, or NULL on failure. 
292 */ 293 struct prison * 294 prison_find(int prid) 295 { 296 struct prison *pr; 297 298 sx_assert(&allprison_lock, SX_LOCKED); 299 LIST_FOREACH(pr, &allprison, pr_list) { 300 if (pr->pr_id == prid) { 301 mtx_lock(&pr->pr_mtx); 302 if (pr->pr_ref == 0) { 303 mtx_unlock(&pr->pr_mtx); 304 break; 305 } 306 return (pr); 307 } 308 } 309 return (NULL); 310 } 311 312 void 313 prison_free(struct prison *pr) 314 { 315 316 mtx_lock(&pr->pr_mtx); 317 pr->pr_ref--; 318 if (pr->pr_ref == 0) { 319 mtx_unlock(&pr->pr_mtx); 320 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 321 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 322 return; 323 } 324 mtx_unlock(&pr->pr_mtx); 325 } 326 327 static void 328 prison_complete(void *context, int pending) 329 { 330 struct prison_service *psrv; 331 struct prison *pr; 332 int vfslocked; 333 334 pr = (struct prison *)context; 335 336 sx_xlock(&allprison_lock); 337 LIST_REMOVE(pr, pr_list); 338 prisoncount--; 339 sx_downgrade(&allprison_lock); 340 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 341 psrv->ps_destroy(psrv, pr); 342 } 343 sx_sunlock(&allprison_lock); 344 if (pr->pr_slots != NULL) 345 FREE(pr->pr_slots, M_PRISON); 346 347 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 348 vrele(pr->pr_root); 349 VFS_UNLOCK_GIANT(vfslocked); 350 351 mtx_destroy(&pr->pr_mtx); 352 if (pr->pr_linux != NULL) 353 FREE(pr->pr_linux, M_PRISON); 354 FREE(pr, M_PRISON); 355 } 356 357 void 358 prison_hold(struct prison *pr) 359 { 360 361 mtx_lock(&pr->pr_mtx); 362 KASSERT(pr->pr_ref > 0, 363 ("Trying to hold dead prison (id=%d).", pr->pr_id)); 364 pr->pr_ref++; 365 mtx_unlock(&pr->pr_mtx); 366 } 367 368 u_int32_t 369 prison_getip(struct ucred *cred) 370 { 371 372 return (cred->cr_prison->pr_ip); 373 } 374 375 int 376 prison_ip(struct ucred *cred, int flag, u_int32_t *ip) 377 { 378 u_int32_t tmp; 379 380 if (!jailed(cred)) 381 return (0); 382 if (flag) 383 tmp = *ip; 384 else 385 tmp = ntohl(*ip); 386 if (tmp == INADDR_ANY) { 387 if (flag) 388 *ip = 
cred->cr_prison->pr_ip; 389 else 390 *ip = htonl(cred->cr_prison->pr_ip); 391 return (0); 392 } 393 if (tmp == INADDR_LOOPBACK) { 394 if (flag) 395 *ip = cred->cr_prison->pr_ip; 396 else 397 *ip = htonl(cred->cr_prison->pr_ip); 398 return (0); 399 } 400 if (cred->cr_prison->pr_ip != tmp) 401 return (1); 402 return (0); 403 } 404 405 void 406 prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 407 { 408 u_int32_t tmp; 409 410 if (!jailed(cred)) 411 return; 412 if (flag) 413 tmp = *ip; 414 else 415 tmp = ntohl(*ip); 416 if (tmp == INADDR_LOOPBACK) { 417 if (flag) 418 *ip = cred->cr_prison->pr_ip; 419 else 420 *ip = htonl(cred->cr_prison->pr_ip); 421 return; 422 } 423 return; 424 } 425 426 int 427 prison_if(struct ucred *cred, struct sockaddr *sa) 428 { 429 struct sockaddr_in *sai; 430 int ok; 431 432 sai = (struct sockaddr_in *)sa; 433 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 434 ok = 1; 435 else if (sai->sin_family != AF_INET) 436 ok = 0; 437 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 438 ok = 1; 439 else 440 ok = 0; 441 return (ok); 442 } 443 444 /* 445 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 446 */ 447 int 448 prison_check(struct ucred *cred1, struct ucred *cred2) 449 { 450 451 if (jailed(cred1)) { 452 if (!jailed(cred2)) 453 return (ESRCH); 454 if (cred2->cr_prison != cred1->cr_prison) 455 return (ESRCH); 456 } 457 458 return (0); 459 } 460 461 /* 462 * Return 1 if the passed credential is in a jail, otherwise 0. 463 */ 464 int 465 jailed(struct ucred *cred) 466 { 467 468 return (cred->cr_prison != NULL); 469 } 470 471 /* 472 * Return the correct hostname for the passed credential. 
473 */ 474 void 475 getcredhostname(struct ucred *cred, char *buf, size_t size) 476 { 477 478 if (jailed(cred)) { 479 mtx_lock(&cred->cr_prison->pr_mtx); 480 strlcpy(buf, cred->cr_prison->pr_host, size); 481 mtx_unlock(&cred->cr_prison->pr_mtx); 482 } else { 483 mtx_lock(&hostname_mtx); 484 strlcpy(buf, hostname, size); 485 mtx_unlock(&hostname_mtx); 486 } 487 } 488 489 /* 490 * Determine whether the subject represented by cred can "see" 491 * status of a mount point. 492 * Returns: 0 for permitted, ENOENT otherwise. 493 * XXX: This function should be called cr_canseemount() and should be 494 * placed in kern_prot.c. 495 */ 496 int 497 prison_canseemount(struct ucred *cred, struct mount *mp) 498 { 499 struct prison *pr; 500 struct statfs *sp; 501 size_t len; 502 503 if (!jailed(cred) || jail_enforce_statfs == 0) 504 return (0); 505 pr = cred->cr_prison; 506 if (pr->pr_root->v_mount == mp) 507 return (0); 508 if (jail_enforce_statfs == 2) 509 return (ENOENT); 510 /* 511 * If jail's chroot directory is set to "/" we should be able to see 512 * all mount-points from inside a jail. 513 * This is ugly check, but this is the only situation when jail's 514 * directory ends with '/'. 515 */ 516 if (strcmp(pr->pr_path, "/") == 0) 517 return (0); 518 len = strlen(pr->pr_path); 519 sp = &mp->mnt_stat; 520 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 521 return (ENOENT); 522 /* 523 * Be sure that we don't have situation where jail's root directory 524 * is "/some/path" and mount point is "/some/pathpath". 
525 */ 526 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 527 return (ENOENT); 528 return (0); 529 } 530 531 void 532 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 533 { 534 char jpath[MAXPATHLEN]; 535 struct prison *pr; 536 size_t len; 537 538 if (!jailed(cred) || jail_enforce_statfs == 0) 539 return; 540 pr = cred->cr_prison; 541 if (prison_canseemount(cred, mp) != 0) { 542 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 543 strlcpy(sp->f_mntonname, "[restricted]", 544 sizeof(sp->f_mntonname)); 545 return; 546 } 547 if (pr->pr_root->v_mount == mp) { 548 /* 549 * Clear current buffer data, so we are sure nothing from 550 * the valid path left there. 551 */ 552 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 553 *sp->f_mntonname = '/'; 554 return; 555 } 556 /* 557 * If jail's chroot directory is set to "/" we should be able to see 558 * all mount-points from inside a jail. 559 */ 560 if (strcmp(pr->pr_path, "/") == 0) 561 return; 562 len = strlen(pr->pr_path); 563 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 564 /* 565 * Clear current buffer data, so we are sure nothing from 566 * the valid path left there. 567 */ 568 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 569 if (*jpath == '\0') { 570 /* Should never happen. */ 571 *sp->f_mntonname = '/'; 572 } else { 573 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 574 } 575 } 576 577 /* 578 * Check with permission for a specific privilege is granted within jail. We 579 * have a specific list of accepted privileges; the rest are denied. 580 */ 581 int 582 prison_priv_check(struct ucred *cred, int priv) 583 { 584 585 if (!jailed(cred)) 586 return (0); 587 588 switch (priv) { 589 590 /* 591 * Allow ktrace privileges for root in jail. 592 */ 593 case PRIV_KTRACE: 594 595 #if 0 596 /* 597 * Allow jailed processes to configure audit identity and 598 * submit audit records (login, etc). 
In the future we may 599 * want to further refine the relationship between audit and 600 * jail. 601 */ 602 case PRIV_AUDIT_GETAUDIT: 603 case PRIV_AUDIT_SETAUDIT: 604 case PRIV_AUDIT_SUBMIT: 605 #endif 606 607 /* 608 * Allow jailed processes to manipulate process UNIX 609 * credentials in any way they see fit. 610 */ 611 case PRIV_CRED_SETUID: 612 case PRIV_CRED_SETEUID: 613 case PRIV_CRED_SETGID: 614 case PRIV_CRED_SETEGID: 615 case PRIV_CRED_SETGROUPS: 616 case PRIV_CRED_SETREUID: 617 case PRIV_CRED_SETREGID: 618 case PRIV_CRED_SETRESUID: 619 case PRIV_CRED_SETRESGID: 620 621 /* 622 * Jail implements visibility constraints already, so allow 623 * jailed root to override uid/gid-based constraints. 624 */ 625 case PRIV_SEEOTHERGIDS: 626 case PRIV_SEEOTHERUIDS: 627 628 /* 629 * Jail implements inter-process debugging limits already, so 630 * allow jailed root various debugging privileges. 631 */ 632 case PRIV_DEBUG_DIFFCRED: 633 case PRIV_DEBUG_SUGID: 634 case PRIV_DEBUG_UNPRIV: 635 636 /* 637 * Allow jail to set various resource limits and login 638 * properties, and for now, exceed process resource limits. 639 */ 640 case PRIV_PROC_LIMIT: 641 case PRIV_PROC_SETLOGIN: 642 case PRIV_PROC_SETRLIMIT: 643 644 /* 645 * System V and POSIX IPC privileges are granted in jail. 646 */ 647 case PRIV_IPC_READ: 648 case PRIV_IPC_WRITE: 649 case PRIV_IPC_ADMIN: 650 case PRIV_IPC_MSGSIZE: 651 case PRIV_MQ_ADMIN: 652 653 /* 654 * Jail implements its own inter-process limits, so allow 655 * root processes in jail to change scheduling on other 656 * processes in the same jail. Likewise for signalling. 657 */ 658 case PRIV_SCHED_DIFFCRED: 659 case PRIV_SIGNAL_DIFFCRED: 660 case PRIV_SIGNAL_SUGID: 661 662 /* 663 * Allow jailed processes to write to sysctls marked as jail 664 * writable. 665 */ 666 case PRIV_SYSCTL_WRITEJAIL: 667 668 /* 669 * Allow root in jail to manage a variety of quota 670 * properties. These should likely be conditional on a 671 * configuration option. 
672 */ 673 case PRIV_VFS_GETQUOTA: 674 case PRIV_VFS_SETQUOTA: 675 676 /* 677 * Since Jail relies on chroot() to implement file system 678 * protections, grant many VFS privileges to root in jail. 679 * Be careful to exclude mount-related and NFS-related 680 * privileges. 681 */ 682 case PRIV_VFS_READ: 683 case PRIV_VFS_WRITE: 684 case PRIV_VFS_ADMIN: 685 case PRIV_VFS_EXEC: 686 case PRIV_VFS_LOOKUP: 687 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 688 case PRIV_VFS_CHFLAGS_DEV: 689 case PRIV_VFS_CHOWN: 690 case PRIV_VFS_CHROOT: 691 case PRIV_VFS_RETAINSUGID: 692 case PRIV_VFS_FCHROOT: 693 case PRIV_VFS_LINK: 694 case PRIV_VFS_SETGID: 695 case PRIV_VFS_STAT: 696 case PRIV_VFS_STICKYFILE: 697 return (0); 698 699 /* 700 * Depending on the global setting, allow privilege of 701 * setting system flags. 702 */ 703 case PRIV_VFS_SYSFLAGS: 704 if (jail_chflags_allowed) 705 return (0); 706 else 707 return (EPERM); 708 709 /* 710 * Depending on the global setting, allow privilege of 711 * mounting/unmounting file systems. 712 */ 713 case PRIV_VFS_MOUNT: 714 case PRIV_VFS_UNMOUNT: 715 case PRIV_VFS_MOUNT_NONUSER: 716 case PRIV_VFS_MOUNT_OWNER: 717 if (jail_mount_allowed) 718 return (0); 719 else 720 return (EPERM); 721 722 /* 723 * Allow jailed root to bind reserved ports and reuse in-use 724 * ports. 725 */ 726 case PRIV_NETINET_RESERVEDPORT: 727 case PRIV_NETINET_REUSEPORT: 728 return (0); 729 730 /* 731 * Allow jailed root to set certian IPv4/6 (option) headers. 732 */ 733 case PRIV_NETINET_SETHDROPTS: 734 return (0); 735 736 /* 737 * Conditionally allow creating raw sockets in jail. 738 */ 739 case PRIV_NETINET_RAW: 740 if (jail_allow_raw_sockets) 741 return (0); 742 else 743 return (EPERM); 744 745 /* 746 * Since jail implements its own visibility limits on netstat 747 * sysctls, allow getcred. This allows identd to work in 748 * jail. 
749 */ 750 case PRIV_NETINET_GETCRED: 751 return (0); 752 753 default: 754 /* 755 * In all remaining cases, deny the privilege request. This 756 * includes almost all network privileges, many system 757 * configuration privileges. 758 */ 759 return (EPERM); 760 } 761 } 762 763 /* 764 * Register jail service. Provides 'create' and 'destroy' methods. 765 * 'create' method will be called for every existing jail and all 766 * jails in the future as they beeing created. 767 * 'destroy' method will be called for every jail going away and 768 * for all existing jails at the time of service deregistration. 769 */ 770 struct prison_service * 771 prison_service_register(const char *name, prison_create_t create, 772 prison_destroy_t destroy) 773 { 774 struct prison_service *psrv, *psrv2; 775 struct prison *pr; 776 int reallocate = 1, slotno = 0; 777 void **slots, **oldslots; 778 779 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 780 M_WAITOK | M_ZERO); 781 psrv->ps_create = create; 782 psrv->ps_destroy = destroy; 783 strcpy(psrv->ps_name, name); 784 /* 785 * Grab the allprison_lock here, so we won't miss any jail 786 * creation/destruction. 787 */ 788 sx_xlock(&allprison_lock); 789 #ifdef INVARIANTS 790 /* 791 * Verify if service is not already registered. 792 */ 793 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 794 KASSERT(strcmp(psrv2->ps_name, name) != 0, 795 ("jail service %s already registered", name)); 796 } 797 #endif 798 /* 799 * Find free slot. When there is no existing free slot available, 800 * allocate one at the end. 801 */ 802 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 803 if (psrv2->ps_slotno != slotno) { 804 KASSERT(slotno < psrv2->ps_slotno, 805 ("Invalid slotno (slotno=%d >= ps_slotno=%d", 806 slotno, psrv2->ps_slotno)); 807 /* We found free slot. */ 808 reallocate = 0; 809 break; 810 } 811 slotno++; 812 } 813 psrv->ps_slotno = slotno; 814 /* 815 * Keep the list sorted by slot number. 
816 */ 817 if (psrv2 != NULL) { 818 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 819 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 820 } else { 821 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 822 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 823 } 824 prison_service_slots++; 825 sx_downgrade(&allprison_lock); 826 /* 827 * Allocate memory for new slot if we didn't found empty one. 828 * Do not use realloc(9), because pr_slots is protected with a mutex, 829 * so we can't sleep. 830 */ 831 LIST_FOREACH(pr, &allprison, pr_list) { 832 if (reallocate) { 833 /* First allocate memory with M_WAITOK. */ 834 slots = malloc(sizeof(*slots) * prison_service_slots, 835 M_PRISON, M_WAITOK); 836 /* Now grab the mutex and replace pr_slots. */ 837 mtx_lock(&pr->pr_mtx); 838 oldslots = pr->pr_slots; 839 if (psrv->ps_slotno > 0) { 840 bcopy(oldslots, slots, 841 sizeof(*slots) * (prison_service_slots - 1)); 842 } 843 slots[psrv->ps_slotno] = NULL; 844 pr->pr_slots = slots; 845 mtx_unlock(&pr->pr_mtx); 846 if (oldslots != NULL) 847 free(oldslots, M_PRISON); 848 } 849 /* 850 * Call 'create' method for each existing jail. 851 */ 852 psrv->ps_create(psrv, pr); 853 } 854 sx_sunlock(&allprison_lock); 855 856 return (psrv); 857 } 858 859 void 860 prison_service_deregister(struct prison_service *psrv) 861 { 862 struct prison *pr; 863 void **slots, **oldslots; 864 int last = 0; 865 866 sx_xlock(&allprison_lock); 867 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 868 last = 1; 869 TAILQ_REMOVE(&prison_services, psrv, ps_next); 870 prison_service_slots--; 871 sx_downgrade(&allprison_lock); 872 LIST_FOREACH(pr, &allprison, pr_list) { 873 /* 874 * Call 'destroy' method for every currently existing jail. 875 */ 876 psrv->ps_destroy(psrv, pr); 877 /* 878 * If this is the last slot, free the memory allocated for it. 
879 */ 880 if (last) { 881 if (prison_service_slots == 0) 882 slots = NULL; 883 else { 884 slots = malloc(sizeof(*slots) * prison_service_slots, 885 M_PRISON, M_WAITOK); 886 } 887 mtx_lock(&pr->pr_mtx); 888 oldslots = pr->pr_slots; 889 /* 890 * We require setting slot to NULL after freeing it, 891 * this way we can check for memory leaks here. 892 */ 893 KASSERT(oldslots[psrv->ps_slotno] == NULL, 894 ("Slot %d (service %s, jailid=%d) still contains data?", 895 psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 896 if (psrv->ps_slotno > 0) { 897 bcopy(oldslots, slots, 898 sizeof(*slots) * prison_service_slots); 899 } 900 pr->pr_slots = slots; 901 mtx_unlock(&pr->pr_mtx); 902 KASSERT(oldslots != NULL, ("oldslots == NULL")); 903 free(oldslots, M_PRISON); 904 } 905 } 906 sx_sunlock(&allprison_lock); 907 free(psrv, M_PRISON); 908 } 909 910 /* 911 * Function sets data for the given jail in slot assigned for the given 912 * jail service. 913 */ 914 void 915 prison_service_data_set(struct prison_service *psrv, struct prison *pr, 916 void *data) 917 { 918 919 mtx_assert(&pr->pr_mtx, MA_OWNED); 920 pr->pr_slots[psrv->ps_slotno] = data; 921 } 922 923 /* 924 * Function clears slots assigned for the given jail service in the given 925 * prison structure and returns current slot data. 926 */ 927 void * 928 prison_service_data_del(struct prison_service *psrv, struct prison *pr) 929 { 930 void *data; 931 932 mtx_assert(&pr->pr_mtx, MA_OWNED); 933 data = pr->pr_slots[psrv->ps_slotno]; 934 pr->pr_slots[psrv->ps_slotno] = NULL; 935 return (data); 936 } 937 938 /* 939 * Function returns current data from the slot assigned to the given jail 940 * service for the given jail. 
941 */ 942 void * 943 prison_service_data_get(struct prison_service *psrv, struct prison *pr) 944 { 945 946 mtx_assert(&pr->pr_mtx, MA_OWNED); 947 return (pr->pr_slots[psrv->ps_slotno]); 948 } 949 950 static int 951 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 952 { 953 struct xprison *xp, *sxp; 954 struct prison *pr; 955 int count, error; 956 957 if (jailed(req->td->td_ucred)) 958 return (0); 959 960 sx_slock(&allprison_lock); 961 if ((count = prisoncount) == 0) { 962 sx_sunlock(&allprison_lock); 963 return (0); 964 } 965 966 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 967 968 LIST_FOREACH(pr, &allprison, pr_list) { 969 xp->pr_version = XPRISON_VERSION; 970 xp->pr_id = pr->pr_id; 971 xp->pr_ip = pr->pr_ip; 972 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 973 mtx_lock(&pr->pr_mtx); 974 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 975 mtx_unlock(&pr->pr_mtx); 976 xp++; 977 } 978 sx_sunlock(&allprison_lock); 979 980 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 981 free(sxp, M_TEMP); 982 return (error); 983 } 984 985 SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 986 NULL, 0, sysctl_jail_list, "S", "List of active jails"); 987 988 static int 989 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 990 { 991 int error, injail; 992 993 injail = jailed(req->td->td_ucred); 994 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 995 996 return (error); 997 } 998 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 999 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 1000