/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <netinet/in.h>

#include <security/mac/mac_framework.h>

/* Allocation type used for prison structures, slot arrays and services. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Parent node for the security.jail.* tunables declared below. */
SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jail rules");

/*
 * security.jail.set_hostname_allowed.  Not consulted by the code visible in
 * this file; presumably read by the hostname-setting path elsewhere.
 */
int	jail_set_hostname_allowed = 1;
SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

/*
 * security.jail.socket_unixiproute_only.  Consulted by prison_if() when the
 * address family is not AF_INET.
 */
int	jail_socket_unixiproute_only = 1;
SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");

/*
 * security.jail.sysvipc_allowed.  Not consulted by the code visible in this
 * file.
 */
int	jail_sysvipc_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

/*
 * security.jail.enforce_statfs.  As used by prison_canseemount() and
 * prison_enforce_statfs(): 0 disables the checks, 2 hides every mount except
 * the one holding the jail's root vnode, and any other value additionally
 * allows mounts whose mount-on name lies below the jail's path.
 */
static int	jail_enforce_statfs = 2;
SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
    &jail_enforce_statfs, 0,
    "Processes in jail cannot see all mounted file systems");

/*
 * security.jail.allow_raw_sockets.  Gates PRIV_NETINET_RAW in
 * prison_priv_check().
 */
int	jail_allow_raw_sockets = 0;
SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &jail_allow_raw_sockets, 0,
    "Prison root can create raw sockets");

/*
 * security.jail.chflags_allowed.  Gates PRIV_VFS_SYSFLAGS in
 * prison_priv_check().
 */
int	jail_chflags_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

/*
 * security.jail.mount_allowed.  Gates the PRIV_VFS_MOUNT* and
 * PRIV_VFS_UNMOUNT privileges in prison_priv_check().
 */
int	jail_mount_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
    &jail_mount_allowed, 0,
    "Processes in jail can mount/unmount jail-friendly file systems");

/* allprison, lastprid, and prisoncount are protected by allprison_lock. */
struct	prisonlist allprison;	/* every prison currently linked in */
struct	sx allprison_lock;	/* initialised by init_prison() */
int	lastprid = 0;		/* most recently assigned prison id */
int	prisoncount = 0;	/* number of entries on allprison */

/*
 * List of jail services. Protected by allprison_lock.
91 */ 92 TAILQ_HEAD(prison_services_head, prison_service); 93 static struct prison_services_head prison_services = 94 TAILQ_HEAD_INITIALIZER(prison_services); 95 static int prison_service_slots = 0; 96 97 struct prison_service { 98 prison_create_t ps_create; 99 prison_destroy_t ps_destroy; 100 int ps_slotno; 101 TAILQ_ENTRY(prison_service) ps_next; 102 char ps_name[0]; 103 }; 104 105 static void init_prison(void *); 106 static void prison_complete(void *context, int pending); 107 static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 108 109 static void 110 init_prison(void *data __unused) 111 { 112 113 sx_init(&allprison_lock, "allprison"); 114 LIST_INIT(&allprison); 115 } 116 117 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 118 119 /* 120 * struct jail_args { 121 * struct jail *jail; 122 * }; 123 */ 124 int 125 jail(struct thread *td, struct jail_args *uap) 126 { 127 struct nameidata nd; 128 struct prison *pr, *tpr; 129 struct prison_service *psrv; 130 struct jail j; 131 struct jail_attach_args jaa; 132 int vfslocked, error, tryprid; 133 134 error = copyin(uap->jail, &j, sizeof(j)); 135 if (error) 136 return (error); 137 if (j.version != 0) 138 return (EINVAL); 139 140 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 141 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 142 pr->pr_ref = 1; 143 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 144 if (error) 145 goto e_killmtx; 146 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 147 pr->pr_path, td); 148 error = namei(&nd); 149 if (error) 150 goto e_killmtx; 151 vfslocked = NDHASGIANT(&nd); 152 pr->pr_root = nd.ni_vp; 153 VOP_UNLOCK(nd.ni_vp, 0); 154 NDFREE(&nd, NDF_ONLY_PNBUF); 155 VFS_UNLOCK_GIANT(vfslocked); 156 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 157 if (error) 158 goto e_dropvnref; 159 pr->pr_ip = j.ip_number; 160 pr->pr_linux = NULL; 161 pr->pr_securelevel = securelevel; 162 if (prison_service_slots == 0) 
163 pr->pr_slots = NULL; 164 else { 165 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 166 M_PRISON, M_ZERO | M_WAITOK); 167 } 168 169 /* Determine next pr_id and add prison to allprison list. */ 170 sx_xlock(&allprison_lock); 171 tryprid = lastprid + 1; 172 if (tryprid == JAIL_MAX) 173 tryprid = 1; 174 next: 175 LIST_FOREACH(tpr, &allprison, pr_list) { 176 if (tpr->pr_id == tryprid) { 177 tryprid++; 178 if (tryprid == JAIL_MAX) { 179 sx_xunlock(&allprison_lock); 180 error = EAGAIN; 181 goto e_dropvnref; 182 } 183 goto next; 184 } 185 } 186 pr->pr_id = jaa.jid = lastprid = tryprid; 187 LIST_INSERT_HEAD(&allprison, pr, pr_list); 188 prisoncount++; 189 sx_downgrade(&allprison_lock); 190 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 191 psrv->ps_create(psrv, pr); 192 } 193 sx_sunlock(&allprison_lock); 194 195 error = jail_attach(td, &jaa); 196 if (error) 197 goto e_dropprref; 198 mtx_lock(&pr->pr_mtx); 199 pr->pr_ref--; 200 mtx_unlock(&pr->pr_mtx); 201 td->td_retval[0] = jaa.jid; 202 return (0); 203 e_dropprref: 204 sx_xlock(&allprison_lock); 205 LIST_REMOVE(pr, pr_list); 206 prisoncount--; 207 sx_downgrade(&allprison_lock); 208 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 209 psrv->ps_destroy(psrv, pr); 210 } 211 sx_sunlock(&allprison_lock); 212 e_dropvnref: 213 if (pr->pr_slots != NULL) 214 FREE(pr->pr_slots, M_PRISON); 215 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 216 vrele(pr->pr_root); 217 VFS_UNLOCK_GIANT(vfslocked); 218 e_killmtx: 219 mtx_destroy(&pr->pr_mtx); 220 FREE(pr, M_PRISON); 221 return (error); 222 } 223 224 /* 225 * struct jail_attach_args { 226 * int jid; 227 * }; 228 */ 229 int 230 jail_attach(struct thread *td, struct jail_attach_args *uap) 231 { 232 struct proc *p; 233 struct ucred *newcred, *oldcred; 234 struct prison *pr; 235 int vfslocked, error; 236 237 /* 238 * XXX: Note that there is a slight race here if two threads 239 * in the same privileged process attempt to attach to two 240 * different jails at the 
same time. It is important for 241 * user processes not to do this, or they might end up with 242 * a process root from one prison, but attached to the jail 243 * of another. 244 */ 245 error = priv_check(td, PRIV_JAIL_ATTACH); 246 if (error) 247 return (error); 248 249 p = td->td_proc; 250 sx_slock(&allprison_lock); 251 pr = prison_find(uap->jid); 252 if (pr == NULL) { 253 sx_sunlock(&allprison_lock); 254 return (EINVAL); 255 } 256 pr->pr_ref++; 257 mtx_unlock(&pr->pr_mtx); 258 sx_sunlock(&allprison_lock); 259 260 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 261 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 262 if ((error = change_dir(pr->pr_root, td)) != 0) 263 goto e_unlock; 264 #ifdef MAC 265 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 266 goto e_unlock; 267 #endif 268 VOP_UNLOCK(pr->pr_root, 0); 269 change_root(pr->pr_root, td); 270 VFS_UNLOCK_GIANT(vfslocked); 271 272 newcred = crget(); 273 PROC_LOCK(p); 274 oldcred = p->p_ucred; 275 setsugid(p); 276 crcopy(newcred, oldcred); 277 newcred->cr_prison = pr; 278 p->p_ucred = newcred; 279 PROC_UNLOCK(p); 280 crfree(oldcred); 281 return (0); 282 e_unlock: 283 VOP_UNLOCK(pr->pr_root, 0); 284 VFS_UNLOCK_GIANT(vfslocked); 285 mtx_lock(&pr->pr_mtx); 286 pr->pr_ref--; 287 mtx_unlock(&pr->pr_mtx); 288 return (error); 289 } 290 291 /* 292 * Returns a locked prison instance, or NULL on failure. 
293 */ 294 struct prison * 295 prison_find(int prid) 296 { 297 struct prison *pr; 298 299 sx_assert(&allprison_lock, SX_LOCKED); 300 LIST_FOREACH(pr, &allprison, pr_list) { 301 if (pr->pr_id == prid) { 302 mtx_lock(&pr->pr_mtx); 303 if (pr->pr_ref == 0) { 304 mtx_unlock(&pr->pr_mtx); 305 break; 306 } 307 return (pr); 308 } 309 } 310 return (NULL); 311 } 312 313 void 314 prison_free(struct prison *pr) 315 { 316 317 mtx_lock(&pr->pr_mtx); 318 pr->pr_ref--; 319 if (pr->pr_ref == 0) { 320 mtx_unlock(&pr->pr_mtx); 321 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 322 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 323 return; 324 } 325 mtx_unlock(&pr->pr_mtx); 326 } 327 328 static void 329 prison_complete(void *context, int pending) 330 { 331 struct prison_service *psrv; 332 struct prison *pr; 333 int vfslocked; 334 335 pr = (struct prison *)context; 336 337 sx_xlock(&allprison_lock); 338 LIST_REMOVE(pr, pr_list); 339 prisoncount--; 340 sx_downgrade(&allprison_lock); 341 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 342 psrv->ps_destroy(psrv, pr); 343 } 344 sx_sunlock(&allprison_lock); 345 if (pr->pr_slots != NULL) 346 FREE(pr->pr_slots, M_PRISON); 347 348 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 349 vrele(pr->pr_root); 350 VFS_UNLOCK_GIANT(vfslocked); 351 352 mtx_destroy(&pr->pr_mtx); 353 if (pr->pr_linux != NULL) 354 FREE(pr->pr_linux, M_PRISON); 355 FREE(pr, M_PRISON); 356 } 357 358 void 359 prison_hold(struct prison *pr) 360 { 361 362 mtx_lock(&pr->pr_mtx); 363 KASSERT(pr->pr_ref > 0, 364 ("Trying to hold dead prison (id=%d).", pr->pr_id)); 365 pr->pr_ref++; 366 mtx_unlock(&pr->pr_mtx); 367 } 368 369 u_int32_t 370 prison_getip(struct ucred *cred) 371 { 372 373 return (cred->cr_prison->pr_ip); 374 } 375 376 int 377 prison_ip(struct ucred *cred, int flag, u_int32_t *ip) 378 { 379 u_int32_t tmp; 380 381 if (!jailed(cred)) 382 return (0); 383 if (flag) 384 tmp = *ip; 385 else 386 tmp = ntohl(*ip); 387 if (tmp == INADDR_ANY) { 388 if (flag) 389 *ip = 
cred->cr_prison->pr_ip; 390 else 391 *ip = htonl(cred->cr_prison->pr_ip); 392 return (0); 393 } 394 if (tmp == INADDR_LOOPBACK) { 395 if (flag) 396 *ip = cred->cr_prison->pr_ip; 397 else 398 *ip = htonl(cred->cr_prison->pr_ip); 399 return (0); 400 } 401 if (cred->cr_prison->pr_ip != tmp) 402 return (1); 403 return (0); 404 } 405 406 void 407 prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 408 { 409 u_int32_t tmp; 410 411 if (!jailed(cred)) 412 return; 413 if (flag) 414 tmp = *ip; 415 else 416 tmp = ntohl(*ip); 417 if (tmp == INADDR_LOOPBACK) { 418 if (flag) 419 *ip = cred->cr_prison->pr_ip; 420 else 421 *ip = htonl(cred->cr_prison->pr_ip); 422 return; 423 } 424 return; 425 } 426 427 int 428 prison_if(struct ucred *cred, struct sockaddr *sa) 429 { 430 struct sockaddr_in *sai; 431 int ok; 432 433 sai = (struct sockaddr_in *)sa; 434 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 435 ok = 1; 436 else if (sai->sin_family != AF_INET) 437 ok = 0; 438 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 439 ok = 1; 440 else 441 ok = 0; 442 return (ok); 443 } 444 445 /* 446 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 447 */ 448 int 449 prison_check(struct ucred *cred1, struct ucred *cred2) 450 { 451 452 if (jailed(cred1)) { 453 if (!jailed(cred2)) 454 return (ESRCH); 455 if (cred2->cr_prison != cred1->cr_prison) 456 return (ESRCH); 457 } 458 459 return (0); 460 } 461 462 /* 463 * Return 1 if the passed credential is in a jail, otherwise 0. 464 */ 465 int 466 jailed(struct ucred *cred) 467 { 468 469 return (cred->cr_prison != NULL); 470 } 471 472 /* 473 * Return the correct hostname for the passed credential. 
474 */ 475 void 476 getcredhostname(struct ucred *cred, char *buf, size_t size) 477 { 478 479 if (jailed(cred)) { 480 mtx_lock(&cred->cr_prison->pr_mtx); 481 strlcpy(buf, cred->cr_prison->pr_host, size); 482 mtx_unlock(&cred->cr_prison->pr_mtx); 483 } else { 484 mtx_lock(&hostname_mtx); 485 strlcpy(buf, V_hostname, size); 486 mtx_unlock(&hostname_mtx); 487 } 488 } 489 490 /* 491 * Determine whether the subject represented by cred can "see" 492 * status of a mount point. 493 * Returns: 0 for permitted, ENOENT otherwise. 494 * XXX: This function should be called cr_canseemount() and should be 495 * placed in kern_prot.c. 496 */ 497 int 498 prison_canseemount(struct ucred *cred, struct mount *mp) 499 { 500 struct prison *pr; 501 struct statfs *sp; 502 size_t len; 503 504 if (!jailed(cred) || jail_enforce_statfs == 0) 505 return (0); 506 pr = cred->cr_prison; 507 if (pr->pr_root->v_mount == mp) 508 return (0); 509 if (jail_enforce_statfs == 2) 510 return (ENOENT); 511 /* 512 * If jail's chroot directory is set to "/" we should be able to see 513 * all mount-points from inside a jail. 514 * This is ugly check, but this is the only situation when jail's 515 * directory ends with '/'. 516 */ 517 if (strcmp(pr->pr_path, "/") == 0) 518 return (0); 519 len = strlen(pr->pr_path); 520 sp = &mp->mnt_stat; 521 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 522 return (ENOENT); 523 /* 524 * Be sure that we don't have situation where jail's root directory 525 * is "/some/path" and mount point is "/some/pathpath". 
526 */ 527 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 528 return (ENOENT); 529 return (0); 530 } 531 532 void 533 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 534 { 535 char jpath[MAXPATHLEN]; 536 struct prison *pr; 537 size_t len; 538 539 if (!jailed(cred) || jail_enforce_statfs == 0) 540 return; 541 pr = cred->cr_prison; 542 if (prison_canseemount(cred, mp) != 0) { 543 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 544 strlcpy(sp->f_mntonname, "[restricted]", 545 sizeof(sp->f_mntonname)); 546 return; 547 } 548 if (pr->pr_root->v_mount == mp) { 549 /* 550 * Clear current buffer data, so we are sure nothing from 551 * the valid path left there. 552 */ 553 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 554 *sp->f_mntonname = '/'; 555 return; 556 } 557 /* 558 * If jail's chroot directory is set to "/" we should be able to see 559 * all mount-points from inside a jail. 560 */ 561 if (strcmp(pr->pr_path, "/") == 0) 562 return; 563 len = strlen(pr->pr_path); 564 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 565 /* 566 * Clear current buffer data, so we are sure nothing from 567 * the valid path left there. 568 */ 569 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 570 if (*jpath == '\0') { 571 /* Should never happen. */ 572 *sp->f_mntonname = '/'; 573 } else { 574 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 575 } 576 } 577 578 /* 579 * Check with permission for a specific privilege is granted within jail. We 580 * have a specific list of accepted privileges; the rest are denied. 581 */ 582 int 583 prison_priv_check(struct ucred *cred, int priv) 584 { 585 586 if (!jailed(cred)) 587 return (0); 588 589 switch (priv) { 590 591 /* 592 * Allow ktrace privileges for root in jail. 593 */ 594 case PRIV_KTRACE: 595 596 #if 0 597 /* 598 * Allow jailed processes to configure audit identity and 599 * submit audit records (login, etc). 
In the future we may 600 * want to further refine the relationship between audit and 601 * jail. 602 */ 603 case PRIV_AUDIT_GETAUDIT: 604 case PRIV_AUDIT_SETAUDIT: 605 case PRIV_AUDIT_SUBMIT: 606 #endif 607 608 /* 609 * Allow jailed processes to manipulate process UNIX 610 * credentials in any way they see fit. 611 */ 612 case PRIV_CRED_SETUID: 613 case PRIV_CRED_SETEUID: 614 case PRIV_CRED_SETGID: 615 case PRIV_CRED_SETEGID: 616 case PRIV_CRED_SETGROUPS: 617 case PRIV_CRED_SETREUID: 618 case PRIV_CRED_SETREGID: 619 case PRIV_CRED_SETRESUID: 620 case PRIV_CRED_SETRESGID: 621 622 /* 623 * Jail implements visibility constraints already, so allow 624 * jailed root to override uid/gid-based constraints. 625 */ 626 case PRIV_SEEOTHERGIDS: 627 case PRIV_SEEOTHERUIDS: 628 629 /* 630 * Jail implements inter-process debugging limits already, so 631 * allow jailed root various debugging privileges. 632 */ 633 case PRIV_DEBUG_DIFFCRED: 634 case PRIV_DEBUG_SUGID: 635 case PRIV_DEBUG_UNPRIV: 636 637 /* 638 * Allow jail to set various resource limits and login 639 * properties, and for now, exceed process resource limits. 640 */ 641 case PRIV_PROC_LIMIT: 642 case PRIV_PROC_SETLOGIN: 643 case PRIV_PROC_SETRLIMIT: 644 645 /* 646 * System V and POSIX IPC privileges are granted in jail. 647 */ 648 case PRIV_IPC_READ: 649 case PRIV_IPC_WRITE: 650 case PRIV_IPC_ADMIN: 651 case PRIV_IPC_MSGSIZE: 652 case PRIV_MQ_ADMIN: 653 654 /* 655 * Jail implements its own inter-process limits, so allow 656 * root processes in jail to change scheduling on other 657 * processes in the same jail. Likewise for signalling. 658 */ 659 case PRIV_SCHED_DIFFCRED: 660 case PRIV_SIGNAL_DIFFCRED: 661 case PRIV_SIGNAL_SUGID: 662 663 /* 664 * Allow jailed processes to write to sysctls marked as jail 665 * writable. 666 */ 667 case PRIV_SYSCTL_WRITEJAIL: 668 669 /* 670 * Allow root in jail to manage a variety of quota 671 * properties. These should likely be conditional on a 672 * configuration option. 
673 */ 674 case PRIV_VFS_GETQUOTA: 675 case PRIV_VFS_SETQUOTA: 676 677 /* 678 * Since Jail relies on chroot() to implement file system 679 * protections, grant many VFS privileges to root in jail. 680 * Be careful to exclude mount-related and NFS-related 681 * privileges. 682 */ 683 case PRIV_VFS_READ: 684 case PRIV_VFS_WRITE: 685 case PRIV_VFS_ADMIN: 686 case PRIV_VFS_EXEC: 687 case PRIV_VFS_LOOKUP: 688 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 689 case PRIV_VFS_CHFLAGS_DEV: 690 case PRIV_VFS_CHOWN: 691 case PRIV_VFS_CHROOT: 692 case PRIV_VFS_RETAINSUGID: 693 case PRIV_VFS_FCHROOT: 694 case PRIV_VFS_LINK: 695 case PRIV_VFS_SETGID: 696 case PRIV_VFS_STAT: 697 case PRIV_VFS_STICKYFILE: 698 return (0); 699 700 /* 701 * Depending on the global setting, allow privilege of 702 * setting system flags. 703 */ 704 case PRIV_VFS_SYSFLAGS: 705 if (jail_chflags_allowed) 706 return (0); 707 else 708 return (EPERM); 709 710 /* 711 * Depending on the global setting, allow privilege of 712 * mounting/unmounting file systems. 713 */ 714 case PRIV_VFS_MOUNT: 715 case PRIV_VFS_UNMOUNT: 716 case PRIV_VFS_MOUNT_NONUSER: 717 case PRIV_VFS_MOUNT_OWNER: 718 if (jail_mount_allowed) 719 return (0); 720 else 721 return (EPERM); 722 723 /* 724 * Allow jailed root to bind reserved ports and reuse in-use 725 * ports. 726 */ 727 case PRIV_NETINET_RESERVEDPORT: 728 case PRIV_NETINET_REUSEPORT: 729 return (0); 730 731 /* 732 * Allow jailed root to set certian IPv4/6 (option) headers. 733 */ 734 case PRIV_NETINET_SETHDROPTS: 735 return (0); 736 737 /* 738 * Conditionally allow creating raw sockets in jail. 739 */ 740 case PRIV_NETINET_RAW: 741 if (jail_allow_raw_sockets) 742 return (0); 743 else 744 return (EPERM); 745 746 /* 747 * Since jail implements its own visibility limits on netstat 748 * sysctls, allow getcred. This allows identd to work in 749 * jail. 
750 */ 751 case PRIV_NETINET_GETCRED: 752 return (0); 753 754 default: 755 /* 756 * In all remaining cases, deny the privilege request. This 757 * includes almost all network privileges, many system 758 * configuration privileges. 759 */ 760 return (EPERM); 761 } 762 } 763 764 /* 765 * Register jail service. Provides 'create' and 'destroy' methods. 766 * 'create' method will be called for every existing jail and all 767 * jails in the future as they beeing created. 768 * 'destroy' method will be called for every jail going away and 769 * for all existing jails at the time of service deregistration. 770 */ 771 struct prison_service * 772 prison_service_register(const char *name, prison_create_t create, 773 prison_destroy_t destroy) 774 { 775 struct prison_service *psrv, *psrv2; 776 struct prison *pr; 777 int reallocate = 1, slotno = 0; 778 void **slots, **oldslots; 779 780 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 781 M_WAITOK | M_ZERO); 782 psrv->ps_create = create; 783 psrv->ps_destroy = destroy; 784 strcpy(psrv->ps_name, name); 785 /* 786 * Grab the allprison_lock here, so we won't miss any jail 787 * creation/destruction. 788 */ 789 sx_xlock(&allprison_lock); 790 #ifdef INVARIANTS 791 /* 792 * Verify if service is not already registered. 793 */ 794 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 795 KASSERT(strcmp(psrv2->ps_name, name) != 0, 796 ("jail service %s already registered", name)); 797 } 798 #endif 799 /* 800 * Find free slot. When there is no existing free slot available, 801 * allocate one at the end. 802 */ 803 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 804 if (psrv2->ps_slotno != slotno) { 805 KASSERT(slotno < psrv2->ps_slotno, 806 ("Invalid slotno (slotno=%d >= ps_slotno=%d", 807 slotno, psrv2->ps_slotno)); 808 /* We found free slot. */ 809 reallocate = 0; 810 break; 811 } 812 slotno++; 813 } 814 psrv->ps_slotno = slotno; 815 /* 816 * Keep the list sorted by slot number. 
817 */ 818 if (psrv2 != NULL) { 819 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 820 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 821 } else { 822 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 823 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 824 } 825 prison_service_slots++; 826 sx_downgrade(&allprison_lock); 827 /* 828 * Allocate memory for new slot if we didn't found empty one. 829 * Do not use realloc(9), because pr_slots is protected with a mutex, 830 * so we can't sleep. 831 */ 832 LIST_FOREACH(pr, &allprison, pr_list) { 833 if (reallocate) { 834 /* First allocate memory with M_WAITOK. */ 835 slots = malloc(sizeof(*slots) * prison_service_slots, 836 M_PRISON, M_WAITOK); 837 /* Now grab the mutex and replace pr_slots. */ 838 mtx_lock(&pr->pr_mtx); 839 oldslots = pr->pr_slots; 840 if (psrv->ps_slotno > 0) { 841 bcopy(oldslots, slots, 842 sizeof(*slots) * (prison_service_slots - 1)); 843 } 844 slots[psrv->ps_slotno] = NULL; 845 pr->pr_slots = slots; 846 mtx_unlock(&pr->pr_mtx); 847 if (oldslots != NULL) 848 free(oldslots, M_PRISON); 849 } 850 /* 851 * Call 'create' method for each existing jail. 852 */ 853 psrv->ps_create(psrv, pr); 854 } 855 sx_sunlock(&allprison_lock); 856 857 return (psrv); 858 } 859 860 void 861 prison_service_deregister(struct prison_service *psrv) 862 { 863 struct prison *pr; 864 void **slots, **oldslots; 865 int last = 0; 866 867 sx_xlock(&allprison_lock); 868 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 869 last = 1; 870 TAILQ_REMOVE(&prison_services, psrv, ps_next); 871 prison_service_slots--; 872 sx_downgrade(&allprison_lock); 873 LIST_FOREACH(pr, &allprison, pr_list) { 874 /* 875 * Call 'destroy' method for every currently existing jail. 876 */ 877 psrv->ps_destroy(psrv, pr); 878 /* 879 * If this is the last slot, free the memory allocated for it. 
880 */ 881 if (last) { 882 if (prison_service_slots == 0) 883 slots = NULL; 884 else { 885 slots = malloc(sizeof(*slots) * prison_service_slots, 886 M_PRISON, M_WAITOK); 887 } 888 mtx_lock(&pr->pr_mtx); 889 oldslots = pr->pr_slots; 890 /* 891 * We require setting slot to NULL after freeing it, 892 * this way we can check for memory leaks here. 893 */ 894 KASSERT(oldslots[psrv->ps_slotno] == NULL, 895 ("Slot %d (service %s, jailid=%d) still contains data?", 896 psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 897 if (psrv->ps_slotno > 0) { 898 bcopy(oldslots, slots, 899 sizeof(*slots) * prison_service_slots); 900 } 901 pr->pr_slots = slots; 902 mtx_unlock(&pr->pr_mtx); 903 KASSERT(oldslots != NULL, ("oldslots == NULL")); 904 free(oldslots, M_PRISON); 905 } 906 } 907 sx_sunlock(&allprison_lock); 908 free(psrv, M_PRISON); 909 } 910 911 /* 912 * Function sets data for the given jail in slot assigned for the given 913 * jail service. 914 */ 915 void 916 prison_service_data_set(struct prison_service *psrv, struct prison *pr, 917 void *data) 918 { 919 920 mtx_assert(&pr->pr_mtx, MA_OWNED); 921 pr->pr_slots[psrv->ps_slotno] = data; 922 } 923 924 /* 925 * Function clears slots assigned for the given jail service in the given 926 * prison structure and returns current slot data. 927 */ 928 void * 929 prison_service_data_del(struct prison_service *psrv, struct prison *pr) 930 { 931 void *data; 932 933 mtx_assert(&pr->pr_mtx, MA_OWNED); 934 data = pr->pr_slots[psrv->ps_slotno]; 935 pr->pr_slots[psrv->ps_slotno] = NULL; 936 return (data); 937 } 938 939 /* 940 * Function returns current data from the slot assigned to the given jail 941 * service for the given jail. 
942 */ 943 void * 944 prison_service_data_get(struct prison_service *psrv, struct prison *pr) 945 { 946 947 mtx_assert(&pr->pr_mtx, MA_OWNED); 948 return (pr->pr_slots[psrv->ps_slotno]); 949 } 950 951 static int 952 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 953 { 954 struct xprison *xp, *sxp; 955 struct prison *pr; 956 int count, error; 957 958 if (jailed(req->td->td_ucred)) 959 return (0); 960 961 sx_slock(&allprison_lock); 962 if ((count = prisoncount) == 0) { 963 sx_sunlock(&allprison_lock); 964 return (0); 965 } 966 967 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 968 969 LIST_FOREACH(pr, &allprison, pr_list) { 970 xp->pr_version = XPRISON_VERSION; 971 xp->pr_id = pr->pr_id; 972 xp->pr_ip = pr->pr_ip; 973 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 974 mtx_lock(&pr->pr_mtx); 975 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 976 mtx_unlock(&pr->pr_mtx); 977 xp++; 978 } 979 sx_sunlock(&allprison_lock); 980 981 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 982 free(sxp, M_TEMP); 983 return (error); 984 } 985 986 SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 987 NULL, 0, sysctl_jail_list, "S", "List of active jails"); 988 989 static int 990 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 991 { 992 int error, injail; 993 994 injail = jailed(req->td->td_ucred); 995 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 996 997 return (error); 998 } 999 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 1000 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 1001