/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <netinet/in.h>

#include <security/mac/mac_framework.h>

/* Allocation type used for struct prison and its per-service slot arrays. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* Root of the security.jail sysctl subtree; the knobs below hang off it. */
SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0,
    "Jail rules");

/* Non-zero: jailed processes may set their hostname.  Not read in this file. */
int	jail_set_hostname_allowed = 1;
SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
    &jail_set_hostname_allowed, 0,
    "Processes in jail can set their hostnames");

/*
 * Non-zero: restrict jailed processes to UNIX/IPv4/route sockets.  In this
 * file it only affects the verdict prison_if() gives for non-AF_INET
 * addresses.
 */
int	jail_socket_unixiproute_only = 1;
SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
    &jail_socket_unixiproute_only, 0,
    "Processes in jail are limited to creating UNIX/IPv4/route sockets only");

/* Non-zero: jailed processes may use System V IPC.  Not read in this file. */
int	jail_sysvipc_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
    &jail_sysvipc_allowed, 0,
    "Processes in jail can use System V IPC primitives");

/*
 * Mount-point visibility policy, as implemented by prison_canseemount():
 *   0 - no restriction;
 *   2 - only the file system holding the jail's root is visible;
 *   any other value - additionally, mount points located under the jail's
 *       path are visible.
 */
static int	jail_enforce_statfs = 2;
SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW,
    &jail_enforce_statfs, 0,
    "Processes in jail cannot see all mounted file systems");

/* Non-zero: prison_priv_check() grants PRIV_NETINET_RAW inside a jail. */
int	jail_allow_raw_sockets = 0;
SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
    &jail_allow_raw_sockets, 0,
    "Prison root can create raw sockets");

/* Non-zero: prison_priv_check() grants PRIV_VFS_SYSFLAGS inside a jail. */
int	jail_chflags_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
    &jail_chflags_allowed, 0,
    "Processes in jail can alter system file flags");

/*
 * Non-zero: prison_priv_check() grants the mount/unmount privileges inside
 * a jail.
 */
int	jail_mount_allowed = 0;
SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW,
    &jail_mount_allowed, 0,
    "Processes in jail can mount/unmount jail-friendly file systems");

/*
 * allprison, lastprid, and prisoncount are protected by allprison_lock.
 *
 * allprison is the list of all jails, lastprid is the most recently
 * assigned jail ID and prisoncount is the number of entries on allprison.
 */
struct	prisonlist allprison;
struct	sx allprison_lock;
int	lastprid = 0;
int	prisoncount = 0;

/*
 * List of jail services. Protected by allprison_lock.
91 */ 92 TAILQ_HEAD(prison_services_head, prison_service); 93 static struct prison_services_head prison_services = 94 TAILQ_HEAD_INITIALIZER(prison_services); 95 static int prison_service_slots = 0; 96 97 struct prison_service { 98 prison_create_t ps_create; 99 prison_destroy_t ps_destroy; 100 int ps_slotno; 101 TAILQ_ENTRY(prison_service) ps_next; 102 char ps_name[0]; 103 }; 104 105 static void init_prison(void *); 106 static void prison_complete(void *context, int pending); 107 static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 108 109 static void 110 init_prison(void *data __unused) 111 { 112 113 sx_init(&allprison_lock, "allprison"); 114 LIST_INIT(&allprison); 115 } 116 117 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 118 119 /* 120 * struct jail_args { 121 * struct jail *jail; 122 * }; 123 */ 124 int 125 jail(struct thread *td, struct jail_args *uap) 126 { 127 struct nameidata nd; 128 struct prison *pr, *tpr; 129 struct prison_service *psrv; 130 struct jail j; 131 struct jail_attach_args jaa; 132 int vfslocked, error, tryprid; 133 134 error = copyin(uap->jail, &j, sizeof(j)); 135 if (error) 136 return (error); 137 if (j.version != 0) 138 return (EINVAL); 139 140 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 141 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 142 pr->pr_ref = 1; 143 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 144 if (error) 145 goto e_killmtx; 146 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 147 pr->pr_path, td); 148 error = namei(&nd); 149 if (error) 150 goto e_killmtx; 151 vfslocked = NDHASGIANT(&nd); 152 pr->pr_root = nd.ni_vp; 153 VOP_UNLOCK(nd.ni_vp, 0); 154 NDFREE(&nd, NDF_ONLY_PNBUF); 155 VFS_UNLOCK_GIANT(vfslocked); 156 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 157 if (error) 158 goto e_dropvnref; 159 pr->pr_ip = j.ip_number; 160 pr->pr_linux = NULL; 161 pr->pr_securelevel = securelevel; 162 if (prison_service_slots == 0) 163 pr->pr_slots 
= NULL; 164 else { 165 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 166 M_PRISON, M_ZERO | M_WAITOK); 167 } 168 169 /* Determine next pr_id and add prison to allprison list. */ 170 sx_xlock(&allprison_lock); 171 tryprid = lastprid + 1; 172 if (tryprid == JAIL_MAX) 173 tryprid = 1; 174 next: 175 LIST_FOREACH(tpr, &allprison, pr_list) { 176 if (tpr->pr_id == tryprid) { 177 tryprid++; 178 if (tryprid == JAIL_MAX) { 179 sx_xunlock(&allprison_lock); 180 error = EAGAIN; 181 goto e_dropvnref; 182 } 183 goto next; 184 } 185 } 186 pr->pr_id = jaa.jid = lastprid = tryprid; 187 LIST_INSERT_HEAD(&allprison, pr, pr_list); 188 prisoncount++; 189 sx_downgrade(&allprison_lock); 190 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 191 psrv->ps_create(psrv, pr); 192 } 193 sx_sunlock(&allprison_lock); 194 195 error = jail_attach(td, &jaa); 196 if (error) 197 goto e_dropprref; 198 mtx_lock(&pr->pr_mtx); 199 pr->pr_ref--; 200 mtx_unlock(&pr->pr_mtx); 201 td->td_retval[0] = jaa.jid; 202 return (0); 203 e_dropprref: 204 sx_xlock(&allprison_lock); 205 LIST_REMOVE(pr, pr_list); 206 prisoncount--; 207 sx_downgrade(&allprison_lock); 208 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 209 psrv->ps_destroy(psrv, pr); 210 } 211 sx_sunlock(&allprison_lock); 212 e_dropvnref: 213 if (pr->pr_slots != NULL) 214 free(pr->pr_slots, M_PRISON); 215 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 216 vrele(pr->pr_root); 217 VFS_UNLOCK_GIANT(vfslocked); 218 e_killmtx: 219 mtx_destroy(&pr->pr_mtx); 220 free(pr, M_PRISON); 221 return (error); 222 } 223 224 /* 225 * struct jail_attach_args { 226 * int jid; 227 * }; 228 */ 229 int 230 jail_attach(struct thread *td, struct jail_attach_args *uap) 231 { 232 struct proc *p; 233 struct ucred *newcred, *oldcred; 234 struct prison *pr; 235 int vfslocked, error; 236 237 /* 238 * XXX: Note that there is a slight race here if two threads 239 * in the same privileged process attempt to attach to two 240 * different jails at the same time. 
It is important for 241 * user processes not to do this, or they might end up with 242 * a process root from one prison, but attached to the jail 243 * of another. 244 */ 245 error = priv_check(td, PRIV_JAIL_ATTACH); 246 if (error) 247 return (error); 248 249 p = td->td_proc; 250 sx_slock(&allprison_lock); 251 pr = prison_find(uap->jid); 252 if (pr == NULL) { 253 sx_sunlock(&allprison_lock); 254 return (EINVAL); 255 } 256 pr->pr_ref++; 257 mtx_unlock(&pr->pr_mtx); 258 sx_sunlock(&allprison_lock); 259 260 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 261 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 262 if ((error = change_dir(pr->pr_root, td)) != 0) 263 goto e_unlock; 264 #ifdef MAC 265 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 266 goto e_unlock; 267 #endif 268 VOP_UNLOCK(pr->pr_root, 0); 269 change_root(pr->pr_root, td); 270 VFS_UNLOCK_GIANT(vfslocked); 271 272 newcred = crget(); 273 PROC_LOCK(p); 274 oldcred = p->p_ucred; 275 setsugid(p); 276 crcopy(newcred, oldcred); 277 newcred->cr_prison = pr; 278 p->p_ucred = newcred; 279 PROC_UNLOCK(p); 280 crfree(oldcred); 281 return (0); 282 e_unlock: 283 VOP_UNLOCK(pr->pr_root, 0); 284 VFS_UNLOCK_GIANT(vfslocked); 285 mtx_lock(&pr->pr_mtx); 286 pr->pr_ref--; 287 mtx_unlock(&pr->pr_mtx); 288 return (error); 289 } 290 291 /* 292 * Returns a locked prison instance, or NULL on failure. 
293 */ 294 struct prison * 295 prison_find(int prid) 296 { 297 struct prison *pr; 298 299 sx_assert(&allprison_lock, SX_LOCKED); 300 LIST_FOREACH(pr, &allprison, pr_list) { 301 if (pr->pr_id == prid) { 302 mtx_lock(&pr->pr_mtx); 303 if (pr->pr_ref == 0) { 304 mtx_unlock(&pr->pr_mtx); 305 break; 306 } 307 return (pr); 308 } 309 } 310 return (NULL); 311 } 312 313 void 314 prison_free(struct prison *pr) 315 { 316 317 mtx_lock(&pr->pr_mtx); 318 pr->pr_ref--; 319 if (pr->pr_ref == 0) { 320 mtx_unlock(&pr->pr_mtx); 321 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 322 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 323 return; 324 } 325 mtx_unlock(&pr->pr_mtx); 326 } 327 328 static void 329 prison_complete(void *context, int pending) 330 { 331 struct prison_service *psrv; 332 struct prison *pr; 333 int vfslocked; 334 335 pr = (struct prison *)context; 336 337 sx_xlock(&allprison_lock); 338 LIST_REMOVE(pr, pr_list); 339 prisoncount--; 340 sx_downgrade(&allprison_lock); 341 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 342 psrv->ps_destroy(psrv, pr); 343 } 344 sx_sunlock(&allprison_lock); 345 if (pr->pr_slots != NULL) 346 free(pr->pr_slots, M_PRISON); 347 348 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 349 vrele(pr->pr_root); 350 VFS_UNLOCK_GIANT(vfslocked); 351 352 mtx_destroy(&pr->pr_mtx); 353 if (pr->pr_linux != NULL) 354 free(pr->pr_linux, M_PRISON); 355 free(pr, M_PRISON); 356 } 357 358 void 359 prison_hold(struct prison *pr) 360 { 361 362 mtx_lock(&pr->pr_mtx); 363 KASSERT(pr->pr_ref > 0, 364 ("Trying to hold dead prison (id=%d).", pr->pr_id)); 365 pr->pr_ref++; 366 mtx_unlock(&pr->pr_mtx); 367 } 368 369 u_int32_t 370 prison_getip(struct ucred *cred) 371 { 372 373 return (cred->cr_prison->pr_ip); 374 } 375 376 int 377 prison_ip(struct ucred *cred, int flag, u_int32_t *ip) 378 { 379 u_int32_t tmp; 380 381 if (!jailed(cred)) 382 return (0); 383 if (flag) 384 tmp = *ip; 385 else 386 tmp = ntohl(*ip); 387 if (tmp == INADDR_ANY) { 388 if (flag) 389 *ip = 
cred->cr_prison->pr_ip; 390 else 391 *ip = htonl(cred->cr_prison->pr_ip); 392 return (0); 393 } 394 if (tmp == INADDR_LOOPBACK) { 395 if (flag) 396 *ip = cred->cr_prison->pr_ip; 397 else 398 *ip = htonl(cred->cr_prison->pr_ip); 399 return (0); 400 } 401 if (cred->cr_prison->pr_ip != tmp) 402 return (1); 403 return (0); 404 } 405 406 void 407 prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 408 { 409 u_int32_t tmp; 410 411 if (!jailed(cred)) 412 return; 413 if (flag) 414 tmp = *ip; 415 else 416 tmp = ntohl(*ip); 417 if (tmp == INADDR_LOOPBACK) { 418 if (flag) 419 *ip = cred->cr_prison->pr_ip; 420 else 421 *ip = htonl(cred->cr_prison->pr_ip); 422 return; 423 } 424 return; 425 } 426 427 int 428 prison_if(struct ucred *cred, struct sockaddr *sa) 429 { 430 struct sockaddr_in *sai; 431 int ok; 432 433 sai = (struct sockaddr_in *)sa; 434 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 435 ok = 1; 436 else if (sai->sin_family != AF_INET) 437 ok = 0; 438 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 439 ok = 1; 440 else 441 ok = 0; 442 return (ok); 443 } 444 445 /* 446 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 447 */ 448 int 449 prison_check(struct ucred *cred1, struct ucred *cred2) 450 { 451 452 if (jailed(cred1)) { 453 if (!jailed(cred2)) 454 return (ESRCH); 455 if (cred2->cr_prison != cred1->cr_prison) 456 return (ESRCH); 457 } 458 459 return (0); 460 } 461 462 /* 463 * Return 1 if the passed credential is in a jail, otherwise 0. 464 */ 465 int 466 jailed(struct ucred *cred) 467 { 468 469 return (cred->cr_prison != NULL); 470 } 471 472 /* 473 * Return the correct hostname for the passed credential. 
474 */ 475 void 476 getcredhostname(struct ucred *cred, char *buf, size_t size) 477 { 478 INIT_VPROCG(cred->cr_vimage->v_procg); 479 480 if (jailed(cred)) { 481 mtx_lock(&cred->cr_prison->pr_mtx); 482 strlcpy(buf, cred->cr_prison->pr_host, size); 483 mtx_unlock(&cred->cr_prison->pr_mtx); 484 } else { 485 mtx_lock(&hostname_mtx); 486 strlcpy(buf, V_hostname, size); 487 mtx_unlock(&hostname_mtx); 488 } 489 } 490 491 /* 492 * Determine whether the subject represented by cred can "see" 493 * status of a mount point. 494 * Returns: 0 for permitted, ENOENT otherwise. 495 * XXX: This function should be called cr_canseemount() and should be 496 * placed in kern_prot.c. 497 */ 498 int 499 prison_canseemount(struct ucred *cred, struct mount *mp) 500 { 501 struct prison *pr; 502 struct statfs *sp; 503 size_t len; 504 505 if (!jailed(cred) || jail_enforce_statfs == 0) 506 return (0); 507 pr = cred->cr_prison; 508 if (pr->pr_root->v_mount == mp) 509 return (0); 510 if (jail_enforce_statfs == 2) 511 return (ENOENT); 512 /* 513 * If jail's chroot directory is set to "/" we should be able to see 514 * all mount-points from inside a jail. 515 * This is ugly check, but this is the only situation when jail's 516 * directory ends with '/'. 517 */ 518 if (strcmp(pr->pr_path, "/") == 0) 519 return (0); 520 len = strlen(pr->pr_path); 521 sp = &mp->mnt_stat; 522 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 523 return (ENOENT); 524 /* 525 * Be sure that we don't have situation where jail's root directory 526 * is "/some/path" and mount point is "/some/pathpath". 
527 */ 528 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 529 return (ENOENT); 530 return (0); 531 } 532 533 void 534 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 535 { 536 char jpath[MAXPATHLEN]; 537 struct prison *pr; 538 size_t len; 539 540 if (!jailed(cred) || jail_enforce_statfs == 0) 541 return; 542 pr = cred->cr_prison; 543 if (prison_canseemount(cred, mp) != 0) { 544 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 545 strlcpy(sp->f_mntonname, "[restricted]", 546 sizeof(sp->f_mntonname)); 547 return; 548 } 549 if (pr->pr_root->v_mount == mp) { 550 /* 551 * Clear current buffer data, so we are sure nothing from 552 * the valid path left there. 553 */ 554 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 555 *sp->f_mntonname = '/'; 556 return; 557 } 558 /* 559 * If jail's chroot directory is set to "/" we should be able to see 560 * all mount-points from inside a jail. 561 */ 562 if (strcmp(pr->pr_path, "/") == 0) 563 return; 564 len = strlen(pr->pr_path); 565 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 566 /* 567 * Clear current buffer data, so we are sure nothing from 568 * the valid path left there. 569 */ 570 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 571 if (*jpath == '\0') { 572 /* Should never happen. */ 573 *sp->f_mntonname = '/'; 574 } else { 575 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 576 } 577 } 578 579 /* 580 * Check with permission for a specific privilege is granted within jail. We 581 * have a specific list of accepted privileges; the rest are denied. 582 */ 583 int 584 prison_priv_check(struct ucred *cred, int priv) 585 { 586 587 if (!jailed(cred)) 588 return (0); 589 590 switch (priv) { 591 592 /* 593 * Allow ktrace privileges for root in jail. 594 */ 595 case PRIV_KTRACE: 596 597 #if 0 598 /* 599 * Allow jailed processes to configure audit identity and 600 * submit audit records (login, etc). 
In the future we may 601 * want to further refine the relationship between audit and 602 * jail. 603 */ 604 case PRIV_AUDIT_GETAUDIT: 605 case PRIV_AUDIT_SETAUDIT: 606 case PRIV_AUDIT_SUBMIT: 607 #endif 608 609 /* 610 * Allow jailed processes to manipulate process UNIX 611 * credentials in any way they see fit. 612 */ 613 case PRIV_CRED_SETUID: 614 case PRIV_CRED_SETEUID: 615 case PRIV_CRED_SETGID: 616 case PRIV_CRED_SETEGID: 617 case PRIV_CRED_SETGROUPS: 618 case PRIV_CRED_SETREUID: 619 case PRIV_CRED_SETREGID: 620 case PRIV_CRED_SETRESUID: 621 case PRIV_CRED_SETRESGID: 622 623 /* 624 * Jail implements visibility constraints already, so allow 625 * jailed root to override uid/gid-based constraints. 626 */ 627 case PRIV_SEEOTHERGIDS: 628 case PRIV_SEEOTHERUIDS: 629 630 /* 631 * Jail implements inter-process debugging limits already, so 632 * allow jailed root various debugging privileges. 633 */ 634 case PRIV_DEBUG_DIFFCRED: 635 case PRIV_DEBUG_SUGID: 636 case PRIV_DEBUG_UNPRIV: 637 638 /* 639 * Allow jail to set various resource limits and login 640 * properties, and for now, exceed process resource limits. 641 */ 642 case PRIV_PROC_LIMIT: 643 case PRIV_PROC_SETLOGIN: 644 case PRIV_PROC_SETRLIMIT: 645 646 /* 647 * System V and POSIX IPC privileges are granted in jail. 648 */ 649 case PRIV_IPC_READ: 650 case PRIV_IPC_WRITE: 651 case PRIV_IPC_ADMIN: 652 case PRIV_IPC_MSGSIZE: 653 case PRIV_MQ_ADMIN: 654 655 /* 656 * Jail implements its own inter-process limits, so allow 657 * root processes in jail to change scheduling on other 658 * processes in the same jail. Likewise for signalling. 659 */ 660 case PRIV_SCHED_DIFFCRED: 661 case PRIV_SIGNAL_DIFFCRED: 662 case PRIV_SIGNAL_SUGID: 663 664 /* 665 * Allow jailed processes to write to sysctls marked as jail 666 * writable. 667 */ 668 case PRIV_SYSCTL_WRITEJAIL: 669 670 /* 671 * Allow root in jail to manage a variety of quota 672 * properties. These should likely be conditional on a 673 * configuration option. 
674 */ 675 case PRIV_VFS_GETQUOTA: 676 case PRIV_VFS_SETQUOTA: 677 678 /* 679 * Since Jail relies on chroot() to implement file system 680 * protections, grant many VFS privileges to root in jail. 681 * Be careful to exclude mount-related and NFS-related 682 * privileges. 683 */ 684 case PRIV_VFS_READ: 685 case PRIV_VFS_WRITE: 686 case PRIV_VFS_ADMIN: 687 case PRIV_VFS_EXEC: 688 case PRIV_VFS_LOOKUP: 689 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 690 case PRIV_VFS_CHFLAGS_DEV: 691 case PRIV_VFS_CHOWN: 692 case PRIV_VFS_CHROOT: 693 case PRIV_VFS_RETAINSUGID: 694 case PRIV_VFS_FCHROOT: 695 case PRIV_VFS_LINK: 696 case PRIV_VFS_SETGID: 697 case PRIV_VFS_STAT: 698 case PRIV_VFS_STICKYFILE: 699 return (0); 700 701 /* 702 * Depending on the global setting, allow privilege of 703 * setting system flags. 704 */ 705 case PRIV_VFS_SYSFLAGS: 706 if (jail_chflags_allowed) 707 return (0); 708 else 709 return (EPERM); 710 711 /* 712 * Depending on the global setting, allow privilege of 713 * mounting/unmounting file systems. 714 */ 715 case PRIV_VFS_MOUNT: 716 case PRIV_VFS_UNMOUNT: 717 case PRIV_VFS_MOUNT_NONUSER: 718 case PRIV_VFS_MOUNT_OWNER: 719 if (jail_mount_allowed) 720 return (0); 721 else 722 return (EPERM); 723 724 /* 725 * Allow jailed root to bind reserved ports and reuse in-use 726 * ports. 727 */ 728 case PRIV_NETINET_RESERVEDPORT: 729 case PRIV_NETINET_REUSEPORT: 730 return (0); 731 732 /* 733 * Allow jailed root to set certian IPv4/6 (option) headers. 734 */ 735 case PRIV_NETINET_SETHDROPTS: 736 return (0); 737 738 /* 739 * Conditionally allow creating raw sockets in jail. 740 */ 741 case PRIV_NETINET_RAW: 742 if (jail_allow_raw_sockets) 743 return (0); 744 else 745 return (EPERM); 746 747 /* 748 * Since jail implements its own visibility limits on netstat 749 * sysctls, allow getcred. This allows identd to work in 750 * jail. 
751 */ 752 case PRIV_NETINET_GETCRED: 753 return (0); 754 755 default: 756 /* 757 * In all remaining cases, deny the privilege request. This 758 * includes almost all network privileges, many system 759 * configuration privileges. 760 */ 761 return (EPERM); 762 } 763 } 764 765 /* 766 * Register jail service. Provides 'create' and 'destroy' methods. 767 * 'create' method will be called for every existing jail and all 768 * jails in the future as they beeing created. 769 * 'destroy' method will be called for every jail going away and 770 * for all existing jails at the time of service deregistration. 771 */ 772 struct prison_service * 773 prison_service_register(const char *name, prison_create_t create, 774 prison_destroy_t destroy) 775 { 776 struct prison_service *psrv, *psrv2; 777 struct prison *pr; 778 int reallocate = 1, slotno = 0; 779 void **slots, **oldslots; 780 781 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 782 M_WAITOK | M_ZERO); 783 psrv->ps_create = create; 784 psrv->ps_destroy = destroy; 785 strcpy(psrv->ps_name, name); 786 /* 787 * Grab the allprison_lock here, so we won't miss any jail 788 * creation/destruction. 789 */ 790 sx_xlock(&allprison_lock); 791 #ifdef INVARIANTS 792 /* 793 * Verify if service is not already registered. 794 */ 795 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 796 KASSERT(strcmp(psrv2->ps_name, name) != 0, 797 ("jail service %s already registered", name)); 798 } 799 #endif 800 /* 801 * Find free slot. When there is no existing free slot available, 802 * allocate one at the end. 803 */ 804 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 805 if (psrv2->ps_slotno != slotno) { 806 KASSERT(slotno < psrv2->ps_slotno, 807 ("Invalid slotno (slotno=%d >= ps_slotno=%d", 808 slotno, psrv2->ps_slotno)); 809 /* We found free slot. */ 810 reallocate = 0; 811 break; 812 } 813 slotno++; 814 } 815 psrv->ps_slotno = slotno; 816 /* 817 * Keep the list sorted by slot number. 
818 */ 819 if (psrv2 != NULL) { 820 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 821 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 822 } else { 823 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 824 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 825 } 826 prison_service_slots++; 827 sx_downgrade(&allprison_lock); 828 /* 829 * Allocate memory for new slot if we didn't found empty one. 830 * Do not use realloc(9), because pr_slots is protected with a mutex, 831 * so we can't sleep. 832 */ 833 LIST_FOREACH(pr, &allprison, pr_list) { 834 if (reallocate) { 835 /* First allocate memory with M_WAITOK. */ 836 slots = malloc(sizeof(*slots) * prison_service_slots, 837 M_PRISON, M_WAITOK); 838 /* Now grab the mutex and replace pr_slots. */ 839 mtx_lock(&pr->pr_mtx); 840 oldslots = pr->pr_slots; 841 if (psrv->ps_slotno > 0) { 842 bcopy(oldslots, slots, 843 sizeof(*slots) * (prison_service_slots - 1)); 844 } 845 slots[psrv->ps_slotno] = NULL; 846 pr->pr_slots = slots; 847 mtx_unlock(&pr->pr_mtx); 848 if (oldslots != NULL) 849 free(oldslots, M_PRISON); 850 } 851 /* 852 * Call 'create' method for each existing jail. 853 */ 854 psrv->ps_create(psrv, pr); 855 } 856 sx_sunlock(&allprison_lock); 857 858 return (psrv); 859 } 860 861 void 862 prison_service_deregister(struct prison_service *psrv) 863 { 864 struct prison *pr; 865 void **slots, **oldslots; 866 int last = 0; 867 868 sx_xlock(&allprison_lock); 869 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 870 last = 1; 871 TAILQ_REMOVE(&prison_services, psrv, ps_next); 872 prison_service_slots--; 873 sx_downgrade(&allprison_lock); 874 LIST_FOREACH(pr, &allprison, pr_list) { 875 /* 876 * Call 'destroy' method for every currently existing jail. 877 */ 878 psrv->ps_destroy(psrv, pr); 879 /* 880 * If this is the last slot, free the memory allocated for it. 
881 */ 882 if (last) { 883 if (prison_service_slots == 0) 884 slots = NULL; 885 else { 886 slots = malloc(sizeof(*slots) * prison_service_slots, 887 M_PRISON, M_WAITOK); 888 } 889 mtx_lock(&pr->pr_mtx); 890 oldslots = pr->pr_slots; 891 /* 892 * We require setting slot to NULL after freeing it, 893 * this way we can check for memory leaks here. 894 */ 895 KASSERT(oldslots[psrv->ps_slotno] == NULL, 896 ("Slot %d (service %s, jailid=%d) still contains data?", 897 psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 898 if (psrv->ps_slotno > 0) { 899 bcopy(oldslots, slots, 900 sizeof(*slots) * prison_service_slots); 901 } 902 pr->pr_slots = slots; 903 mtx_unlock(&pr->pr_mtx); 904 KASSERT(oldslots != NULL, ("oldslots == NULL")); 905 free(oldslots, M_PRISON); 906 } 907 } 908 sx_sunlock(&allprison_lock); 909 free(psrv, M_PRISON); 910 } 911 912 /* 913 * Function sets data for the given jail in slot assigned for the given 914 * jail service. 915 */ 916 void 917 prison_service_data_set(struct prison_service *psrv, struct prison *pr, 918 void *data) 919 { 920 921 mtx_assert(&pr->pr_mtx, MA_OWNED); 922 pr->pr_slots[psrv->ps_slotno] = data; 923 } 924 925 /* 926 * Function clears slots assigned for the given jail service in the given 927 * prison structure and returns current slot data. 928 */ 929 void * 930 prison_service_data_del(struct prison_service *psrv, struct prison *pr) 931 { 932 void *data; 933 934 mtx_assert(&pr->pr_mtx, MA_OWNED); 935 data = pr->pr_slots[psrv->ps_slotno]; 936 pr->pr_slots[psrv->ps_slotno] = NULL; 937 return (data); 938 } 939 940 /* 941 * Function returns current data from the slot assigned to the given jail 942 * service for the given jail. 
943 */ 944 void * 945 prison_service_data_get(struct prison_service *psrv, struct prison *pr) 946 { 947 948 mtx_assert(&pr->pr_mtx, MA_OWNED); 949 return (pr->pr_slots[psrv->ps_slotno]); 950 } 951 952 static int 953 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 954 { 955 struct xprison *xp, *sxp; 956 struct prison *pr; 957 int count, error; 958 959 if (jailed(req->td->td_ucred)) 960 return (0); 961 962 sx_slock(&allprison_lock); 963 if ((count = prisoncount) == 0) { 964 sx_sunlock(&allprison_lock); 965 return (0); 966 } 967 968 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 969 970 LIST_FOREACH(pr, &allprison, pr_list) { 971 xp->pr_version = XPRISON_VERSION; 972 xp->pr_id = pr->pr_id; 973 xp->pr_ip = pr->pr_ip; 974 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 975 mtx_lock(&pr->pr_mtx); 976 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 977 mtx_unlock(&pr->pr_mtx); 978 xp++; 979 } 980 sx_sunlock(&allprison_lock); 981 982 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 983 free(sxp, M_TEMP); 984 return (error); 985 } 986 987 SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 988 NULL, 0, sysctl_jail_list, "S", "List of active jails"); 989 990 static int 991 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 992 { 993 int error, injail; 994 995 injail = jailed(req->td->td_ucred); 996 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 997 998 return (error); 999 } 1000 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 1001 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 1002