1 /*- 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 */ 9 10 #include <sys/cdefs.h> 11 __FBSDID("$FreeBSD$"); 12 13 #include "opt_mac.h" 14 15 #include <sys/param.h> 16 #include <sys/types.h> 17 #include <sys/kernel.h> 18 #include <sys/systm.h> 19 #include <sys/errno.h> 20 #include <sys/sysproto.h> 21 #include <sys/malloc.h> 22 #include <sys/priv.h> 23 #include <sys/proc.h> 24 #include <sys/taskqueue.h> 25 #include <sys/fcntl.h> 26 #include <sys/jail.h> 27 #include <sys/lock.h> 28 #include <sys/mutex.h> 29 #include <sys/sx.h> 30 #include <sys/namei.h> 31 #include <sys/mount.h> 32 #include <sys/queue.h> 33 #include <sys/socket.h> 34 #include <sys/syscallsubr.h> 35 #include <sys/sysctl.h> 36 #include <sys/vnode.h> 37 #include <net/if.h> 38 #include <netinet/in.h> 39 40 #include <security/mac/mac_framework.h> 41 42 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 43 44 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 45 "Jail rules"); 46 47 int jail_set_hostname_allowed = 1; 48 SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, 49 &jail_set_hostname_allowed, 0, 50 "Processes in jail can set their hostnames"); 51 52 int jail_socket_unixiproute_only = 1; 53 SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, 54 &jail_socket_unixiproute_only, 0, 55 "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); 56 57 int jail_sysvipc_allowed = 0; 58 SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, 59 &jail_sysvipc_allowed, 0, 60 "Processes in jail can use System V IPC primitives"); 61 62 static int jail_enforce_statfs = 2; 63 SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, 64 &jail_enforce_statfs, 0, 65 "Processes in jail cannot see all mounted file systems"); 66 67 int jail_allow_raw_sockets = 0; 68 SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, 69 &jail_allow_raw_sockets, 0, 70 "Prison root can create raw sockets"); 71 72 int jail_chflags_allowed = 0; 73 SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, 74 &jail_chflags_allowed, 0, 75 "Processes in jail can alter system file flags"); 76 77 int jail_mount_allowed = 0; 78 SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, 79 &jail_mount_allowed, 0, 80 "Processes in jail can mount/unmount jail-friendly file systems"); 81 82 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */ 83 struct prisonlist allprison; 84 struct sx allprison_lock; 85 int lastprid = 0; 86 int prisoncount = 0; 87 88 /* 89 * List of jail services. Protected by allprison_lock. 90 */ 91 TAILQ_HEAD(prison_services_head, prison_service); 92 static struct prison_services_head prison_services = 93 TAILQ_HEAD_INITIALIZER(prison_services); 94 static int prison_service_slots = 0; 95 96 struct prison_service { 97 prison_create_t ps_create; 98 prison_destroy_t ps_destroy; 99 int ps_slotno; 100 TAILQ_ENTRY(prison_service) ps_next; 101 char ps_name[0]; 102 }; 103 104 static void init_prison(void *); 105 static void prison_complete(void *context, int pending); 106 static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); 107 108 static void 109 init_prison(void *data __unused) 110 { 111 112 sx_init(&allprison_lock, "allprison"); 113 LIST_INIT(&allprison); 114 } 115 116 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); 117 118 /* 119 * struct jail_args { 120 * struct jail *jail; 121 * }; 122 */ 123 int 124 jail(struct thread *td, struct jail_args *uap) 125 { 126 struct nameidata nd; 127 struct prison *pr, *tpr; 128 struct prison_service *psrv; 129 struct jail j; 130 struct jail_attach_args jaa; 131 int vfslocked, error, tryprid; 132 133 error = copyin(uap->jail, &j, sizeof(j)); 134 if (error) 135 return (error); 136 if (j.version != 0) 137 return (EINVAL); 138 139 MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 140 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); 141 pr->pr_ref = 1; 142 error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); 143 if (error) 144 goto e_killmtx; 145 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, 146 pr->pr_path, td); 147 error = namei(&nd); 148 if (error) 149 goto e_killmtx; 150 vfslocked = NDHASGIANT(&nd); 151 pr->pr_root = nd.ni_vp; 152 VOP_UNLOCK(nd.ni_vp, 0); 153 NDFREE(&nd, NDF_ONLY_PNBUF); 154 VFS_UNLOCK_GIANT(vfslocked); 155 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); 156 if (error) 157 goto e_dropvnref; 158 pr->pr_ip = j.ip_number; 159 pr->pr_linux = NULL; 160 pr->pr_securelevel = securelevel; 161 if (prison_service_slots == 0) 162 pr->pr_slots = NULL; 163 else { 164 pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, 165 M_PRISON, M_ZERO | M_WAITOK); 166 } 167 168 /* Determine next pr_id and add prison to allprison list. */ 169 sx_xlock(&allprison_lock); 170 tryprid = lastprid + 1; 171 if (tryprid == JAIL_MAX) 172 tryprid = 1; 173 next: 174 LIST_FOREACH(tpr, &allprison, pr_list) { 175 if (tpr->pr_id == tryprid) { 176 tryprid++; 177 if (tryprid == JAIL_MAX) { 178 sx_xunlock(&allprison_lock); 179 error = EAGAIN; 180 goto e_dropvnref; 181 } 182 goto next; 183 } 184 } 185 pr->pr_id = jaa.jid = lastprid = tryprid; 186 LIST_INSERT_HEAD(&allprison, pr, pr_list); 187 prisoncount++; 188 sx_downgrade(&allprison_lock); 189 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 190 psrv->ps_create(psrv, pr); 191 } 192 sx_sunlock(&allprison_lock); 193 194 error = jail_attach(td, &jaa); 195 if (error) 196 goto e_dropprref; 197 mtx_lock(&pr->pr_mtx); 198 pr->pr_ref--; 199 mtx_unlock(&pr->pr_mtx); 200 td->td_retval[0] = jaa.jid; 201 return (0); 202 e_dropprref: 203 sx_xlock(&allprison_lock); 204 LIST_REMOVE(pr, pr_list); 205 prisoncount--; 206 sx_downgrade(&allprison_lock); 207 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 208 psrv->ps_destroy(psrv, pr); 209 } 210 sx_sunlock(&allprison_lock); 211 e_dropvnref: 212 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 213 vrele(pr->pr_root); 214 VFS_UNLOCK_GIANT(vfslocked); 215 e_killmtx: 216 mtx_destroy(&pr->pr_mtx); 217 FREE(pr, M_PRISON); 218 return (error); 219 } 220 221 /* 222 * struct jail_attach_args { 223 * int jid; 224 * }; 225 */ 226 int 227 jail_attach(struct thread *td, struct jail_attach_args *uap) 228 { 229 struct proc *p; 230 struct ucred *newcred, *oldcred; 231 struct prison *pr; 232 int vfslocked, error; 233 234 /* 235 * XXX: Note that there is a slight race here if two threads 236 * in the same privileged process attempt to attach to two 237 * different jails at the same time. It is important for 238 * user processes not to do this, or they might end up with 239 * a process root from one prison, but attached to the jail 240 * of another. 241 */ 242 error = priv_check(td, PRIV_JAIL_ATTACH); 243 if (error) 244 return (error); 245 246 p = td->td_proc; 247 sx_slock(&allprison_lock); 248 pr = prison_find(uap->jid); 249 if (pr == NULL) { 250 sx_sunlock(&allprison_lock); 251 return (EINVAL); 252 } 253 pr->pr_ref++; 254 mtx_unlock(&pr->pr_mtx); 255 sx_sunlock(&allprison_lock); 256 257 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 258 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 259 if ((error = change_dir(pr->pr_root, td)) != 0) 260 goto e_unlock; 261 #ifdef MAC 262 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 263 goto e_unlock; 264 #endif 265 VOP_UNLOCK(pr->pr_root, 0); 266 change_root(pr->pr_root, td); 267 VFS_UNLOCK_GIANT(vfslocked); 268 269 newcred = crget(); 270 PROC_LOCK(p); 271 oldcred = p->p_ucred; 272 setsugid(p); 273 crcopy(newcred, oldcred); 274 newcred->cr_prison = pr; 275 p->p_ucred = newcred; 276 PROC_UNLOCK(p); 277 crfree(oldcred); 278 return (0); 279 e_unlock: 280 VOP_UNLOCK(pr->pr_root, 0); 281 VFS_UNLOCK_GIANT(vfslocked); 282 mtx_lock(&pr->pr_mtx); 283 pr->pr_ref--; 284 mtx_unlock(&pr->pr_mtx); 285 return (error); 286 } 287 288 /* 289 * Returns a locked prison instance, or NULL on failure. 290 */ 291 struct prison * 292 prison_find(int prid) 293 { 294 struct prison *pr; 295 296 sx_assert(&allprison_lock, SX_LOCKED); 297 LIST_FOREACH(pr, &allprison, pr_list) { 298 if (pr->pr_id == prid) { 299 mtx_lock(&pr->pr_mtx); 300 if (pr->pr_ref == 0) { 301 mtx_unlock(&pr->pr_mtx); 302 break; 303 } 304 return (pr); 305 } 306 } 307 return (NULL); 308 } 309 310 void 311 prison_free(struct prison *pr) 312 { 313 314 mtx_lock(&pr->pr_mtx); 315 pr->pr_ref--; 316 if (pr->pr_ref == 0) { 317 mtx_unlock(&pr->pr_mtx); 318 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 319 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 320 return; 321 } 322 mtx_unlock(&pr->pr_mtx); 323 } 324 325 static void 326 prison_complete(void *context, int pending) 327 { 328 struct prison_service *psrv; 329 struct prison *pr; 330 int vfslocked; 331 332 pr = (struct prison *)context; 333 334 sx_xlock(&allprison_lock); 335 LIST_REMOVE(pr, pr_list); 336 prisoncount--; 337 sx_downgrade(&allprison_lock); 338 TAILQ_FOREACH(psrv, &prison_services, ps_next) { 339 psrv->ps_destroy(psrv, pr); 340 } 341 sx_sunlock(&allprison_lock); 342 343 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 344 vrele(pr->pr_root); 345 VFS_UNLOCK_GIANT(vfslocked); 346 347 mtx_destroy(&pr->pr_mtx); 348 if (pr->pr_linux != NULL) 349 FREE(pr->pr_linux, M_PRISON); 350 FREE(pr, M_PRISON); 351 } 352 353 void 354 prison_hold(struct prison *pr) 355 { 356 357 mtx_lock(&pr->pr_mtx); 358 KASSERT(pr->pr_ref > 0, 359 ("Trying to hold dead prison (id=%d).", pr->pr_id)); 360 pr->pr_ref++; 361 mtx_unlock(&pr->pr_mtx); 362 } 363 364 u_int32_t 365 prison_getip(struct ucred *cred) 366 { 367 368 return (cred->cr_prison->pr_ip); 369 } 370 371 int 372 prison_ip(struct ucred *cred, int flag, u_int32_t *ip) 373 { 374 u_int32_t tmp; 375 376 if (!jailed(cred)) 377 return (0); 378 if (flag) 379 tmp = *ip; 380 else 381 tmp = ntohl(*ip); 382 if (tmp == INADDR_ANY) { 383 if (flag) 384 *ip = cred->cr_prison->pr_ip; 385 else 386 *ip = htonl(cred->cr_prison->pr_ip); 387 return (0); 388 } 389 if (tmp == INADDR_LOOPBACK) { 390 if (flag) 391 *ip = cred->cr_prison->pr_ip; 392 else 393 *ip = htonl(cred->cr_prison->pr_ip); 394 return (0); 395 } 396 if (cred->cr_prison->pr_ip != tmp) 397 return (1); 398 return (0); 399 } 400 401 void 402 prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) 403 { 404 u_int32_t tmp; 405 406 if (!jailed(cred)) 407 return; 408 if (flag) 409 tmp = *ip; 410 else 411 tmp = ntohl(*ip); 412 if (tmp == INADDR_LOOPBACK) { 413 if (flag) 414 *ip = cred->cr_prison->pr_ip; 415 else 416 *ip = htonl(cred->cr_prison->pr_ip); 417 return; 418 } 419 return; 420 } 421 422 int 423 prison_if(struct ucred *cred, struct sockaddr *sa) 424 { 425 struct sockaddr_in *sai; 426 int ok; 427 428 sai = (struct sockaddr_in *)sa; 429 if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) 430 ok = 1; 431 else if (sai->sin_family != AF_INET) 432 ok = 0; 433 else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) 434 ok = 1; 435 else 436 ok = 0; 437 return (ok); 438 } 439 440 /* 441 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 442 */ 443 int 444 prison_check(struct ucred *cred1, struct ucred *cred2) 445 { 446 447 if (jailed(cred1)) { 448 if (!jailed(cred2)) 449 return (ESRCH); 450 if (cred2->cr_prison != cred1->cr_prison) 451 return (ESRCH); 452 } 453 454 return (0); 455 } 456 457 /* 458 * Return 1 if the passed credential is in a jail, otherwise 0. 459 */ 460 int 461 jailed(struct ucred *cred) 462 { 463 464 return (cred->cr_prison != NULL); 465 } 466 467 /* 468 * Return the correct hostname for the passed credential. 469 */ 470 void 471 getcredhostname(struct ucred *cred, char *buf, size_t size) 472 { 473 474 if (jailed(cred)) { 475 mtx_lock(&cred->cr_prison->pr_mtx); 476 strlcpy(buf, cred->cr_prison->pr_host, size); 477 mtx_unlock(&cred->cr_prison->pr_mtx); 478 } else 479 strlcpy(buf, hostname, size); 480 } 481 482 /* 483 * Determine whether the subject represented by cred can "see" 484 * status of a mount point. 485 * Returns: 0 for permitted, ENOENT otherwise. 486 * XXX: This function should be called cr_canseemount() and should be 487 * placed in kern_prot.c. 488 */ 489 int 490 prison_canseemount(struct ucred *cred, struct mount *mp) 491 { 492 struct prison *pr; 493 struct statfs *sp; 494 size_t len; 495 496 if (!jailed(cred) || jail_enforce_statfs == 0) 497 return (0); 498 pr = cred->cr_prison; 499 if (pr->pr_root->v_mount == mp) 500 return (0); 501 if (jail_enforce_statfs == 2) 502 return (ENOENT); 503 /* 504 * If jail's chroot directory is set to "/" we should be able to see 505 * all mount-points from inside a jail. 506 * This is ugly check, but this is the only situation when jail's 507 * directory ends with '/'. 508 */ 509 if (strcmp(pr->pr_path, "/") == 0) 510 return (0); 511 len = strlen(pr->pr_path); 512 sp = &mp->mnt_stat; 513 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 514 return (ENOENT); 515 /* 516 * Be sure that we don't have situation where jail's root directory 517 * is "/some/path" and mount point is "/some/pathpath". 518 */ 519 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 520 return (ENOENT); 521 return (0); 522 } 523 524 void 525 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 526 { 527 char jpath[MAXPATHLEN]; 528 struct prison *pr; 529 size_t len; 530 531 if (!jailed(cred) || jail_enforce_statfs == 0) 532 return; 533 pr = cred->cr_prison; 534 if (prison_canseemount(cred, mp) != 0) { 535 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 536 strlcpy(sp->f_mntonname, "[restricted]", 537 sizeof(sp->f_mntonname)); 538 return; 539 } 540 if (pr->pr_root->v_mount == mp) { 541 /* 542 * Clear current buffer data, so we are sure nothing from 543 * the valid path left there. 544 */ 545 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 546 *sp->f_mntonname = '/'; 547 return; 548 } 549 /* 550 * If jail's chroot directory is set to "/" we should be able to see 551 * all mount-points from inside a jail. 552 */ 553 if (strcmp(pr->pr_path, "/") == 0) 554 return; 555 len = strlen(pr->pr_path); 556 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 557 /* 558 * Clear current buffer data, so we are sure nothing from 559 * the valid path left there. 560 */ 561 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 562 if (*jpath == '\0') { 563 /* Should never happen. */ 564 *sp->f_mntonname = '/'; 565 } else { 566 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 567 } 568 } 569 570 /* 571 * Check with permission for a specific privilege is granted within jail. We 572 * have a specific list of accepted privileges; the rest are denied. 573 */ 574 int 575 prison_priv_check(struct ucred *cred, int priv) 576 { 577 578 if (!jailed(cred)) 579 return (0); 580 581 switch (priv) { 582 583 /* 584 * Allow ktrace privileges for root in jail. 585 */ 586 case PRIV_KTRACE: 587 588 #if 0 589 /* 590 * Allow jailed processes to configure audit identity and 591 * submit audit records (login, etc). In the future we may 592 * want to further refine the relationship between audit and 593 * jail. 594 */ 595 case PRIV_AUDIT_GETAUDIT: 596 case PRIV_AUDIT_SETAUDIT: 597 case PRIV_AUDIT_SUBMIT: 598 #endif 599 600 /* 601 * Allow jailed processes to manipulate process UNIX 602 * credentials in any way they see fit. 603 */ 604 case PRIV_CRED_SETUID: 605 case PRIV_CRED_SETEUID: 606 case PRIV_CRED_SETGID: 607 case PRIV_CRED_SETEGID: 608 case PRIV_CRED_SETGROUPS: 609 case PRIV_CRED_SETREUID: 610 case PRIV_CRED_SETREGID: 611 case PRIV_CRED_SETRESUID: 612 case PRIV_CRED_SETRESGID: 613 614 /* 615 * Jail implements visibility constraints already, so allow 616 * jailed root to override uid/gid-based constraints. 617 */ 618 case PRIV_SEEOTHERGIDS: 619 case PRIV_SEEOTHERUIDS: 620 621 /* 622 * Jail implements inter-process debugging limits already, so 623 * allow jailed root various debugging privileges. 624 */ 625 case PRIV_DEBUG_DIFFCRED: 626 case PRIV_DEBUG_SUGID: 627 case PRIV_DEBUG_UNPRIV: 628 629 /* 630 * Allow jail to set various resource limits and login 631 * properties, and for now, exceed process resource limits. 632 */ 633 case PRIV_PROC_LIMIT: 634 case PRIV_PROC_SETLOGIN: 635 case PRIV_PROC_SETRLIMIT: 636 637 /* 638 * System V and POSIX IPC privileges are granted in jail. 639 */ 640 case PRIV_IPC_READ: 641 case PRIV_IPC_WRITE: 642 case PRIV_IPC_ADMIN: 643 case PRIV_IPC_MSGSIZE: 644 case PRIV_MQ_ADMIN: 645 646 /* 647 * Jail implements its own inter-process limits, so allow 648 * root processes in jail to change scheduling on other 649 * processes in the same jail. Likewise for signalling. 650 */ 651 case PRIV_SCHED_DIFFCRED: 652 case PRIV_SIGNAL_DIFFCRED: 653 case PRIV_SIGNAL_SUGID: 654 655 /* 656 * Allow jailed processes to write to sysctls marked as jail 657 * writable. 658 */ 659 case PRIV_SYSCTL_WRITEJAIL: 660 661 /* 662 * Allow root in jail to manage a variety of quota 663 * properties. These should likely be conditional on a 664 * configuration option. 665 */ 666 case PRIV_VFS_GETQUOTA: 667 case PRIV_VFS_SETQUOTA: 668 669 /* 670 * Since Jail relies on chroot() to implement file system 671 * protections, grant many VFS privileges to root in jail. 672 * Be careful to exclude mount-related and NFS-related 673 * privileges. 674 */ 675 case PRIV_VFS_READ: 676 case PRIV_VFS_WRITE: 677 case PRIV_VFS_ADMIN: 678 case PRIV_VFS_EXEC: 679 case PRIV_VFS_LOOKUP: 680 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 681 case PRIV_VFS_CHFLAGS_DEV: 682 case PRIV_VFS_CHOWN: 683 case PRIV_VFS_CHROOT: 684 case PRIV_VFS_RETAINSUGID: 685 case PRIV_VFS_FCHROOT: 686 case PRIV_VFS_LINK: 687 case PRIV_VFS_SETGID: 688 case PRIV_VFS_STAT: 689 case PRIV_VFS_STICKYFILE: 690 return (0); 691 692 /* 693 * Depending on the global setting, allow privilege of 694 * setting system flags. 695 */ 696 case PRIV_VFS_SYSFLAGS: 697 if (jail_chflags_allowed) 698 return (0); 699 else 700 return (EPERM); 701 702 /* 703 * Depending on the global setting, allow privilege of 704 * mounting/unmounting file systems. 705 */ 706 case PRIV_VFS_MOUNT: 707 case PRIV_VFS_UNMOUNT: 708 case PRIV_VFS_MOUNT_NONUSER: 709 case PRIV_VFS_MOUNT_OWNER: 710 if (jail_mount_allowed) 711 return (0); 712 else 713 return (EPERM); 714 715 /* 716 * Allow jailed root to bind reserved ports and reuse in-use 717 * ports. 718 */ 719 case PRIV_NETINET_RESERVEDPORT: 720 case PRIV_NETINET_REUSEPORT: 721 return (0); 722 723 /* 724 * Allow jailed root to set certian IPv4/6 (option) headers. 725 */ 726 case PRIV_NETINET_SETHDROPTS: 727 return (0); 728 729 /* 730 * Conditionally allow creating raw sockets in jail. 731 */ 732 case PRIV_NETINET_RAW: 733 if (jail_allow_raw_sockets) 734 return (0); 735 else 736 return (EPERM); 737 738 /* 739 * Since jail implements its own visibility limits on netstat 740 * sysctls, allow getcred. This allows identd to work in 741 * jail. 742 */ 743 case PRIV_NETINET_GETCRED: 744 return (0); 745 746 default: 747 /* 748 * In all remaining cases, deny the privilege request. This 749 * includes almost all network privileges, many system 750 * configuration privileges. 751 */ 752 return (EPERM); 753 } 754 } 755 756 /* 757 * Register jail service. Provides 'create' and 'destroy' methods. 758 * 'create' method will be called for every existing jail and all 759 * jails in the future as they beeing created. 760 * 'destroy' method will be called for every jail going away and 761 * for all existing jails at the time of service deregistration. 762 */ 763 struct prison_service * 764 prison_service_register(const char *name, prison_create_t create, 765 prison_destroy_t destroy) 766 { 767 struct prison_service *psrv, *psrv2; 768 struct prison *pr; 769 int reallocate = 1, slotno = 0; 770 void **slots, **oldslots; 771 772 psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, 773 M_WAITOK | M_ZERO); 774 psrv->ps_create = create; 775 psrv->ps_destroy = destroy; 776 strcpy(psrv->ps_name, name); 777 /* 778 * Grab the allprison_lock here, so we won't miss any jail 779 * creation/destruction. 780 */ 781 sx_xlock(&allprison_lock); 782 #ifdef INVARIANTS 783 /* 784 * Verify if service is not already registered. 785 */ 786 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 787 KASSERT(strcmp(psrv2->ps_name, name) != 0, 788 ("jail service %s already registered", name)); 789 } 790 #endif 791 /* 792 * Find free slot. When there is no existing free slot available, 793 * allocate one at the end. 794 */ 795 TAILQ_FOREACH(psrv2, &prison_services, ps_next) { 796 if (psrv2->ps_slotno != slotno) { 797 KASSERT(slotno < psrv2->ps_slotno, 798 ("Invalid slotno (slotno=%d >= ps_slotno=%d", 799 slotno, psrv2->ps_slotno)); 800 /* We found free slot. */ 801 reallocate = 0; 802 break; 803 } 804 slotno++; 805 } 806 psrv->ps_slotno = slotno; 807 /* 808 * Keep the list sorted by slot number. 809 */ 810 if (psrv2 != NULL) { 811 KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); 812 TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); 813 } else { 814 KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); 815 TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); 816 } 817 prison_service_slots++; 818 sx_downgrade(&allprison_lock); 819 /* 820 * Allocate memory for new slot if we didn't found empty one. 821 * Do not use realloc(9), because pr_slots is protected with a mutex, 822 * so we can't sleep. 823 */ 824 LIST_FOREACH(pr, &allprison, pr_list) { 825 if (reallocate) { 826 /* First allocate memory with M_WAITOK. */ 827 slots = malloc(sizeof(*slots) * prison_service_slots, 828 M_PRISON, M_WAITOK); 829 /* Now grab the mutex and replace pr_slots. */ 830 mtx_lock(&pr->pr_mtx); 831 oldslots = pr->pr_slots; 832 if (psrv->ps_slotno > 0) { 833 bcopy(oldslots, slots, 834 sizeof(*slots) * (prison_service_slots - 1)); 835 } 836 slots[psrv->ps_slotno] = NULL; 837 pr->pr_slots = slots; 838 mtx_unlock(&pr->pr_mtx); 839 if (oldslots != NULL) 840 free(oldslots, M_PRISON); 841 } 842 /* 843 * Call 'create' method for each existing jail. 844 */ 845 psrv->ps_create(psrv, pr); 846 } 847 sx_sunlock(&allprison_lock); 848 849 return (psrv); 850 } 851 852 void 853 prison_service_deregister(struct prison_service *psrv) 854 { 855 struct prison *pr; 856 void **slots, **oldslots; 857 int last = 0; 858 859 sx_xlock(&allprison_lock); 860 if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) 861 last = 1; 862 TAILQ_REMOVE(&prison_services, psrv, ps_next); 863 prison_service_slots--; 864 sx_downgrade(&allprison_lock); 865 LIST_FOREACH(pr, &allprison, pr_list) { 866 /* 867 * Call 'destroy' method for every currently existing jail. 868 */ 869 psrv->ps_destroy(psrv, pr); 870 /* 871 * If this is the last slot, free the memory allocated for it. 872 */ 873 if (last) { 874 if (prison_service_slots == 0) 875 slots = NULL; 876 else { 877 slots = malloc(sizeof(*slots) * prison_service_slots, 878 M_PRISON, M_WAITOK); 879 } 880 mtx_lock(&pr->pr_mtx); 881 oldslots = pr->pr_slots; 882 /* 883 * We require setting slot to NULL after freeing it, 884 * this way we can check for memory leaks here. 885 */ 886 KASSERT(oldslots[psrv->ps_slotno] == NULL, 887 ("Slot %d (service %s, jailid=%d) still contains data?", 888 psrv->ps_slotno, psrv->ps_name, pr->pr_id)); 889 if (psrv->ps_slotno > 0) { 890 bcopy(oldslots, slots, 891 sizeof(*slots) * prison_service_slots); 892 } 893 pr->pr_slots = slots; 894 mtx_unlock(&pr->pr_mtx); 895 KASSERT(oldslots != NULL, ("oldslots == NULL")); 896 free(oldslots, M_PRISON); 897 } 898 } 899 sx_sunlock(&allprison_lock); 900 free(psrv, M_PRISON); 901 } 902 903 /* 904 * Function sets data for the given jail in slot assigned for the given 905 * jail service. 906 */ 907 void 908 prison_service_data_set(struct prison_service *psrv, struct prison *pr, 909 void *data) 910 { 911 912 mtx_assert(&pr->pr_mtx, MA_OWNED); 913 pr->pr_slots[psrv->ps_slotno] = data; 914 } 915 916 /* 917 * Function clears slots assigned for the given jail service in the given 918 * prison structure and returns current slot data. 919 */ 920 void * 921 prison_service_data_del(struct prison_service *psrv, struct prison *pr) 922 { 923 void *data; 924 925 mtx_assert(&pr->pr_mtx, MA_OWNED); 926 data = pr->pr_slots[psrv->ps_slotno]; 927 pr->pr_slots[psrv->ps_slotno] = NULL; 928 return (data); 929 } 930 931 /* 932 * Function returns current data from the slot assigned to the given jail 933 * service for the given jail. 934 */ 935 void * 936 prison_service_data_get(struct prison_service *psrv, struct prison *pr) 937 { 938 939 mtx_assert(&pr->pr_mtx, MA_OWNED); 940 return (pr->pr_slots[psrv->ps_slotno]); 941 } 942 943 static int 944 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 945 { 946 struct xprison *xp, *sxp; 947 struct prison *pr; 948 int count, error; 949 950 if (jailed(req->td->td_ucred)) 951 return (0); 952 953 sx_slock(&allprison_lock); 954 if ((count = prisoncount) == 0) { 955 sx_sunlock(&allprison_lock); 956 return (0); 957 } 958 959 sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); 960 961 LIST_FOREACH(pr, &allprison, pr_list) { 962 xp->pr_version = XPRISON_VERSION; 963 xp->pr_id = pr->pr_id; 964 xp->pr_ip = pr->pr_ip; 965 strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); 966 mtx_lock(&pr->pr_mtx); 967 strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); 968 mtx_unlock(&pr->pr_mtx); 969 xp++; 970 } 971 sx_sunlock(&allprison_lock); 972 973 error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); 974 free(sxp, M_TEMP); 975 return (error); 976 } 977 978 SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, 979 NULL, 0, sysctl_jail_list, "S", "List of active jails"); 980 981 static int 982 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 983 { 984 int error, injail; 985 986 injail = jailed(req->td->td_ucred); 987 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 988 989 return (error); 990 } 991 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, 992 NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); 993