1 /*- 2 * Copyright (c) 1999 Poul-Henning Kamp. 3 * Copyright (c) 2008 Bjoern A. Zeeb. 4 * Copyright (c) 2009 James Gritton. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_compat.h" 33 #include "opt_ddb.h" 34 #include "opt_inet.h" 35 #include "opt_inet6.h" 36 37 #include <sys/param.h> 38 #include <sys/types.h> 39 #include <sys/kernel.h> 40 #include <sys/systm.h> 41 #include <sys/errno.h> 42 #include <sys/sysproto.h> 43 #include <sys/malloc.h> 44 #include <sys/osd.h> 45 #include <sys/priv.h> 46 #include <sys/proc.h> 47 #include <sys/taskqueue.h> 48 #include <sys/fcntl.h> 49 #include <sys/jail.h> 50 #include <sys/lock.h> 51 #include <sys/mutex.h> 52 #include <sys/racct.h> 53 #include <sys/refcount.h> 54 #include <sys/sx.h> 55 #include <sys/sysent.h> 56 #include <sys/namei.h> 57 #include <sys/mount.h> 58 #include <sys/queue.h> 59 #include <sys/socket.h> 60 #include <sys/syscallsubr.h> 61 #include <sys/sysctl.h> 62 #include <sys/vnode.h> 63 64 #include <net/if.h> 65 #include <net/vnet.h> 66 67 #include <netinet/in.h> 68 69 #ifdef DDB 70 #include <ddb/ddb.h> 71 #endif /* DDB */ 72 73 #include <security/mac/mac_framework.h> 74 75 #define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" 76 77 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); 78 static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures"); 79 80 /* Keep struct prison prison0 and some code in kern_jail_set() readable. */ 81 #ifdef INET 82 #ifdef INET6 83 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL 84 #else 85 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL 86 #endif 87 #else /* !INET */ 88 #ifdef INET6 89 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL 90 #else 91 #define _PR_IP_SADDRSEL 0 92 #endif 93 #endif 94 95 /* prison0 describes what is "real" about the system. 
*/ 96 struct prison prison0 = { 97 .pr_id = 0, 98 .pr_name = "0", 99 .pr_ref = 1, 100 .pr_uref = 1, 101 .pr_path = "/", 102 .pr_securelevel = -1, 103 .pr_devfs_rsnum = 0, 104 .pr_childmax = JAIL_MAX, 105 .pr_hostuuid = DEFAULT_HOSTUUID, 106 .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), 107 #ifdef VIMAGE 108 .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, 109 #else 110 .pr_flags = PR_HOST|_PR_IP_SADDRSEL, 111 #endif 112 .pr_allow = PR_ALLOW_ALL, 113 }; 114 MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); 115 116 /* allprison, allprison_racct and lastprid are protected by allprison_lock. */ 117 struct sx allprison_lock; 118 SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); 119 struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); 120 LIST_HEAD(, prison_racct) allprison_racct; 121 int lastprid = 0; 122 123 static int do_jail_attach(struct thread *td, struct prison *pr); 124 static void prison_complete(void *context, int pending); 125 static void prison_deref(struct prison *pr, int flags); 126 static char *prison_path(struct prison *pr1, struct prison *pr2); 127 static void prison_remove_one(struct prison *pr); 128 #ifdef RACCT 129 static void prison_racct_attach(struct prison *pr); 130 static void prison_racct_modify(struct prison *pr); 131 static void prison_racct_detach(struct prison *pr); 132 #endif 133 #ifdef INET 134 static int _prison_check_ip4(const struct prison *, const struct in_addr *); 135 static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); 136 #endif 137 #ifdef INET6 138 static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); 139 static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); 140 #endif 141 142 /* Flags for prison_deref */ 143 #define PD_DEREF 0x01 144 #define PD_DEUREF 0x02 145 #define PD_LOCKED 0x04 146 #define PD_LIST_SLOCKED 0x08 147 #define PD_LIST_XLOCKED 0x10 148 149 /* 150 * Parameter names corresponding to PR_* flag values. 
Size values are for kvm 151 * as we cannot figure out the size of a sparse array, or an array without a 152 * terminating entry. 153 */ 154 static char *pr_flag_names[] = { 155 [0] = "persist", 156 #ifdef INET 157 [7] = "ip4.saddrsel", 158 #endif 159 #ifdef INET6 160 [8] = "ip6.saddrsel", 161 #endif 162 }; 163 const size_t pr_flag_names_size = sizeof(pr_flag_names); 164 165 static char *pr_flag_nonames[] = { 166 [0] = "nopersist", 167 #ifdef INET 168 [7] = "ip4.nosaddrsel", 169 #endif 170 #ifdef INET6 171 [8] = "ip6.nosaddrsel", 172 #endif 173 }; 174 const size_t pr_flag_nonames_size = sizeof(pr_flag_nonames); 175 176 struct jailsys_flags { 177 const char *name; 178 unsigned disable; 179 unsigned new; 180 } pr_flag_jailsys[] = { 181 { "host", 0, PR_HOST }, 182 #ifdef VIMAGE 183 { "vnet", 0, PR_VNET }, 184 #endif 185 #ifdef INET 186 { "ip4", PR_IP4_USER, PR_IP4_USER }, 187 #endif 188 #ifdef INET6 189 { "ip6", PR_IP6_USER, PR_IP6_USER }, 190 #endif 191 }; 192 const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys); 193 194 static char *pr_allow_names[] = { 195 "allow.set_hostname", 196 "allow.sysvipc", 197 "allow.raw_sockets", 198 "allow.chflags", 199 "allow.mount", 200 "allow.quotas", 201 "allow.socket_af", 202 "allow.mount.devfs", 203 "allow.mount.nullfs", 204 "allow.mount.zfs", 205 "allow.mount.procfs", 206 "allow.mount.tmpfs", 207 "allow.mount.fdescfs", 208 }; 209 const size_t pr_allow_names_size = sizeof(pr_allow_names); 210 211 static char *pr_allow_nonames[] = { 212 "allow.noset_hostname", 213 "allow.nosysvipc", 214 "allow.noraw_sockets", 215 "allow.nochflags", 216 "allow.nomount", 217 "allow.noquotas", 218 "allow.nosocket_af", 219 "allow.mount.nodevfs", 220 "allow.mount.nonullfs", 221 "allow.mount.nozfs", 222 "allow.mount.noprocfs", 223 "allow.mount.notmpfs", 224 "allow.mount.nofdescfs", 225 }; 226 const size_t pr_allow_nonames_size = sizeof(pr_allow_nonames); 227 228 #define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME 229 #define JAIL_DEFAULT_ENFORCE_STATFS 
2 230 #define JAIL_DEFAULT_DEVFS_RSNUM 0 231 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; 232 static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 233 static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; 234 #if defined(INET) || defined(INET6) 235 static unsigned jail_max_af_ips = 255; 236 #endif 237 238 /* 239 * Initialize the parts of prison0 that can't be static-initialized with 240 * constants. This is called from proc0_init() after creating thread0 cpuset. 241 */ 242 void 243 prison0_init(void) 244 { 245 246 prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset); 247 prison0.pr_osreldate = osreldate; 248 strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease)); 249 } 250 251 #ifdef INET 252 static int 253 qcmp_v4(const void *ip1, const void *ip2) 254 { 255 in_addr_t iaa, iab; 256 257 /* 258 * We need to compare in HBO here to get the list sorted as expected 259 * by the result of the code. Sorting NBO addresses gives you 260 * interesting results. If you do not understand, do not try. 261 */ 262 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 263 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 264 265 /* 266 * Do not simply return the difference of the two numbers, the int is 267 * not wide enough. 
268 */ 269 if (iaa > iab) 270 return (1); 271 else if (iaa < iab) 272 return (-1); 273 else 274 return (0); 275 } 276 #endif 277 278 #ifdef INET6 279 static int 280 qcmp_v6(const void *ip1, const void *ip2) 281 { 282 const struct in6_addr *ia6a, *ia6b; 283 int i, rc; 284 285 ia6a = (const struct in6_addr *)ip1; 286 ia6b = (const struct in6_addr *)ip2; 287 288 rc = 0; 289 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 290 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 291 rc = 1; 292 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 293 rc = -1; 294 } 295 return (rc); 296 } 297 #endif 298 299 /* 300 * struct jail_args { 301 * struct jail *jail; 302 * }; 303 */ 304 int 305 sys_jail(struct thread *td, struct jail_args *uap) 306 { 307 uint32_t version; 308 int error; 309 struct jail j; 310 311 error = copyin(uap->jail, &version, sizeof(uint32_t)); 312 if (error) 313 return (error); 314 315 switch (version) { 316 case 0: 317 { 318 struct jail_v0 j0; 319 320 /* FreeBSD single IPv4 jails. */ 321 bzero(&j, sizeof(struct jail)); 322 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 323 if (error) 324 return (error); 325 j.version = j0.version; 326 j.path = j0.path; 327 j.hostname = j0.hostname; 328 j.ip4s = htonl(j0.ip_number); /* jail_v0 is host order */ 329 break; 330 } 331 332 case 1: 333 /* 334 * Version 1 was used by multi-IPv4 jail implementations 335 * that never made it into the official kernel. 336 */ 337 return (EINVAL); 338 339 case 2: /* JAIL_API_VERSION */ 340 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 341 error = copyin(uap->jail, &j, sizeof(struct jail)); 342 if (error) 343 return (error); 344 break; 345 346 default: 347 /* Sci-Fi jails are not supported, sorry. 
*/ 348 return (EINVAL); 349 } 350 return (kern_jail(td, &j)); 351 } 352 353 int 354 kern_jail(struct thread *td, struct jail *j) 355 { 356 struct iovec optiov[2 * (4 357 + sizeof(pr_allow_names) / sizeof(pr_allow_names[0]) 358 #ifdef INET 359 + 1 360 #endif 361 #ifdef INET6 362 + 1 363 #endif 364 )]; 365 struct uio opt; 366 char *u_path, *u_hostname, *u_name; 367 #ifdef INET 368 uint32_t ip4s; 369 struct in_addr *u_ip4; 370 #endif 371 #ifdef INET6 372 struct in6_addr *u_ip6; 373 #endif 374 size_t tmplen; 375 int error, enforce_statfs, fi; 376 377 bzero(&optiov, sizeof(optiov)); 378 opt.uio_iov = optiov; 379 opt.uio_iovcnt = 0; 380 opt.uio_offset = -1; 381 opt.uio_resid = -1; 382 opt.uio_segflg = UIO_SYSSPACE; 383 opt.uio_rw = UIO_READ; 384 opt.uio_td = td; 385 386 /* Set permissions for top-level jails from sysctls. */ 387 if (!jailed(td->td_ucred)) { 388 for (fi = 0; fi < sizeof(pr_allow_names) / 389 sizeof(pr_allow_names[0]); fi++) { 390 optiov[opt.uio_iovcnt].iov_base = 391 (jail_default_allow & (1 << fi)) 392 ? pr_allow_names[fi] : pr_allow_nonames[fi]; 393 optiov[opt.uio_iovcnt].iov_len = 394 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 395 opt.uio_iovcnt += 2; 396 } 397 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 398 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 399 opt.uio_iovcnt++; 400 enforce_statfs = jail_default_enforce_statfs; 401 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 402 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 403 opt.uio_iovcnt++; 404 } 405 406 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 407 #ifdef INET 408 ip4s = (j->version == 0) ? 
1 : j->ip4s; 409 if (ip4s > jail_max_af_ips) 410 return (EINVAL); 411 tmplen += ip4s * sizeof(struct in_addr); 412 #else 413 if (j->ip4s > 0) 414 return (EINVAL); 415 #endif 416 #ifdef INET6 417 if (j->ip6s > jail_max_af_ips) 418 return (EINVAL); 419 tmplen += j->ip6s * sizeof(struct in6_addr); 420 #else 421 if (j->ip6s > 0) 422 return (EINVAL); 423 #endif 424 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 425 u_hostname = u_path + MAXPATHLEN; 426 u_name = u_hostname + MAXHOSTNAMELEN; 427 #ifdef INET 428 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 429 #endif 430 #ifdef INET6 431 #ifdef INET 432 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 433 #else 434 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 435 #endif 436 #endif 437 optiov[opt.uio_iovcnt].iov_base = "path"; 438 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 439 opt.uio_iovcnt++; 440 optiov[opt.uio_iovcnt].iov_base = u_path; 441 error = copyinstr(j->path, u_path, MAXPATHLEN, 442 &optiov[opt.uio_iovcnt].iov_len); 443 if (error) { 444 free(u_path, M_TEMP); 445 return (error); 446 } 447 opt.uio_iovcnt++; 448 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 449 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 450 opt.uio_iovcnt++; 451 optiov[opt.uio_iovcnt].iov_base = u_hostname; 452 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 453 &optiov[opt.uio_iovcnt].iov_len); 454 if (error) { 455 free(u_path, M_TEMP); 456 return (error); 457 } 458 opt.uio_iovcnt++; 459 if (j->jailname != NULL) { 460 optiov[opt.uio_iovcnt].iov_base = "name"; 461 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 462 opt.uio_iovcnt++; 463 optiov[opt.uio_iovcnt].iov_base = u_name; 464 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 465 &optiov[opt.uio_iovcnt].iov_len); 466 if (error) { 467 free(u_path, M_TEMP); 468 return (error); 469 } 470 opt.uio_iovcnt++; 471 } 472 #ifdef INET 473 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 474 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 475 
opt.uio_iovcnt++; 476 optiov[opt.uio_iovcnt].iov_base = u_ip4; 477 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 478 if (j->version == 0) 479 u_ip4->s_addr = j->ip4s; 480 else { 481 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 482 if (error) { 483 free(u_path, M_TEMP); 484 return (error); 485 } 486 } 487 opt.uio_iovcnt++; 488 #endif 489 #ifdef INET6 490 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 491 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 492 opt.uio_iovcnt++; 493 optiov[opt.uio_iovcnt].iov_base = u_ip6; 494 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 495 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 496 if (error) { 497 free(u_path, M_TEMP); 498 return (error); 499 } 500 opt.uio_iovcnt++; 501 #endif 502 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 503 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 504 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 505 free(u_path, M_TEMP); 506 return (error); 507 } 508 509 510 /* 511 * struct jail_set_args { 512 * struct iovec *iovp; 513 * unsigned int iovcnt; 514 * int flags; 515 * }; 516 */ 517 int 518 sys_jail_set(struct thread *td, struct jail_set_args *uap) 519 { 520 struct uio *auio; 521 int error; 522 523 /* Check that we have an even number of iovecs. 
*/ 524 if (uap->iovcnt & 1) 525 return (EINVAL); 526 527 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 528 if (error) 529 return (error); 530 error = kern_jail_set(td, auio, uap->flags); 531 free(auio, M_IOV); 532 return (error); 533 } 534 535 int 536 kern_jail_set(struct thread *td, struct uio *optuio, int flags) 537 { 538 struct nameidata nd; 539 #ifdef INET 540 struct in_addr *ip4; 541 #endif 542 #ifdef INET6 543 struct in6_addr *ip6; 544 #endif 545 struct vfsopt *opt; 546 struct vfsoptlist *opts; 547 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 548 struct vnode *root; 549 char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; 550 char *g_path, *osrelstr; 551 #if defined(INET) || defined(INET6) 552 struct prison *tppr; 553 void *op; 554 #endif 555 unsigned long hid; 556 size_t namelen, onamelen; 557 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 558 int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; 559 int fi, jid, jsys, len, level; 560 int childmax, osreldt, rsnum, slevel; 561 int fullpath_disabled; 562 #if defined(INET) || defined(INET6) 563 int ii, ij; 564 #endif 565 #ifdef INET 566 int ip4s, redo_ip4; 567 #endif 568 #ifdef INET6 569 int ip6s, redo_ip6; 570 #endif 571 uint64_t pr_allow, ch_allow, pr_flags, ch_flags; 572 unsigned tallow; 573 char numbuf[12]; 574 575 error = priv_check(td, PRIV_JAIL_SET); 576 if (!error && (flags & JAIL_ATTACH)) 577 error = priv_check(td, PRIV_JAIL_ATTACH); 578 if (error) 579 return (error); 580 mypr = ppr = td->td_ucred->cr_prison; 581 if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) 582 return (EPERM); 583 if (flags & ~JAIL_SET_MASK) 584 return (EINVAL); 585 586 /* 587 * Check all the parameters before committing to anything. Not all 588 * errors can be caught early, but we may as well try. Also, this 589 * takes care of some expensive stuff (path lookup) before getting 590 * the allprison lock. 
591 * 592 * XXX Jails are not filesystems, and jail parameters are not mount 593 * options. But it makes more sense to re-use the vfsopt code 594 * than duplicate it under a different name. 595 */ 596 error = vfs_buildopts(optuio, &opts); 597 if (error) 598 return (error); 599 #ifdef INET 600 ip4 = NULL; 601 #endif 602 #ifdef INET6 603 ip6 = NULL; 604 #endif 605 g_path = NULL; 606 607 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 608 if (error == ENOENT) 609 jid = 0; 610 else if (error != 0) 611 goto done_free; 612 613 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 614 if (error == ENOENT) 615 gotslevel = 0; 616 else if (error != 0) 617 goto done_free; 618 else 619 gotslevel = 1; 620 621 error = 622 vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); 623 if (error == ENOENT) 624 gotchildmax = 0; 625 else if (error != 0) 626 goto done_free; 627 else 628 gotchildmax = 1; 629 630 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 631 if (error == ENOENT) 632 gotenforce = 0; 633 else if (error != 0) 634 goto done_free; 635 else if (enforce < 0 || enforce > 2) { 636 error = EINVAL; 637 goto done_free; 638 } else 639 gotenforce = 1; 640 641 error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); 642 if (error == ENOENT) 643 gotrsnum = 0; 644 else if (error != 0) 645 goto done_free; 646 else 647 gotrsnum = 1; 648 649 pr_flags = ch_flags = 0; 650 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 651 fi++) { 652 if (pr_flag_names[fi] == NULL) 653 continue; 654 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 655 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 656 } 657 ch_flags |= pr_flags; 658 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 659 fi++) { 660 error = vfs_copyopt(opts, pr_flag_jailsys[fi].name, &jsys, 661 sizeof(jsys)); 662 if (error == ENOENT) 663 continue; 664 if (error != 0) 665 goto done_free; 666 switch (jsys) { 667 case 
JAIL_SYS_DISABLE: 668 if (!pr_flag_jailsys[fi].disable) { 669 error = EINVAL; 670 goto done_free; 671 } 672 pr_flags |= pr_flag_jailsys[fi].disable; 673 break; 674 case JAIL_SYS_NEW: 675 pr_flags |= pr_flag_jailsys[fi].new; 676 break; 677 case JAIL_SYS_INHERIT: 678 break; 679 default: 680 error = EINVAL; 681 goto done_free; 682 } 683 ch_flags |= 684 pr_flag_jailsys[fi].new | pr_flag_jailsys[fi].disable; 685 } 686 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 687 && !(pr_flags & PR_PERSIST)) { 688 error = EINVAL; 689 vfs_opterror(opts, "new jail must persist or attach"); 690 goto done_errmsg; 691 } 692 #ifdef VIMAGE 693 if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { 694 error = EINVAL; 695 vfs_opterror(opts, "vnet cannot be changed after creation"); 696 goto done_errmsg; 697 } 698 #endif 699 #ifdef INET 700 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { 701 error = EINVAL; 702 vfs_opterror(opts, "ip4 cannot be changed after creation"); 703 goto done_errmsg; 704 } 705 #endif 706 #ifdef INET6 707 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { 708 error = EINVAL; 709 vfs_opterror(opts, "ip6 cannot be changed after creation"); 710 goto done_errmsg; 711 } 712 #endif 713 714 pr_allow = ch_allow = 0; 715 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 716 fi++) { 717 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 718 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 719 } 720 ch_allow |= pr_allow; 721 722 error = vfs_getopt(opts, "name", (void **)&name, &len); 723 if (error == ENOENT) 724 name = NULL; 725 else if (error != 0) 726 goto done_free; 727 else { 728 if (len == 0 || name[len - 1] != '\0') { 729 error = EINVAL; 730 goto done_free; 731 } 732 if (len > MAXHOSTNAMELEN) { 733 error = ENAMETOOLONG; 734 goto done_free; 735 } 736 } 737 738 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 739 if (error == ENOENT) 740 host = NULL; 741 else if (error != 0) 742 
goto done_free; 743 else { 744 ch_flags |= PR_HOST; 745 pr_flags |= PR_HOST; 746 if (len == 0 || host[len - 1] != '\0') { 747 error = EINVAL; 748 goto done_free; 749 } 750 if (len > MAXHOSTNAMELEN) { 751 error = ENAMETOOLONG; 752 goto done_free; 753 } 754 } 755 756 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 757 if (error == ENOENT) 758 domain = NULL; 759 else if (error != 0) 760 goto done_free; 761 else { 762 ch_flags |= PR_HOST; 763 pr_flags |= PR_HOST; 764 if (len == 0 || domain[len - 1] != '\0') { 765 error = EINVAL; 766 goto done_free; 767 } 768 if (len > MAXHOSTNAMELEN) { 769 error = ENAMETOOLONG; 770 goto done_free; 771 } 772 } 773 774 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 775 if (error == ENOENT) 776 uuid = NULL; 777 else if (error != 0) 778 goto done_free; 779 else { 780 ch_flags |= PR_HOST; 781 pr_flags |= PR_HOST; 782 if (len == 0 || uuid[len - 1] != '\0') { 783 error = EINVAL; 784 goto done_free; 785 } 786 if (len > HOSTUUIDLEN) { 787 error = ENAMETOOLONG; 788 goto done_free; 789 } 790 } 791 792 #ifdef COMPAT_FREEBSD32 793 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { 794 uint32_t hid32; 795 796 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 797 hid = hid32; 798 } else 799 #endif 800 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 801 if (error == ENOENT) 802 gothid = 0; 803 else if (error != 0) 804 goto done_free; 805 else { 806 gothid = 1; 807 ch_flags |= PR_HOST; 808 pr_flags |= PR_HOST; 809 } 810 811 #ifdef INET 812 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 813 if (error == ENOENT) 814 ip4s = 0; 815 else if (error != 0) 816 goto done_free; 817 else if (ip4s & (sizeof(*ip4) - 1)) { 818 error = EINVAL; 819 goto done_free; 820 } else { 821 ch_flags |= PR_IP4_USER; 822 pr_flags |= PR_IP4_USER; 823 if (ip4s > 0) { 824 ip4s /= sizeof(*ip4); 825 if (ip4s > jail_max_af_ips) { 826 error = EINVAL; 827 vfs_opterror(opts, "too many IPv4 addresses"); 828 goto done_errmsg; 829 
} 830 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 831 bcopy(op, ip4, ip4s * sizeof(*ip4)); 832 /* 833 * IP addresses are all sorted but ip[0] to preserve 834 * the primary IP address as given from userland. 835 * This special IP is used for unbound outgoing 836 * connections as well for "loopback" traffic in case 837 * source address selection cannot find any more fitting 838 * address to connect from. 839 */ 840 if (ip4s > 1) 841 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 842 /* 843 * Check for duplicate addresses and do some simple 844 * zero and broadcast checks. If users give other bogus 845 * addresses it is their problem. 846 * 847 * We do not have to care about byte order for these 848 * checks so we will do them in NBO. 849 */ 850 for (ii = 0; ii < ip4s; ii++) { 851 if (ip4[ii].s_addr == INADDR_ANY || 852 ip4[ii].s_addr == INADDR_BROADCAST) { 853 error = EINVAL; 854 goto done_free; 855 } 856 if ((ii+1) < ip4s && 857 (ip4[0].s_addr == ip4[ii+1].s_addr || 858 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 859 error = EINVAL; 860 goto done_free; 861 } 862 } 863 } 864 } 865 #endif 866 867 #ifdef INET6 868 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 869 if (error == ENOENT) 870 ip6s = 0; 871 else if (error != 0) 872 goto done_free; 873 else if (ip6s & (sizeof(*ip6) - 1)) { 874 error = EINVAL; 875 goto done_free; 876 } else { 877 ch_flags |= PR_IP6_USER; 878 pr_flags |= PR_IP6_USER; 879 if (ip6s > 0) { 880 ip6s /= sizeof(*ip6); 881 if (ip6s > jail_max_af_ips) { 882 error = EINVAL; 883 vfs_opterror(opts, "too many IPv6 addresses"); 884 goto done_errmsg; 885 } 886 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 887 bcopy(op, ip6, ip6s * sizeof(*ip6)); 888 if (ip6s > 1) 889 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 890 for (ii = 0; ii < ip6s; ii++) { 891 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 892 error = EINVAL; 893 goto done_free; 894 } 895 if ((ii+1) < ip6s && 896 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 897 
IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 898 { 899 error = EINVAL; 900 goto done_free; 901 } 902 } 903 } 904 } 905 #endif 906 907 #if defined(VIMAGE) && (defined(INET) || defined(INET6)) 908 if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 909 error = EINVAL; 910 vfs_opterror(opts, 911 "vnet jails cannot have IP address restrictions"); 912 goto done_errmsg; 913 } 914 #endif 915 916 fullpath_disabled = 0; 917 root = NULL; 918 error = vfs_getopt(opts, "path", (void **)&path, &len); 919 if (error == ENOENT) 920 path = NULL; 921 else if (error != 0) 922 goto done_free; 923 else { 924 if (flags & JAIL_UPDATE) { 925 error = EINVAL; 926 vfs_opterror(opts, 927 "path cannot be changed after creation"); 928 goto done_errmsg; 929 } 930 if (len == 0 || path[len - 1] != '\0') { 931 error = EINVAL; 932 goto done_free; 933 } 934 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, 935 path, td); 936 error = namei(&nd); 937 if (error) 938 goto done_free; 939 root = nd.ni_vp; 940 NDFREE(&nd, NDF_ONLY_PNBUF); 941 g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 942 strlcpy(g_path, path, MAXPATHLEN); 943 error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); 944 if (error == 0) 945 path = g_path; 946 else if (error == ENODEV) { 947 /* proceed if sysctl debug.disablefullpath == 1 */ 948 fullpath_disabled = 1; 949 if (len < 2 || (len == 2 && path[0] == '/')) 950 path = NULL; 951 } else { 952 /* exit on other errors */ 953 goto done_free; 954 } 955 if (root->v_type != VDIR) { 956 error = ENOTDIR; 957 vput(root); 958 goto done_free; 959 } 960 VOP_UNLOCK(root, 0); 961 if (fullpath_disabled) { 962 /* Leave room for a real-root full pathname. */ 963 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 964 ? 
strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 965 error = ENAMETOOLONG; 966 goto done_free; 967 } 968 } 969 } 970 971 error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); 972 if (error == ENOENT) 973 osrelstr = NULL; 974 else if (error != 0) 975 goto done_free; 976 else { 977 if (flags & JAIL_UPDATE) { 978 error = EINVAL; 979 vfs_opterror(opts, 980 "osrelease cannot be changed after creation"); 981 goto done_errmsg; 982 } 983 if (len == 0 || len >= OSRELEASELEN) { 984 error = EINVAL; 985 vfs_opterror(opts, 986 "osrelease string must be 1-%d bytes long", 987 OSRELEASELEN - 1); 988 goto done_errmsg; 989 } 990 } 991 992 error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); 993 if (error == ENOENT) 994 osreldt = 0; 995 else if (error != 0) 996 goto done_free; 997 else { 998 if (flags & JAIL_UPDATE) { 999 error = EINVAL; 1000 vfs_opterror(opts, 1001 "osreldate cannot be changed after creation"); 1002 goto done_errmsg; 1003 } 1004 if (osreldt == 0) { 1005 error = EINVAL; 1006 vfs_opterror(opts, "osreldate cannot be 0"); 1007 goto done_errmsg; 1008 } 1009 } 1010 1011 /* 1012 * Grab the allprison lock before letting modules check their 1013 * parameters. Once we have it, do not let go so we'll have a 1014 * consistent view of the OSD list. 1015 */ 1016 sx_xlock(&allprison_lock); 1017 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 1018 if (error) 1019 goto done_unlock_list; 1020 1021 /* By now, all parameters should have been noted. */ 1022 TAILQ_FOREACH(opt, opts, link) { 1023 if (!opt->seen && strcmp(opt->name, "errmsg")) { 1024 error = EINVAL; 1025 vfs_opterror(opts, "unknown parameter: %s", opt->name); 1026 goto done_unlock_list; 1027 } 1028 } 1029 1030 /* 1031 * See if we are creating a new record or updating an existing one. 1032 * This abuses the file error codes ENOENT and EEXIST. 
1033 */ 1034 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 1035 if (!cuflags) { 1036 error = EINVAL; 1037 vfs_opterror(opts, "no valid operation (create or update)"); 1038 goto done_unlock_list; 1039 } 1040 pr = NULL; 1041 namelc = NULL; 1042 if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { 1043 namelc = strrchr(name, '.'); 1044 jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); 1045 if (*p != '\0') 1046 jid = 0; 1047 } 1048 if (jid != 0) { 1049 /* 1050 * See if a requested jid already exists. There is an 1051 * information leak here if the jid exists but is not within 1052 * the caller's jail hierarchy. Jail creators will get EEXIST 1053 * even though they cannot see the jail, and CREATE | UPDATE 1054 * will return ENOENT which is not normally a valid error. 1055 */ 1056 if (jid < 0) { 1057 error = EINVAL; 1058 vfs_opterror(opts, "negative jid"); 1059 goto done_unlock_list; 1060 } 1061 pr = prison_find(jid); 1062 if (pr != NULL) { 1063 ppr = pr->pr_parent; 1064 /* Create: jid must not exist. */ 1065 if (cuflags == JAIL_CREATE) { 1066 mtx_unlock(&pr->pr_mtx); 1067 error = EEXIST; 1068 vfs_opterror(opts, "jail %d already exists", 1069 jid); 1070 goto done_unlock_list; 1071 } 1072 if (!prison_ischild(mypr, pr)) { 1073 mtx_unlock(&pr->pr_mtx); 1074 pr = NULL; 1075 } else if (pr->pr_uref == 0) { 1076 if (!(flags & JAIL_DYING)) { 1077 mtx_unlock(&pr->pr_mtx); 1078 error = ENOENT; 1079 vfs_opterror(opts, "jail %d is dying", 1080 jid); 1081 goto done_unlock_list; 1082 } else if ((flags & JAIL_ATTACH) || 1083 (pr_flags & PR_PERSIST)) { 1084 /* 1085 * A dying jail might be resurrected 1086 * (via attach or persist), but first 1087 * it must determine if another jail 1088 * has claimed its name. Accomplish 1089 * this by implicitly re-setting the 1090 * name. 1091 */ 1092 if (name == NULL) 1093 name = prison_name(mypr, pr); 1094 } 1095 } 1096 } 1097 if (pr == NULL) { 1098 /* Update: jid must exist. 
*/ 1099 if (cuflags == JAIL_UPDATE) { 1100 error = ENOENT; 1101 vfs_opterror(opts, "jail %d not found", jid); 1102 goto done_unlock_list; 1103 } 1104 } 1105 } 1106 /* 1107 * If the caller provided a name, look for a jail by that name. 1108 * This has different semantics for creates and updates keyed by jid 1109 * (where the name must not already exist in a different jail), 1110 * and updates keyed by the name itself (where the name must exist 1111 * because that is the jail being updated). 1112 */ 1113 if (name != NULL) { 1114 namelc = strrchr(name, '.'); 1115 if (namelc == NULL) 1116 namelc = name; 1117 else { 1118 /* 1119 * This is a hierarchical name. Split it into the 1120 * parent and child names, and make sure the parent 1121 * exists or matches an already found jail. 1122 */ 1123 *namelc = '\0'; 1124 if (pr != NULL) { 1125 if (strncmp(name, ppr->pr_name, namelc - name) 1126 || ppr->pr_name[namelc - name] != '\0') { 1127 mtx_unlock(&pr->pr_mtx); 1128 error = EINVAL; 1129 vfs_opterror(opts, 1130 "cannot change jail's parent"); 1131 goto done_unlock_list; 1132 } 1133 } else { 1134 ppr = prison_find_name(mypr, name); 1135 if (ppr == NULL) { 1136 error = ENOENT; 1137 vfs_opterror(opts, 1138 "jail \"%s\" not found", name); 1139 goto done_unlock_list; 1140 } 1141 mtx_unlock(&ppr->pr_mtx); 1142 } 1143 name = ++namelc; 1144 } 1145 if (name[0] != '\0') { 1146 namelen = 1147 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 1148 name_again: 1149 deadpr = NULL; 1150 FOREACH_PRISON_CHILD(ppr, tpr) { 1151 if (tpr != pr && tpr->pr_ref > 0 && 1152 !strcmp(tpr->pr_name + namelen, name)) { 1153 if (pr == NULL && 1154 cuflags != JAIL_CREATE) { 1155 mtx_lock(&tpr->pr_mtx); 1156 if (tpr->pr_ref > 0) { 1157 /* 1158 * Use this jail 1159 * for updates. 
1160 */ 1161 if (tpr->pr_uref > 0) { 1162 pr = tpr; 1163 break; 1164 } 1165 deadpr = tpr; 1166 } 1167 mtx_unlock(&tpr->pr_mtx); 1168 } else if (tpr->pr_uref > 0) { 1169 /* 1170 * Create, or update(jid): 1171 * name must not exist in an 1172 * active sibling jail. 1173 */ 1174 error = EEXIST; 1175 if (pr != NULL) 1176 mtx_unlock(&pr->pr_mtx); 1177 vfs_opterror(opts, 1178 "jail \"%s\" already exists", 1179 name); 1180 goto done_unlock_list; 1181 } 1182 } 1183 } 1184 /* If no active jail is found, use a dying one. */ 1185 if (deadpr != NULL && pr == NULL) { 1186 if (flags & JAIL_DYING) { 1187 mtx_lock(&deadpr->pr_mtx); 1188 if (deadpr->pr_ref == 0) { 1189 mtx_unlock(&deadpr->pr_mtx); 1190 goto name_again; 1191 } 1192 pr = deadpr; 1193 } else if (cuflags == JAIL_UPDATE) { 1194 error = ENOENT; 1195 vfs_opterror(opts, 1196 "jail \"%s\" is dying", name); 1197 goto done_unlock_list; 1198 } 1199 } 1200 /* Update: name must exist if no jid. */ 1201 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1202 error = ENOENT; 1203 vfs_opterror(opts, "jail \"%s\" not found", 1204 name); 1205 goto done_unlock_list; 1206 } 1207 } 1208 } 1209 /* Update: must provide a jid or name. */ 1210 else if (cuflags == JAIL_UPDATE && pr == NULL) { 1211 error = ENOENT; 1212 vfs_opterror(opts, "update specified no jail"); 1213 goto done_unlock_list; 1214 } 1215 1216 /* If there's no prison to update, create a new one and link it in. 
*/ 1217 if (pr == NULL) { 1218 for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) 1219 if (tpr->pr_childcount >= tpr->pr_childmax) { 1220 error = EPERM; 1221 vfs_opterror(opts, "prison limit exceeded"); 1222 goto done_unlock_list; 1223 } 1224 created = 1; 1225 mtx_lock(&ppr->pr_mtx); 1226 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1227 mtx_unlock(&ppr->pr_mtx); 1228 error = ENOENT; 1229 vfs_opterror(opts, "parent jail went away!"); 1230 goto done_unlock_list; 1231 } 1232 ppr->pr_ref++; 1233 ppr->pr_uref++; 1234 mtx_unlock(&ppr->pr_mtx); 1235 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1236 if (jid == 0) { 1237 /* Find the next free jid. */ 1238 jid = lastprid + 1; 1239 findnext: 1240 if (jid == JAIL_MAX) 1241 jid = 1; 1242 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1243 if (tpr->pr_id < jid) 1244 continue; 1245 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1246 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1247 break; 1248 } 1249 if (jid == lastprid) { 1250 error = EAGAIN; 1251 vfs_opterror(opts, 1252 "no available jail IDs"); 1253 free(pr, M_PRISON); 1254 prison_deref(ppr, PD_DEREF | 1255 PD_DEUREF | PD_LIST_XLOCKED); 1256 goto done_releroot; 1257 } 1258 jid++; 1259 goto findnext; 1260 } 1261 lastprid = jid; 1262 } else { 1263 /* 1264 * The jail already has a jid (that did not yet exist), 1265 * so just find where to insert it. 1266 */ 1267 TAILQ_FOREACH(tpr, &allprison, pr_list) 1268 if (tpr->pr_id >= jid) { 1269 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1270 break; 1271 } 1272 } 1273 if (tpr == NULL) 1274 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1275 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1276 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1277 tpr->pr_childcount++; 1278 1279 pr->pr_parent = ppr; 1280 pr->pr_id = jid; 1281 1282 /* Set some default values, and inherit some from the parent. 
*/ 1283 if (name == NULL) 1284 name = ""; 1285 if (path == NULL) { 1286 path = "/"; 1287 root = mypr->pr_root; 1288 vref(root); 1289 } 1290 strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); 1291 pr->pr_flags |= PR_HOST; 1292 #if defined(INET) || defined(INET6) 1293 #ifdef VIMAGE 1294 if (!(pr_flags & PR_VNET)) 1295 #endif 1296 { 1297 #ifdef INET 1298 if (!(ch_flags & PR_IP4_USER)) 1299 pr->pr_flags |= PR_IP4 | PR_IP4_USER; 1300 else if (!(pr_flags & PR_IP4_USER)) { 1301 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1302 if (ppr->pr_ip4 != NULL) { 1303 pr->pr_ip4s = ppr->pr_ip4s; 1304 pr->pr_ip4 = malloc(pr->pr_ip4s * 1305 sizeof(struct in_addr), M_PRISON, 1306 M_WAITOK); 1307 bcopy(ppr->pr_ip4, pr->pr_ip4, 1308 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1309 } 1310 } 1311 #endif 1312 #ifdef INET6 1313 if (!(ch_flags & PR_IP6_USER)) 1314 pr->pr_flags |= PR_IP6 | PR_IP6_USER; 1315 else if (!(pr_flags & PR_IP6_USER)) { 1316 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1317 if (ppr->pr_ip6 != NULL) { 1318 pr->pr_ip6s = ppr->pr_ip6s; 1319 pr->pr_ip6 = malloc(pr->pr_ip6s * 1320 sizeof(struct in6_addr), M_PRISON, 1321 M_WAITOK); 1322 bcopy(ppr->pr_ip6, pr->pr_ip6, 1323 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1324 } 1325 } 1326 #endif 1327 } 1328 #endif 1329 /* Source address selection is always on by default. */ 1330 pr->pr_flags |= _PR_IP_SADDRSEL; 1331 1332 pr->pr_securelevel = ppr->pr_securelevel; 1333 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1334 pr->pr_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; 1335 pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; 1336 1337 pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate; 1338 if (osrelstr == NULL) 1339 strcpy(pr->pr_osrelease, ppr->pr_osrelease); 1340 else 1341 strcpy(pr->pr_osrelease, osrelstr); 1342 1343 LIST_INIT(&pr->pr_children); 1344 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1345 1346 #ifdef VIMAGE 1347 /* Allocate a new vnet if specified. */ 1348 pr->pr_vnet = (pr_flags & PR_VNET) 1349 ? 
vnet_alloc() : ppr->pr_vnet; 1350 #endif 1351 /* 1352 * Allocate a dedicated cpuset for each jail. 1353 * Unlike other initial settings, this may return an erorr. 1354 */ 1355 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1356 if (error) { 1357 prison_deref(pr, PD_LIST_XLOCKED); 1358 goto done_releroot; 1359 } 1360 1361 mtx_lock(&pr->pr_mtx); 1362 /* 1363 * New prisons do not yet have a reference, because we do not 1364 * want other to see the incomplete prison once the 1365 * allprison_lock is downgraded. 1366 */ 1367 } else { 1368 created = 0; 1369 /* 1370 * Grab a reference for existing prisons, to ensure they 1371 * continue to exist for the duration of the call. 1372 */ 1373 pr->pr_ref++; 1374 #if defined(VIMAGE) && (defined(INET) || defined(INET6)) 1375 if ((pr->pr_flags & PR_VNET) && 1376 (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { 1377 error = EINVAL; 1378 vfs_opterror(opts, 1379 "vnet jails cannot have IP address restrictions"); 1380 goto done_deref_locked; 1381 } 1382 #endif 1383 #ifdef INET 1384 if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1385 error = EINVAL; 1386 vfs_opterror(opts, 1387 "ip4 cannot be changed after creation"); 1388 goto done_deref_locked; 1389 } 1390 #endif 1391 #ifdef INET6 1392 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { 1393 error = EINVAL; 1394 vfs_opterror(opts, 1395 "ip6 cannot be changed after creation"); 1396 goto done_deref_locked; 1397 } 1398 #endif 1399 } 1400 1401 /* Do final error checking before setting anything. 
*/ 1402 if (gotslevel) { 1403 if (slevel < ppr->pr_securelevel) { 1404 error = EPERM; 1405 goto done_deref_locked; 1406 } 1407 } 1408 if (gotchildmax) { 1409 if (childmax >= ppr->pr_childmax) { 1410 error = EPERM; 1411 goto done_deref_locked; 1412 } 1413 } 1414 if (gotenforce) { 1415 if (enforce < ppr->pr_enforce_statfs) { 1416 error = EPERM; 1417 goto done_deref_locked; 1418 } 1419 } 1420 if (gotrsnum) { 1421 /* 1422 * devfs_rsnum is a uint16_t 1423 */ 1424 if (rsnum < 0 || rsnum > 65535) { 1425 error = EINVAL; 1426 goto done_deref_locked; 1427 } 1428 /* 1429 * Nested jails always inherit parent's devfs ruleset 1430 */ 1431 if (jailed(td->td_ucred)) { 1432 if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { 1433 error = EPERM; 1434 goto done_deref_locked; 1435 } else 1436 rsnum = ppr->pr_devfs_rsnum; 1437 } 1438 } 1439 #ifdef INET 1440 if (ip4s > 0) { 1441 if (ppr->pr_flags & PR_IP4) { 1442 /* 1443 * Make sure the new set of IP addresses is a 1444 * subset of the parent's list. Don't worry 1445 * about the parent being unlocked, as any 1446 * setting is done with allprison_lock held. 1447 */ 1448 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1449 if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 1450 break; 1451 if (ij == ppr->pr_ip4s) { 1452 error = EPERM; 1453 goto done_deref_locked; 1454 } 1455 if (ip4s > 1) { 1456 for (ii = ij = 1; ii < ip4s; ii++) { 1457 if (ip4[ii].s_addr == 1458 ppr->pr_ip4[0].s_addr) 1459 continue; 1460 for (; ij < ppr->pr_ip4s; ij++) 1461 if (ip4[ii].s_addr == 1462 ppr->pr_ip4[ij].s_addr) 1463 break; 1464 if (ij == ppr->pr_ip4s) 1465 break; 1466 } 1467 if (ij == ppr->pr_ip4s) { 1468 error = EPERM; 1469 goto done_deref_locked; 1470 } 1471 } 1472 } 1473 /* 1474 * Check for conflicting IP addresses. We permit them 1475 * if there is no more than one IP on each jail. If 1476 * there is a duplicate on a jail with more than one 1477 * IP stop checking and return error. 
1478 */ 1479 tppr = ppr; 1480 #ifdef VIMAGE 1481 for (; tppr != &prison0; tppr = tppr->pr_parent) 1482 if (tppr->pr_flags & PR_VNET) 1483 break; 1484 #endif 1485 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1486 if (tpr == pr || 1487 #ifdef VIMAGE 1488 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1489 #endif 1490 tpr->pr_uref == 0) { 1491 descend = 0; 1492 continue; 1493 } 1494 if (!(tpr->pr_flags & PR_IP4_USER)) 1495 continue; 1496 descend = 0; 1497 if (tpr->pr_ip4 == NULL || 1498 (ip4s == 1 && tpr->pr_ip4s == 1)) 1499 continue; 1500 for (ii = 0; ii < ip4s; ii++) { 1501 if (_prison_check_ip4(tpr, &ip4[ii]) == 0) { 1502 error = EADDRINUSE; 1503 vfs_opterror(opts, 1504 "IPv4 addresses clash"); 1505 goto done_deref_locked; 1506 } 1507 } 1508 } 1509 } 1510 #endif 1511 #ifdef INET6 1512 if (ip6s > 0) { 1513 if (ppr->pr_flags & PR_IP6) { 1514 /* 1515 * Make sure the new set of IP addresses is a 1516 * subset of the parent's list. 1517 */ 1518 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1519 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1520 &ppr->pr_ip6[ij])) 1521 break; 1522 if (ij == ppr->pr_ip6s) { 1523 error = EPERM; 1524 goto done_deref_locked; 1525 } 1526 if (ip6s > 1) { 1527 for (ii = ij = 1; ii < ip6s; ii++) { 1528 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1529 &ppr->pr_ip6[0])) 1530 continue; 1531 for (; ij < ppr->pr_ip6s; ij++) 1532 if (IN6_ARE_ADDR_EQUAL( 1533 &ip6[ii], &ppr->pr_ip6[ij])) 1534 break; 1535 if (ij == ppr->pr_ip6s) 1536 break; 1537 } 1538 if (ij == ppr->pr_ip6s) { 1539 error = EPERM; 1540 goto done_deref_locked; 1541 } 1542 } 1543 } 1544 /* Check for conflicting IP addresses. 
*/ 1545 tppr = ppr; 1546 #ifdef VIMAGE 1547 for (; tppr != &prison0; tppr = tppr->pr_parent) 1548 if (tppr->pr_flags & PR_VNET) 1549 break; 1550 #endif 1551 FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { 1552 if (tpr == pr || 1553 #ifdef VIMAGE 1554 (tpr != tppr && (tpr->pr_flags & PR_VNET)) || 1555 #endif 1556 tpr->pr_uref == 0) { 1557 descend = 0; 1558 continue; 1559 } 1560 if (!(tpr->pr_flags & PR_IP6_USER)) 1561 continue; 1562 descend = 0; 1563 if (tpr->pr_ip6 == NULL || 1564 (ip6s == 1 && tpr->pr_ip6s == 1)) 1565 continue; 1566 for (ii = 0; ii < ip6s; ii++) { 1567 if (_prison_check_ip6(tpr, &ip6[ii]) == 0) { 1568 error = EADDRINUSE; 1569 vfs_opterror(opts, 1570 "IPv6 addresses clash"); 1571 goto done_deref_locked; 1572 } 1573 } 1574 } 1575 } 1576 #endif 1577 onamelen = namelen = 0; 1578 if (name != NULL) { 1579 /* Give a default name of the jid. */ 1580 if (name[0] == '\0') 1581 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1582 else if (*namelc == '0' || (strtoul(namelc, &p, 10) != jid && 1583 *p == '\0')) { 1584 error = EINVAL; 1585 vfs_opterror(opts, 1586 "name cannot be numeric (unless it is the jid)"); 1587 goto done_deref_locked; 1588 } 1589 /* 1590 * Make sure the name isn't too long for the prison or its 1591 * children. 1592 */ 1593 onamelen = strlen(pr->pr_name); 1594 namelen = strlen(name); 1595 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1596 error = ENAMETOOLONG; 1597 goto done_deref_locked; 1598 } 1599 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1600 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1601 sizeof(pr->pr_name)) { 1602 error = ENAMETOOLONG; 1603 goto done_deref_locked; 1604 } 1605 } 1606 } 1607 if (pr_allow & ~ppr->pr_allow) { 1608 error = EPERM; 1609 goto done_deref_locked; 1610 } 1611 1612 /* Set the parameters of the prison. 
*/ 1613 #ifdef INET 1614 redo_ip4 = 0; 1615 if (pr_flags & PR_IP4_USER) { 1616 pr->pr_flags |= PR_IP4; 1617 free(pr->pr_ip4, M_PRISON); 1618 pr->pr_ip4s = ip4s; 1619 pr->pr_ip4 = ip4; 1620 ip4 = NULL; 1621 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1622 #ifdef VIMAGE 1623 if (tpr->pr_flags & PR_VNET) { 1624 descend = 0; 1625 continue; 1626 } 1627 #endif 1628 if (prison_restrict_ip4(tpr, NULL)) { 1629 redo_ip4 = 1; 1630 descend = 0; 1631 } 1632 } 1633 } 1634 #endif 1635 #ifdef INET6 1636 redo_ip6 = 0; 1637 if (pr_flags & PR_IP6_USER) { 1638 pr->pr_flags |= PR_IP6; 1639 free(pr->pr_ip6, M_PRISON); 1640 pr->pr_ip6s = ip6s; 1641 pr->pr_ip6 = ip6; 1642 ip6 = NULL; 1643 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1644 #ifdef VIMAGE 1645 if (tpr->pr_flags & PR_VNET) { 1646 descend = 0; 1647 continue; 1648 } 1649 #endif 1650 if (prison_restrict_ip6(tpr, NULL)) { 1651 redo_ip6 = 1; 1652 descend = 0; 1653 } 1654 } 1655 } 1656 #endif 1657 if (gotslevel) { 1658 pr->pr_securelevel = slevel; 1659 /* Set all child jails to be at least this level. */ 1660 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1661 if (tpr->pr_securelevel < slevel) 1662 tpr->pr_securelevel = slevel; 1663 } 1664 if (gotchildmax) { 1665 pr->pr_childmax = childmax; 1666 /* Set all child jails to under this limit. */ 1667 FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) 1668 if (tpr->pr_childmax > childmax - level) 1669 tpr->pr_childmax = childmax > level 1670 ? childmax - level : 0; 1671 } 1672 if (gotenforce) { 1673 pr->pr_enforce_statfs = enforce; 1674 /* Pass this restriction on to the children. */ 1675 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1676 if (tpr->pr_enforce_statfs < enforce) 1677 tpr->pr_enforce_statfs = enforce; 1678 } 1679 if (gotrsnum) { 1680 pr->pr_devfs_rsnum = rsnum; 1681 /* Pass this restriction on to the children. 
*/ 1682 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1683 tpr->pr_devfs_rsnum = rsnum; 1684 } 1685 if (name != NULL) { 1686 if (ppr == &prison0) 1687 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1688 else 1689 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1690 ppr->pr_name, name); 1691 /* Change this component of child names. */ 1692 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1693 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1694 strlen(tpr->pr_name + onamelen) + 1); 1695 bcopy(pr->pr_name, tpr->pr_name, namelen); 1696 } 1697 } 1698 if (path != NULL) { 1699 /* Try to keep a real-rooted full pathname. */ 1700 if (fullpath_disabled && path[0] == '/' && 1701 strcmp(mypr->pr_path, "/")) 1702 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1703 mypr->pr_path, path); 1704 else 1705 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1706 pr->pr_root = root; 1707 } 1708 if (PR_HOST & ch_flags & ~pr_flags) { 1709 if (pr->pr_flags & PR_HOST) { 1710 /* 1711 * Copy the parent's host info. As with pr_ip4 above, 1712 * the lack of a lock on the parent is not a problem; 1713 * it is always set with allprison_lock at least 1714 * shared, and is held exclusively here. 1715 */ 1716 strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, 1717 sizeof(pr->pr_hostname)); 1718 strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, 1719 sizeof(pr->pr_domainname)); 1720 strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, 1721 sizeof(pr->pr_hostuuid)); 1722 pr->pr_hostid = pr->pr_parent->pr_hostid; 1723 } 1724 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1725 /* Set this prison, and any descendants without PR_HOST. 
*/ 1726 if (host != NULL) 1727 strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); 1728 if (domain != NULL) 1729 strlcpy(pr->pr_domainname, domain, 1730 sizeof(pr->pr_domainname)); 1731 if (uuid != NULL) 1732 strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); 1733 if (gothid) 1734 pr->pr_hostid = hid; 1735 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1736 if (tpr->pr_flags & PR_HOST) 1737 descend = 0; 1738 else { 1739 if (host != NULL) 1740 strlcpy(tpr->pr_hostname, 1741 pr->pr_hostname, 1742 sizeof(tpr->pr_hostname)); 1743 if (domain != NULL) 1744 strlcpy(tpr->pr_domainname, 1745 pr->pr_domainname, 1746 sizeof(tpr->pr_domainname)); 1747 if (uuid != NULL) 1748 strlcpy(tpr->pr_hostuuid, 1749 pr->pr_hostuuid, 1750 sizeof(tpr->pr_hostuuid)); 1751 if (gothid) 1752 tpr->pr_hostid = hid; 1753 } 1754 } 1755 } 1756 if ((tallow = ch_allow & ~pr_allow)) { 1757 /* Clear allow bits in all children. */ 1758 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1759 tpr->pr_allow &= ~tallow; 1760 } 1761 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1762 /* 1763 * Persistent prisons get an extra reference, and prisons losing their 1764 * persist flag lose that reference. Only do this for existing prisons 1765 * for now, so new ones will remain unseen until after the module 1766 * handlers have completed. 1767 */ 1768 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1769 if (pr_flags & PR_PERSIST) { 1770 pr->pr_ref++; 1771 pr->pr_uref++; 1772 } else { 1773 pr->pr_ref--; 1774 pr->pr_uref--; 1775 } 1776 } 1777 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1778 mtx_unlock(&pr->pr_mtx); 1779 1780 #ifdef RACCT 1781 if (racct_enable && created) 1782 prison_racct_attach(pr); 1783 #endif 1784 1785 /* Locks may have prevented a complete restriction of child IP 1786 * addresses. If so, allocate some more memory and try again. 
1787 */ 1788 #ifdef INET 1789 while (redo_ip4) { 1790 ip4s = pr->pr_ip4s; 1791 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1792 mtx_lock(&pr->pr_mtx); 1793 redo_ip4 = 0; 1794 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1795 #ifdef VIMAGE 1796 if (tpr->pr_flags & PR_VNET) { 1797 descend = 0; 1798 continue; 1799 } 1800 #endif 1801 if (prison_restrict_ip4(tpr, ip4)) { 1802 if (ip4 != NULL) 1803 ip4 = NULL; 1804 else 1805 redo_ip4 = 1; 1806 } 1807 } 1808 mtx_unlock(&pr->pr_mtx); 1809 } 1810 #endif 1811 #ifdef INET6 1812 while (redo_ip6) { 1813 ip6s = pr->pr_ip6s; 1814 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1815 mtx_lock(&pr->pr_mtx); 1816 redo_ip6 = 0; 1817 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1818 #ifdef VIMAGE 1819 if (tpr->pr_flags & PR_VNET) { 1820 descend = 0; 1821 continue; 1822 } 1823 #endif 1824 if (prison_restrict_ip6(tpr, ip6)) { 1825 if (ip6 != NULL) 1826 ip6 = NULL; 1827 else 1828 redo_ip6 = 1; 1829 } 1830 } 1831 mtx_unlock(&pr->pr_mtx); 1832 } 1833 #endif 1834 1835 /* Let the modules do their work. */ 1836 sx_downgrade(&allprison_lock); 1837 if (created) { 1838 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1839 if (error) { 1840 prison_deref(pr, PD_LIST_SLOCKED); 1841 goto done_errmsg; 1842 } 1843 } 1844 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1845 if (error) { 1846 prison_deref(pr, created 1847 ? PD_LIST_SLOCKED 1848 : PD_DEREF | PD_LIST_SLOCKED); 1849 goto done_errmsg; 1850 } 1851 1852 /* Attach this process to the prison if requested. 
*/ 1853 if (flags & JAIL_ATTACH) { 1854 mtx_lock(&pr->pr_mtx); 1855 error = do_jail_attach(td, pr); 1856 if (error) { 1857 vfs_opterror(opts, "attach failed"); 1858 if (!created) 1859 prison_deref(pr, PD_DEREF); 1860 goto done_errmsg; 1861 } 1862 } 1863 1864 #ifdef RACCT 1865 if (racct_enable && !created) { 1866 if (!(flags & JAIL_ATTACH)) 1867 sx_sunlock(&allprison_lock); 1868 prison_racct_modify(pr); 1869 if (!(flags & JAIL_ATTACH)) 1870 sx_slock(&allprison_lock); 1871 } 1872 #endif 1873 1874 td->td_retval[0] = pr->pr_id; 1875 1876 /* 1877 * Now that it is all there, drop the temporary reference from existing 1878 * prisons. Or add a reference to newly created persistent prisons 1879 * (which was not done earlier so that the prison would not be publicly 1880 * visible). 1881 */ 1882 if (!created) { 1883 prison_deref(pr, (flags & JAIL_ATTACH) 1884 ? PD_DEREF 1885 : PD_DEREF | PD_LIST_SLOCKED); 1886 } else { 1887 if (pr_flags & PR_PERSIST) { 1888 mtx_lock(&pr->pr_mtx); 1889 pr->pr_ref++; 1890 pr->pr_uref++; 1891 mtx_unlock(&pr->pr_mtx); 1892 } 1893 if (!(flags & JAIL_ATTACH)) 1894 sx_sunlock(&allprison_lock); 1895 } 1896 1897 goto done_errmsg; 1898 1899 done_deref_locked: 1900 prison_deref(pr, created 1901 ? 
PD_LOCKED | PD_LIST_XLOCKED
	    : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
	goto done_releroot;
 done_unlock_list:
	sx_xunlock(&allprison_lock);
 done_releroot:
	if (root != NULL)
		vrele(root);
 done_errmsg:
	if (error) {
		vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
		if (errmsg_len > 0) {
			errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1;
			if (errmsg_pos > 0) {
				if (optuio->uio_segflg == UIO_SYSSPACE)
					bcopy(errmsg,
					   optuio->uio_iov[errmsg_pos].iov_base,
					   errmsg_len);
				else
					copyout(errmsg,
					   optuio->uio_iov[errmsg_pos].iov_base,
					   errmsg_len);
			}
		}
	}
 done_free:
#ifdef INET
	free(ip4, M_PRISON);
#endif
#ifdef INET6
	free(ip6, M_PRISON);
#endif
	if (g_path != NULL)
		free(g_path, M_TEMP);
	vfs_freeopts(opts);
	return (error);
}


/*
 * struct jail_get_args {
 *	struct iovec *iovp;
 *	unsigned int iovcnt;
 *	int flags;
 * };
 *
 * System call entry point for jail_get.  Copies the caller's iovec array
 * into the kernel, lets kern_jail_get() do the lookup, and on success
 * copies the (possibly updated) iovec array back so the caller sees the
 * iov_len values that kern_jail_get() stored.
 *
 * Returns 0 on success, EINVAL for an odd iovec count, or whatever error
 * copyinuio(), kern_jail_get() or copyout() reports.
 */
int
sys_jail_get(struct thread *td, struct jail_get_args *uap)
{
	struct uio *auio;
	int error;

	/*
	 * Check that we have an even number of iovecs.  kern_jail_get()
	 * addresses the value of option N as iovec 2N + 1, so the array is
	 * consumed in pairs.
	 */
	if (uap->iovcnt & 1)
		return (EINVAL);

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_jail_get(td, auio, uap->flags);
	/* Only report the updated iovecs when the lookup itself succeeded. */
	if (error == 0)
		error = copyout(auio->uio_iov, uap->iovp,
		    uap->iovcnt * sizeof (struct iovec));
	free(auio, M_IOV);
	return (error);
}

/*
 * Look up one jail and report its parameters through the option list
 * described by optuio.
 *
 * The jail is selected by the first of these options that is present:
 * "lastjid" (the first visible jail whose id is greater than the given
 * one), "jid" (when non-zero), or "name".  Only jails for which
 * prison_ischild()/prison_find_child()/prison_find_name() succeed relative
 * to the calling thread's prison are considered, and a jail with
 * pr_uref == 0 is reported only when JAIL_DYING is set in flags.
 *
 * On success the jail's id is stored in td->td_retval[0] and every option
 * value is written back to its iovec; 0 is returned.  On failure an errno
 * value is returned and, if the caller supplied an "errmsg" option, the
 * message recorded with vfs_opterror() is copied into it.
 */
int
kern_jail_get(struct thread *td, struct uio *optuio, int flags)
{
	struct prison *pr, *mypr;
	struct vfsopt *opt;
	struct vfsoptlist *opts;
	char *errmsg, *name;
	int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos;

	if (flags & ~JAIL_GET_MASK)
		return (EINVAL);

	/* Get the parameter list. */
	error = vfs_buildopts(optuio, &opts);
	if (error)
		return (error);
	/* Remembered now; it is used again when writing results and errors. */
	errmsg_pos = vfs_getopt_pos(opts, "errmsg");
	mypr = td->td_ucred->cr_prison;

	/*
	 * Find the prison specified by one of: lastjid, jid, name.
	 * Each lookup that succeeds jumps to found_prison with pr->pr_mtx
	 * held; an option that is absent (ENOENT) falls through to the next.
	 */
	sx_slock(&allprison_lock);
	error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
	if (error == 0) {
		TAILQ_FOREACH(pr, &allprison, pr_list) {
			if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
				mtx_lock(&pr->pr_mtx);
				/* Leave the loop with the mutex still held. */
				if (pr->pr_ref > 0 &&
				    (pr->pr_uref > 0 || (flags & JAIL_DYING)))
					break;
				mtx_unlock(&pr->pr_mtx);
			}
		}
		if (pr != NULL)
			goto found_prison;
		error = ENOENT;
		vfs_opterror(opts, "no jail after %d", jid);
		goto done_unlock_list;
	} else if (error != ENOENT)
		goto done_unlock_list;

	error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
	if (error == 0) {
		/* A jid of zero is ignored; the name lookup is tried next. */
		if (jid != 0) {
			pr = prison_find_child(mypr, jid);
			if (pr != NULL) {
				if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
					mtx_unlock(&pr->pr_mtx);
					error = ENOENT;
					vfs_opterror(opts, "jail %d is dying",
					    jid);
					goto done_unlock_list;
				}
				goto found_prison;
			}
			error = ENOENT;
			vfs_opterror(opts, "jail %d not found", jid);
			goto done_unlock_list;
		}
	} else if (error != ENOENT)
		goto done_unlock_list;

	error = vfs_getopt(opts, "name", (void **)&name, &len);
	if (error == 0) {
		/* The name must be a non-empty, NUL-terminated buffer. */
		if (len == 0 || name[len - 1] != '\0') {
			error = EINVAL;
			goto done_unlock_list;
		}
		pr = prison_find_name(mypr, name);
		if (pr != NULL) {
			if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) {
				mtx_unlock(&pr->pr_mtx);
				error = ENOENT;
				vfs_opterror(opts, "jail \"%s\" is dying",
				    name);
				goto done_unlock_list;
			}
			goto found_prison;
		}
		error = ENOENT;
		vfs_opterror(opts, "jail \"%s\" not found", name);
		goto done_unlock_list;
	} else if (error != ENOENT)
		goto done_unlock_list;

	vfs_opterror(opts, "no jail specified");
	error = ENOENT;
	goto done_unlock_list;

 found_prison:
	/*
	 * Get the parameters of the prison.  pr->pr_mtx is held here; take a
	 * reference for the duration of the call and record in "locked" that
	 * the mutex is still held, so done_deref can tell prison_deref().
	 *
	 * For every vfs_setopt*() call below ENOENT is tolerated --
	 * presumably it means the caller did not ask for that parameter
	 * (vfs_setopt is defined elsewhere; confirm there).
	 */
	pr->pr_ref++;
	locked = PD_LOCKED;
	td->td_retval[0] = pr->pr_id;
	error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	/* A direct child of the caller's prison reports a parent id of 0. */
	i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id;
	error = vfs_setopt(opts, "parent", &i, sizeof(i));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopts(opts, "name", prison_name(mypr, pr));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id,
	    sizeof(pr->pr_cpuset->cs_id));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopts(opts, "path", prison_path(mypr, pr));
	if (error != 0 && error != ENOENT)
		goto done_deref;
#ifdef INET
	error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4,
	    pr->pr_ip4s * sizeof(*pr->pr_ip4));
	if (error != 0 && error != ENOENT)
		goto done_deref;
#endif
#ifdef INET6
	error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6,
	    pr->pr_ip6s * sizeof(*pr->pr_ip6));
	if (error != 0 && error != ENOENT)
		goto done_deref;
#endif
	error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel,
	    sizeof(pr->pr_securelevel));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopt(opts, "children.cur", &pr->pr_childcount,
	    sizeof(pr->pr_childcount));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopt(opts, "children.max", &pr->pr_childmax,
	    sizeof(pr->pr_childmax));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopts(opts, "host.hostname", pr->pr_hostname);
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopts(opts, "host.domainname", pr->pr_domainname);
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid);
	if (error != 0 && error != ENOENT)
		goto done_deref;
#ifdef COMPAT_FREEBSD32
	/* An ILP32 process gets the host id as a 32-bit value. */
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
		uint32_t hid32 = pr->pr_hostid;

		error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32));
	} else
#endif
	error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid,
	    sizeof(pr->pr_hostid));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs,
	    sizeof(pr->pr_enforce_statfs));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum,
	    sizeof(pr->pr_devfs_rsnum));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	/*
	 * Boolean flags: entry fi of pr_flag_names names bit (1 << fi) of
	 * pr_flags.  Both the positive and the negated ("no...") spelling
	 * are filled in, with opposite values.
	 */
	for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]);
	    fi++) {
		if (pr_flag_names[fi] == NULL)
			continue;
		i = (pr->pr_flags & (1 << fi)) ? 1 : 0;
		error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i));
		if (error != 0 && error != ENOENT)
			goto done_deref;
		i = !i;
		error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i));
		if (error != 0 && error != ENOENT)
			goto done_deref;
	}
	/*
	 * Three-state "jailsys" parameters: reduce the pair of flag bits to
	 * JAIL_SYS_DISABLE, JAIL_SYS_NEW or JAIL_SYS_INHERIT.  The disable
	 * case is only considered when the entry has a disable mask at all.
	 */
	for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]);
	    fi++) {
		i = pr->pr_flags &
		    (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new);
		i = pr_flag_jailsys[fi].disable &&
		      (i == pr_flag_jailsys[fi].disable) ? JAIL_SYS_DISABLE
		    : (i == pr_flag_jailsys[fi].new) ? JAIL_SYS_NEW
		    : JAIL_SYS_INHERIT;
		error =
		    vfs_setopt(opts, pr_flag_jailsys[fi].name, &i, sizeof(i));
		if (error != 0 && error != ENOENT)
			goto done_deref;
	}
	/* Permission bits in pr_allow, handled like the boolean flags above. */
	for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]);
	    fi++) {
		if (pr_allow_names[fi] == NULL)
			continue;
		i = (pr->pr_allow & (1 << fi)) ? 1 : 0;
		error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i));
		if (error != 0 && error != ENOENT)
			goto done_deref;
		i = !i;
		error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i));
		if (error != 0 && error != ENOENT)
			goto done_deref;
	}
	/* A prison with no user references is reported as dying. */
	i = (pr->pr_uref == 0);
	error = vfs_setopt(opts, "dying", &i, sizeof(i));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	i = !i;
	error = vfs_setopt(opts, "nodying", &i, sizeof(i));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate,
	    sizeof(pr->pr_osreldate));
	if (error != 0 && error != ENOENT)
		goto done_deref;
	error = vfs_setopts(opts, "osrelease", pr->pr_osrelease);
	if (error != 0 && error != ENOENT)
		goto done_deref;

	/*
	 * Get the module parameters.  The prison mutex is dropped first, so
	 * "locked" is cleared for the benefit of done_deref.
	 */
	mtx_unlock(&pr->pr_mtx);
	locked = 0;
	error = osd_jail_call(pr, PR_METHOD_GET, opts);
	if (error)
		goto done_deref;
	/*
	 * Drop the temporary reference.  NOTE(review): no path after this
	 * call releases allprison_lock, so prison_deref() presumably drops
	 * the shared lock when given PD_LIST_SLOCKED -- confirm against its
	 * definition.
	 */
	prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED);

	/* By now, all parameters should have been noted. */
	TAILQ_FOREACH(opt, opts, link) {
		if (!opt->seen && strcmp(opt->name, "errmsg")) {
			error = EINVAL;
			vfs_opterror(opts, "unknown parameter: %s", opt->name);
			goto done_errmsg;
		}
	}

	/*
	 * Write the fetched parameters back to userspace.  Option number
	 * opt->pos has its value in iovec 2 * pos + 1; the errmsg option and
	 * options with a negative position are skipped.
	 */
	error = 0;
	TAILQ_FOREACH(opt, opts, link) {
		if (opt->pos >= 0 && opt->pos != errmsg_pos) {
			pos = 2 * opt->pos + 1;
			optuio->uio_iov[pos].iov_len = opt->len;
			if (opt->value != NULL) {
				if (optuio->uio_segflg == UIO_SYSSPACE) {
					bcopy(opt->value,
					    optuio->uio_iov[pos].iov_base,
					    opt->len);
				} else {
					error = copyout(opt->value,
					    optuio->uio_iov[pos].iov_base,
					    opt->len);
					if (error)
						break;
				}
			}
		}
	}
	goto done_errmsg;

 done_deref:
	/* "locked" is PD_LOCKED while pr->pr_mtx is still held, else 0. */
	prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED);
	goto done_errmsg;

 done_unlock_list:
	sx_sunlock(&allprison_lock);
 done_errmsg:
	/*
	 * Report the error text, if the caller supplied an errmsg option.
	 * The result of this copyout is not checked; "error" already holds
	 * the failure being reported.
	 */
	if (error && errmsg_pos >= 0) {
		vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len);
		errmsg_pos = 2 * errmsg_pos + 1;
		if (errmsg_len > 0) {
			if (optuio->uio_segflg == UIO_SYSSPACE)
				bcopy(errmsg,
				    optuio->uio_iov[errmsg_pos].iov_base,
				    errmsg_len);
			else
				copyout(errmsg,
				    optuio->uio_iov[errmsg_pos].iov_base,
				    errmsg_len);
		}
	}
	vfs_freeopts(opts);
	return (error);
}


/*
 * struct jail_remove_args {
 *	int jid;
 * };
 *
 * Remove the jail identified by uap->jid, together with all of its
 * descendants.  The caller needs PRIV_JAIL_REMOVE, and the jail must be
 * found by prison_find_child() relative to the caller's own prison.
 *
 * Returns 0 on success, the priv_check() error, or EINVAL when no such
 * jail is found.
 */
int
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
{
	struct prison *pr, *cpr, *lpr, *tpr;
	int descend, error;

	error = priv_check(td, PRIV_JAIL_REMOVE);
	if (error)
		return (error);

	sx_xlock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
	if (pr == NULL) {
		sx_xunlock(&allprison_lock);
		return (EINVAL);
	}

	/* Remove all descendants of this prison, then remove this prison. */
	pr->pr_ref++;
	pr->pr_flags |= PR_REMOVE;
	if (!LIST_EMPTY(&pr->pr_children)) {
		mtx_unlock(&pr->pr_mtx);
		/*
		 * Removal runs one step behind the walk: each descendant is
		 * marked and referenced when visited, but the prison handed
		 * to prison_remove_one() is the previously visited one
		 * (lpr).  prison_remove_one() returns with allprison_lock
		 * released, so the lock is re-taken after every call.
		 * NOTE(review): the lag presumably keeps the iterator from
		 * standing on a prison that has just been removed -- confirm
		 * against FOREACH_PRISON_DESCENDANT.
		 */
		lpr = NULL;
		FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
			mtx_lock(&cpr->pr_mtx);
			if (cpr->pr_ref > 0) {
				tpr = cpr;
				cpr->pr_ref++;
				cpr->pr_flags |= PR_REMOVE;
			} else {
				/* Already removed - do not do it again. */
				tpr = NULL;
			}
			mtx_unlock(&cpr->pr_mtx);
			if (lpr != NULL) {
				mtx_lock(&lpr->pr_mtx);
				prison_remove_one(lpr);
				sx_xlock(&allprison_lock);
			}
			lpr = tpr;
		}
		/* Remove the last descendant that the walk marked. */
		if (lpr != NULL) {
			mtx_lock(&lpr->pr_mtx);
			prison_remove_one(lpr);
			sx_xlock(&allprison_lock);
		}
		mtx_lock(&pr->pr_mtx);
	}
	prison_remove_one(pr);
	return (0);
}

/*
 * Remove a single prison on behalf of sys_jail_remove().
 *
 * Entered with pr->pr_mtx held, allprison_lock held exclusively, and one
 * reference on pr already taken by the caller.  On the slow path both
 * locks are released explicitly before returning; on the fast path they
 * are handed to prison_deref() via PD_LOCKED | PD_LIST_XLOCKED
 * (presumably released there -- its definition is outside this view).
 */
static void
prison_remove_one(struct prison *pr)
{
	struct proc *p;
	int deuref;

	/*
	 * If the prison was persistent, it is not anymore.  The matching
	 * user reference is given up later by passing PD_DEUREF to
	 * prison_deref().
	 */
	deuref = 0;
	if (pr->pr_flags & PR_PERSIST) {
		pr->pr_ref--;
		deuref = PD_DEUREF;
		pr->pr_flags &= ~PR_PERSIST;
	}

	/*
	 * jail_remove added a reference.  If that's the only one, remove
	 * the prison now.
	 */
	KASSERT(pr->pr_ref > 0,
	    ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
	if (pr->pr_ref == 1) {
		prison_deref(pr,
		    deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED);
		return;
	}

	mtx_unlock(&pr->pr_mtx);
	sx_xunlock(&allprison_lock);
	/*
	 * Kill all processes unfortunate enough to be attached to this prison.
	 */
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		/* Skip processes still being set up or without credentials. */
		if (p->p_state != PRS_NEW && p->p_ucred &&
		    p->p_ucred->cr_prison == pr)
			kern_psignal(p, SIGKILL);
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
	/* Remove the temporary reference added by jail_remove. */
	prison_deref(pr, deuref | PD_DEREF);
}


/*
 * struct jail_attach_args {
 *	int jid;
 * };
 *
 * Attach the calling process to the jail identified by uap->jid.  The
 * caller needs PRIV_JAIL_ATTACH.  Returns the priv_check() error, EINVAL
 * when the jail is not found or has no user references, or otherwise the
 * result of do_jail_attach().
 */
int
sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
{
	struct prison *pr;
	int error;

	error = priv_check(td, PRIV_JAIL_ATTACH);
	if (error)
		return (error);

	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
	if (pr == NULL) {
		sx_sunlock(&allprison_lock);
		return (EINVAL);
	}

	/*
	 * Do not allow a process to attach to a prison that is not
	 * considered to be "alive".
	 */
	if (pr->pr_uref == 0) {
		mtx_unlock(&pr->pr_mtx);
		sx_sunlock(&allprison_lock);
		return (EINVAL);
	}

	/* do_jail_attach() takes over pr->pr_mtx and the shared list lock. */
	return (do_jail_attach(td, pr));
}

/*
 * Move the calling process into prison pr.
 *
 * Entered with pr->pr_mtx held and allprison_lock held shared; both are
 * released here (the list lock either directly or through prison_deref()
 * with PD_LIST_SLOCKED).  The function takes a reference and a user
 * reference on pr at entry.  On success these stay with pr, whose pointer
 * is stored in the process's new credential, and a reference and user
 * reference are dropped from the old prison instead.  On failure the ones
 * taken on pr are dropped again.
 *
 * Returns 0 on success or the error from the step that failed.
 */
static int
do_jail_attach(struct thread *td, struct prison *pr)
{
	struct prison *ppr;
	struct proc *p;
	struct ucred *newcred, *oldcred;
	int error;

	/*
	 * XXX: Note that there is a slight race here if two threads
	 * in the same privileged process attempt to attach to two
	 * different jails at the same time.  It is important for
	 * user processes not to do this, or they might end up with
	 * a process root from one prison, but attached to the jail
	 * of another.
	 */
	pr->pr_ref++;
	pr->pr_uref++;
	mtx_unlock(&pr->pr_mtx);

	/* Let modules do whatever they need to prepare for attaching. */
	error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
	if (error) {
		prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
		return (error);
	}
	sx_sunlock(&allprison_lock);

	/*
	 * Reparent the newly attached process to this jail.
	 */
	ppr = td->td_ucred->cr_prison;
	p = td->td_proc;
	error = cpuset_setproc_update_set(p, pr->pr_cpuset);
	if (error)
		goto e_revert_osd;

	/* The jail's root vnode stays locked for change_dir and the MAC check. */
	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
	if ((error = change_dir(pr->pr_root, td)) != 0)
		goto e_unlock;
#ifdef MAC
	if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
		goto e_unlock;
#endif
	VOP_UNLOCK(pr->pr_root, 0);
	if ((error = change_root(pr->pr_root, td)))
		goto e_revert_osd;

	/* Give the process a copy of its credential that points at pr. */
	newcred = crget();
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	setsugid(p);
	crcopy(newcred, oldcred);
	newcred->cr_prison = pr;
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
#ifdef RACCT
	racct_proc_ucred_changed(p, oldcred, newcred);
#endif
	crfree(oldcred);
	/* The process has left its old prison. */
	prison_deref(ppr, PD_DEREF | PD_DEUREF);
	return (0);
 e_unlock:
	VOP_UNLOCK(pr->pr_root, 0);
 e_revert_osd:
	/* Tell modules this thread is still in its old jail after all. */
	(void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
	prison_deref(pr, PD_DEREF | PD_DEUREF);
	return (error);
}


/*
 * Returns a locked prison instance, or NULL on failure.
 *
 * Searches allprison for an entry whose id equals prid and whose
 * reference count is still positive.  On a match the prison is returned
 * with pr_mtx held.  The caller must hold allprison_lock (asserted).
 */
struct prison *
prison_find(int prid)
{
	struct prison *pr;

	sx_assert(&allprison_lock, SX_LOCKED);
	TAILQ_FOREACH(pr, &allprison, pr_list) {
		if (pr->pr_id == prid) {
			mtx_lock(&pr->pr_mtx);
			/* pr_ref is checked under the prison's own mutex. */
			if (pr->pr_ref > 0)
				return (pr);
			mtx_unlock(&pr->pr_mtx);
		}
	}
	return (NULL);
}

/*
 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL.
2484 */ 2485 struct prison * 2486 prison_find_child(struct prison *mypr, int prid) 2487 { 2488 struct prison *pr; 2489 int descend; 2490 2491 sx_assert(&allprison_lock, SX_LOCKED); 2492 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2493 if (pr->pr_id == prid) { 2494 mtx_lock(&pr->pr_mtx); 2495 if (pr->pr_ref > 0) 2496 return (pr); 2497 mtx_unlock(&pr->pr_mtx); 2498 } 2499 } 2500 return (NULL); 2501 } 2502 2503 /* 2504 * Look for the name relative to mypr. Returns a locked prison or NULL. 2505 */ 2506 struct prison * 2507 prison_find_name(struct prison *mypr, const char *name) 2508 { 2509 struct prison *pr, *deadpr; 2510 size_t mylen; 2511 int descend; 2512 2513 sx_assert(&allprison_lock, SX_LOCKED); 2514 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2515 again: 2516 deadpr = NULL; 2517 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2518 if (!strcmp(pr->pr_name + mylen, name)) { 2519 mtx_lock(&pr->pr_mtx); 2520 if (pr->pr_ref > 0) { 2521 if (pr->pr_uref > 0) 2522 return (pr); 2523 deadpr = pr; 2524 } 2525 mtx_unlock(&pr->pr_mtx); 2526 } 2527 } 2528 /* There was no valid prison - perhaps there was a dying one. */ 2529 if (deadpr != NULL) { 2530 mtx_lock(&deadpr->pr_mtx); 2531 if (deadpr->pr_ref == 0) { 2532 mtx_unlock(&deadpr->pr_mtx); 2533 goto again; 2534 } 2535 } 2536 return (deadpr); 2537 } 2538 2539 /* 2540 * See if a prison has the specific flag set. 2541 */ 2542 int 2543 prison_flag(struct ucred *cred, unsigned flag) 2544 { 2545 2546 /* This is an atomic read, so no locking is necessary. */ 2547 return (cred->cr_prison->pr_flags & flag); 2548 } 2549 2550 int 2551 prison_allow(struct ucred *cred, unsigned flag) 2552 { 2553 2554 /* This is an atomic read, so no locking is necessary. */ 2555 return (cred->cr_prison->pr_allow & flag); 2556 } 2557 2558 /* 2559 * Remove a prison reference. If that was the last reference, remove the 2560 * prison itself - but not in this context in case there are locks held. 
*/
/*
 * Entered with pr's mutex held (asserted); the mutex is released before
 * returning.  When the count reaches zero the teardown is handed to
 * taskqueue_thread, which runs prison_complete().
 */
void
prison_free_locked(struct prison *pr)
{

	mtx_assert(&pr->pr_mtx, MA_OWNED);
	pr->pr_ref--;
	if (pr->pr_ref == 0) {
		mtx_unlock(&pr->pr_mtx);
		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
		return;
	}
	mtx_unlock(&pr->pr_mtx);
}

/*
 * Unlocked variant of prison_free_locked(): takes pr's mutex and lets
 * prison_free_locked() drop the reference and release the mutex.
 */
void
prison_free(struct prison *pr)
{

	mtx_lock(&pr->pr_mtx);
	prison_free_locked(pr);
}

/*
 * Task handler queued by prison_free_locked().  The context argument is
 * the prison; prison_deref() is called with no flags, so no count is
 * decremented here and the prison is torn down if pr_ref is not above zero.
 */
static void
prison_complete(void *context, int pending)
{

	prison_deref((struct prison *)context, 0);
}

/*
 * Remove a prison reference (usually).  This internal version assumes no
 * mutexes are held, except perhaps the prison itself.  If there are no more
 * references, release and delist the prison.  On completion, the prison lock
 * and the allprison lock are both unlocked.
 *
 * The flags, as used below:
 *	PD_DEREF	 decrement pr_ref
 *	PD_DEUREF	 decrement pr_uref
 *	PD_LOCKED	 pr's mutex is already held by the caller
 *	PD_LIST_SLOCKED	 allprison_lock is held shared by the caller
 *	PD_LIST_XLOCKED	 allprison_lock is held exclusively by the caller
 */
static void
prison_deref(struct prison *pr, int flags)
{
	struct prison *ppr, *tpr;

	if (!(flags & PD_LOCKED))
		mtx_lock(&pr->pr_mtx);
	/* Each pass handles one prison; a freed prison's parent is next. */
	for (;;) {
		if (flags & PD_DEUREF) {
			pr->pr_uref--;
			KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
		}
		if (flags & PD_DEREF)
			pr->pr_ref--;
		/* If the prison still has references, nothing else to do. */
		if (pr->pr_ref > 0) {
			mtx_unlock(&pr->pr_mtx);
			if (flags & PD_LIST_SLOCKED)
				sx_sunlock(&allprison_lock);
			else if (flags & PD_LIST_XLOCKED)
				sx_xunlock(&allprison_lock);
			return;
		}

		/*
		 * No references remain.  Delisting needs allprison_lock held
		 * exclusively: upgrade a shared hold (or drop and retake it
		 * if the upgrade fails), or take the lock if the caller did
		 * not hold it at all.
		 */
		mtx_unlock(&pr->pr_mtx);
		if (flags & PD_LIST_SLOCKED) {
			if (!sx_try_upgrade(&allprison_lock)) {
				sx_sunlock(&allprison_lock);
				sx_xlock(&allprison_lock);
			}
		} else if (!(flags & PD_LIST_XLOCKED))
			sx_xlock(&allprison_lock);

		TAILQ_REMOVE(&allprison, pr, pr_list);
		LIST_REMOVE(pr, pr_sibling);
		ppr = pr->pr_parent;
		/* Every ancestor has one descendant fewer. */
		for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent)
			tpr->pr_childcount--;
		sx_xunlock(&allprison_lock);

		/* Release everything the prison owned, then the prison itself. */
#ifdef VIMAGE
		if (pr->pr_vnet != ppr->pr_vnet)
			vnet_destroy(pr->pr_vnet);
#endif
		if (pr->pr_root != NULL)
			vrele(pr->pr_root);
		mtx_destroy(&pr->pr_mtx);
#ifdef INET
		free(pr->pr_ip4, M_PRISON);
#endif
#ifdef INET6
		free(pr->pr_ip6, M_PRISON);
#endif
		if (pr->pr_cpuset != NULL)
			cpuset_rel(pr->pr_cpuset);
		osd_jail_exit(pr);
#ifdef RACCT
		if (racct_enable)
			prison_racct_detach(pr);
#endif
		free(pr, M_PRISON);

		/* Removing a prison frees a reference on its parent. */
		pr = ppr;
		mtx_lock(&pr->pr_mtx);
		/*
		 * No list-lock flag is carried over: allprison_lock was
		 * released above, so the next pass takes it again if the
		 * parent has to be delisted too.
		 */
		flags = PD_DEREF | PD_DEUREF;
	}
}

/*
 * Add a reference to a prison whose mutex the caller already holds
 * (asserted).  The prison must still have at least one reference.
 */
void
prison_hold_locked(struct prison *pr)
{

	mtx_assert(&pr->pr_mtx, MA_OWNED);
	KASSERT(pr->pr_ref > 0,
	    ("Trying to hold dead prison (jid=%d).", pr->pr_id));
	pr->pr_ref++;
}

/*
 * Add a reference to a prison, taking and releasing its mutex around the
 * update.
 */
void
prison_hold(struct prison *pr)
{

	mtx_lock(&pr->pr_mtx);
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);
}

/*
 * Add a user reference (pr_uref) to a prison.  The prison must already
 * have a non-zero pr_uref.
 */
void
prison_proc_hold(struct prison *pr)
{

	mtx_lock(&pr->pr_mtx);
	KASSERT(pr->pr_uref > 0,
	    ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
	pr->pr_uref++;
	mtx_unlock(&pr->pr_mtx);
}

/*
 * Drop a user reference (pr_uref) from a prison.  The mutex taken here is
 * released by prison_deref(), which is told about it through PD_LOCKED.
 */
void
prison_proc_free(struct prison *pr)
{

	mtx_lock(&pr->pr_mtx);
	KASSERT(pr->pr_uref > 0,
	    ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
	prison_deref(pr, PD_DEUREF | PD_LOCKED);
}


#ifdef INET
/*
 * Restrict a prison's IP address list with its parent's, possibly replacing
 * it.  Return true if the replacement buffer was used (or would have been).
 *
 * newip4 is an optional caller-supplied buffer; when it is NULL and a larger
 * list is needed, one is allocated with M_NOWAIT.  No locking is done here.
 */
static int
prison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
{
	int ii, ij, used;
	struct prison *ppr;

	ppr = pr->pr_parent;
	if (!(pr->pr_flags & PR_IP4_USER)) {
		/* This has no user settings, so just copy the parent's list. */
		if (pr->pr_ip4s < ppr->pr_ip4s) {
			/*
			 * There's no room for the parent's list.  Use the
			 * new list buffer, which is assumed to be big enough
			 * (if it was passed).  If there's no buffer, try to
			 * allocate one.
			 */
			used = 1;
			if (newip4 == NULL) {
				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
				    M_PRISON, M_NOWAIT);
				/* A locally allocated buffer is not the caller's. */
				if (newip4 != NULL)
					used = 0;
			}
			/* If the allocation failed the old list is left as is. */
			if (newip4 != NULL) {
				bcopy(ppr->pr_ip4, newip4,
				    ppr->pr_ip4s * sizeof(*newip4));
				free(pr->pr_ip4, M_PRISON);
				pr->pr_ip4 = newip4;
				pr->pr_ip4s = ppr->pr_ip4s;
			}
			return (used);
		}
		/* The existing array is big enough; overwrite it in place. */
		pr->pr_ip4s = ppr->pr_ip4s;
		if (pr->pr_ip4s > 0)
			bcopy(ppr->pr_ip4, pr->pr_ip4,
			    pr->pr_ip4s * sizeof(*newip4));
		else if (pr->pr_ip4 != NULL) {
			free(pr->pr_ip4, M_PRISON);
			pr->pr_ip4 = NULL;
		}
	} else if (pr->pr_ip4s > 0) {
		/* Remove addresses that aren't in the parent. */
		/*
		 * The first entry is looked up anywhere in the parent's list
		 * with a linear scan; if it is missing, the remaining
		 * entries are shifted down over it.
		 */
		for (ij = 0; ij < ppr->pr_ip4s; ij++)
			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
				break;
		if (ij < ppr->pr_ip4s)
			ii = 1;
		else {
			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
			ii = 0;
		}
		/*
		 * Walk the rest of both lists in step (ii in this prison, ij
		 * in the parent starting after its first entry), ordered by
		 * qcmp_v4().  An entry equal to the parent's first address
		 * is kept without advancing ij.
		 */
		for (ij = 1; ii < pr->pr_ip4s; ) {
			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
				ii++;
				continue;
			}
			switch (ij >= ppr->pr_ip4s ? -1 :
				qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
			case -1:
				/* Not in the parent: close the gap over it. */
				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
				break;
			case 0:
				/* Present in both lists: keep it. */
				ii++;
				ij++;
				break;
			case 1:
				/* Parent entry sorts first: skip past it. */
				ij++;
				break;
			}
		}
		if (pr->pr_ip4s == 0) {
			free(pr->pr_ip4, M_PRISON);
			pr->pr_ip4 = NULL;
		}
	}
	return (0);
}

/*
 * Pass back primary IPv4 address of this jail.
 *
 * If not restricted return success but do not alter the address.  Caller has
 * to make sure to initialize it correctly (e.g. INADDR_ANY).
 *
 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4.
 * Address returned in NBO.
2801 */ 2802 int 2803 prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2804 { 2805 struct prison *pr; 2806 2807 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2808 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2809 2810 pr = cred->cr_prison; 2811 if (!(pr->pr_flags & PR_IP4)) 2812 return (0); 2813 mtx_lock(&pr->pr_mtx); 2814 if (!(pr->pr_flags & PR_IP4)) { 2815 mtx_unlock(&pr->pr_mtx); 2816 return (0); 2817 } 2818 if (pr->pr_ip4 == NULL) { 2819 mtx_unlock(&pr->pr_mtx); 2820 return (EAFNOSUPPORT); 2821 } 2822 2823 ia->s_addr = pr->pr_ip4[0].s_addr; 2824 mtx_unlock(&pr->pr_mtx); 2825 return (0); 2826 } 2827 2828 /* 2829 * Return 1 if we should do proper source address selection or are not jailed. 2830 * We will return 0 if we should bypass source address selection in favour 2831 * of the primary jail IPv4 address. Only in this case *ia will be updated and 2832 * returned in NBO. 2833 * Return EAFNOSUPPORT, in case this jail does not allow IPv4. 2834 */ 2835 int 2836 prison_saddrsel_ip4(struct ucred *cred, struct in_addr *ia) 2837 { 2838 struct prison *pr; 2839 struct in_addr lia; 2840 int error; 2841 2842 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2843 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2844 2845 if (!jailed(cred)) 2846 return (1); 2847 2848 pr = cred->cr_prison; 2849 if (pr->pr_flags & PR_IP4_SADDRSEL) 2850 return (1); 2851 2852 lia.s_addr = INADDR_ANY; 2853 error = prison_get_ip4(cred, &lia); 2854 if (error) 2855 return (error); 2856 if (lia.s_addr == INADDR_ANY) 2857 return (1); 2858 2859 ia->s_addr = lia.s_addr; 2860 return (0); 2861 } 2862 2863 /* 2864 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2865 */ 2866 int 2867 prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2868 { 2869 2870 if (pr1 == pr2) 2871 return (1); 2872 2873 /* 2874 * No need to lock since the PR_IP4_USER flag can't be altered for 2875 * existing prisons. 
2876 */ 2877 while (pr1 != &prison0 && 2878 #ifdef VIMAGE 2879 !(pr1->pr_flags & PR_VNET) && 2880 #endif 2881 !(pr1->pr_flags & PR_IP4_USER)) 2882 pr1 = pr1->pr_parent; 2883 while (pr2 != &prison0 && 2884 #ifdef VIMAGE 2885 !(pr2->pr_flags & PR_VNET) && 2886 #endif 2887 !(pr2->pr_flags & PR_IP4_USER)) 2888 pr2 = pr2->pr_parent; 2889 return (pr1 == pr2); 2890 } 2891 2892 /* 2893 * Make sure our (source) address is set to something meaningful to this 2894 * jail. 2895 * 2896 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2897 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2898 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 2899 */ 2900 int 2901 prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2902 { 2903 struct prison *pr; 2904 struct in_addr ia0; 2905 int error; 2906 2907 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2908 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2909 2910 pr = cred->cr_prison; 2911 if (!(pr->pr_flags & PR_IP4)) 2912 return (0); 2913 mtx_lock(&pr->pr_mtx); 2914 if (!(pr->pr_flags & PR_IP4)) { 2915 mtx_unlock(&pr->pr_mtx); 2916 return (0); 2917 } 2918 if (pr->pr_ip4 == NULL) { 2919 mtx_unlock(&pr->pr_mtx); 2920 return (EAFNOSUPPORT); 2921 } 2922 2923 ia0.s_addr = ntohl(ia->s_addr); 2924 if (ia0.s_addr == INADDR_LOOPBACK) { 2925 ia->s_addr = pr->pr_ip4[0].s_addr; 2926 mtx_unlock(&pr->pr_mtx); 2927 return (0); 2928 } 2929 2930 if (ia0.s_addr == INADDR_ANY) { 2931 /* 2932 * In case there is only 1 IPv4 address, bind directly. 2933 */ 2934 if (pr->pr_ip4s == 1) 2935 ia->s_addr = pr->pr_ip4[0].s_addr; 2936 mtx_unlock(&pr->pr_mtx); 2937 return (0); 2938 } 2939 2940 error = _prison_check_ip4(pr, ia); 2941 mtx_unlock(&pr->pr_mtx); 2942 return (error); 2943 } 2944 2945 /* 2946 * Rewrite destination address in case we will connect to loopback address. 2947 * 2948 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 
* Address passed in in NBO and returned in NBO.
 */
int
prison_remote_ip4(struct ucred *cred, struct in_addr *ia)
{
	struct prison *pr;

	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));

	pr = cred->cr_prison;
	/* Unlocked check first; repeated below once the mutex is held. */
	if (!(pr->pr_flags & PR_IP4))
		return (0);
	mtx_lock(&pr->pr_mtx);
	if (!(pr->pr_flags & PR_IP4)) {
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}
	if (pr->pr_ip4 == NULL) {
		mtx_unlock(&pr->pr_mtx);
		return (EAFNOSUPPORT);
	}

	/* Loopback is replaced by the first address in the jail's list. */
	if (ntohl(ia->s_addr) == INADDR_LOOPBACK) {
		ia->s_addr = pr->pr_ip4[0].s_addr;
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}

	/*
	 * Return success because nothing had to be changed.
	 */
	mtx_unlock(&pr->pr_mtx);
	return (0);
}

/*
 * Check if given address belongs to the jail referenced by cred/prison.
 *
 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail,
 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail
 * doesn't allow IPv4.  Address passed in in NBO.
 *
 * This internal helper takes no lock and dereferences pr->pr_ip4[0]
 * unconditionally; the callers in this file hold pr's mutex and have
 * already checked that pr_ip4 is not NULL.
 */
static int
_prison_check_ip4(const struct prison *pr, const struct in_addr *ia)
{
	int i, a, z, d;

	/*
	 * Check the primary IP.
	 */
	if (pr->pr_ip4[0].s_addr == ia->s_addr)
		return (0);

	/*
	 * All the other IPs are sorted so we can do a binary search.
	 */
	/*
	 * a and z bound the search over pr_ip4[1..pr_ip4s-1]; both are
	 * offsets from entry 1, hence the i+1 when indexing.
	 */
	a = 0;
	z = pr->pr_ip4s - 2;
	while (a <= z) {
		i = (a + z) / 2;
		d = qcmp_v4(&pr->pr_ip4[i+1], ia);
		if (d > 0)
			z = i - 1;
		else if (d < 0)
			a = i + 1;
		else
			return (0);
	}

	return (EADDRNOTAVAIL);
}

/*
 * Locked wrapper around _prison_check_ip4() for the prison of cred.
 *
 * Returns 0 if the prison has no PR_IP4 restriction or the address is in
 * its list, EAFNOSUPPORT if the prison has PR_IP4 set but no address list,
 * and EADDRNOTAVAIL otherwise.
 */
int
prison_check_ip4(const struct ucred *cred, const struct in_addr *ia)
{
	struct prison *pr;
	int error;

	KASSERT(cred != NULL, ("%s: cred is NULL", __func__));
	KASSERT(ia != NULL, ("%s: ia is NULL", __func__));

	pr = cred->cr_prison;
	/* Unlocked check first; repeated below once the mutex is held. */
	if (!(pr->pr_flags & PR_IP4))
		return (0);
	mtx_lock(&pr->pr_mtx);
	if (!(pr->pr_flags & PR_IP4)) {
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}
	if (pr->pr_ip4 == NULL) {
		mtx_unlock(&pr->pr_mtx);
		return (EAFNOSUPPORT);
	}

	error = _prison_check_ip4(pr, ia);
	mtx_unlock(&pr->pr_mtx);
	return (error);
}
#endif

#ifdef INET6
/*
 * Restrict a prison's IPv6 address list with its parent's, possibly
 * replacing it.
 *
 * newip6 is an optional caller-supplied buffer; when it is NULL and a larger
 * list is needed, one is allocated with M_NOWAIT.  Returns 1 if the
 * caller's buffer was used, or if no buffer was passed and the allocation
 * failed; returns 0 otherwise.  No locking is done here.
 */
static int
prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6)
{
	int ii, ij, used;
	struct prison *ppr;

	ppr = pr->pr_parent;
	if (!(pr->pr_flags & PR_IP6_USER)) {
		/* This has no user settings, so just copy the parent's list. */
		if (pr->pr_ip6s < ppr->pr_ip6s) {
			/*
			 * There's no room for the parent's list.  Use the
			 * new list buffer, which is assumed to be big enough
			 * (if it was passed).  If there's no buffer, try to
			 * allocate one.
			 */
			used = 1;
			if (newip6 == NULL) {
				newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6),
				    M_PRISON, M_NOWAIT);
				/* A locally allocated buffer is not the caller's. */
				if (newip6 != NULL)
					used = 0;
			}
			/* If the allocation failed the old list is left as is. */
			if (newip6 != NULL) {
				bcopy(ppr->pr_ip6, newip6,
				    ppr->pr_ip6s * sizeof(*newip6));
				free(pr->pr_ip6, M_PRISON);
				pr->pr_ip6 = newip6;
				pr->pr_ip6s = ppr->pr_ip6s;
			}
			return (used);
		}
		/* The existing array is big enough; overwrite it in place. */
		pr->pr_ip6s = ppr->pr_ip6s;
		if (pr->pr_ip6s > 0)
			bcopy(ppr->pr_ip6, pr->pr_ip6,
			    pr->pr_ip6s * sizeof(*newip6));
		else if (pr->pr_ip6 != NULL) {
			free(pr->pr_ip6, M_PRISON);
			pr->pr_ip6 = NULL;
		}
	} else if (pr->pr_ip6s > 0) {
		/* Remove addresses that aren't in the parent. */
		/*
		 * The first entry is looked up anywhere in the parent's list
		 * with a linear scan; if it is missing, the remaining
		 * entries are shifted down over it.
		 */
		for (ij = 0; ij < ppr->pr_ip6s; ij++)
			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0],
			    &ppr->pr_ip6[ij]))
				break;
		if (ij < ppr->pr_ip6s)
			ii = 1;
		else {
			bcopy(pr->pr_ip6 + 1, pr->pr_ip6,
			    --pr->pr_ip6s * sizeof(*pr->pr_ip6));
			ii = 0;
		}
		/*
		 * Walk the rest of both lists in step (ii in this prison, ij
		 * in the parent starting after its first entry), ordered by
		 * qcmp_v6().  An entry equal to the parent's first address
		 * is kept without advancing ij.
		 */
		for (ij = 1; ii < pr->pr_ip6s; ) {
			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii],
			    &ppr->pr_ip6[0])) {
				ii++;
				continue;
			}
			switch (ij >= ppr->pr_ip6s ? -1 :
				qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) {
			case -1:
				/* Not in the parent: close the gap over it. */
				bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii,
				    (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6));
				break;
			case 0:
				/* Present in both lists: keep it. */
				ii++;
				ij++;
				break;
			case 1:
				/* Parent entry sorts first: skip past it. */
				ij++;
				break;
			}
		}
		if (pr->pr_ip6s == 0) {
			free(pr->pr_ip6, M_PRISON);
			pr->pr_ip6 = NULL;
		}
	}
	return 0;
}

/*
 * Pass back primary IPv6 address for this jail.
 *
 * If not restricted return success but do not alter the address.  Caller has
 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT).
 *
 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6.
3140 */ 3141 int 3142 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 3143 { 3144 struct prison *pr; 3145 3146 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3147 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3148 3149 pr = cred->cr_prison; 3150 if (!(pr->pr_flags & PR_IP6)) 3151 return (0); 3152 mtx_lock(&pr->pr_mtx); 3153 if (!(pr->pr_flags & PR_IP6)) { 3154 mtx_unlock(&pr->pr_mtx); 3155 return (0); 3156 } 3157 if (pr->pr_ip6 == NULL) { 3158 mtx_unlock(&pr->pr_mtx); 3159 return (EAFNOSUPPORT); 3160 } 3161 3162 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3163 mtx_unlock(&pr->pr_mtx); 3164 return (0); 3165 } 3166 3167 /* 3168 * Return 1 if we should do proper source address selection or are not jailed. 3169 * We will return 0 if we should bypass source address selection in favour 3170 * of the primary jail IPv6 address. Only in this case *ia will be updated and 3171 * returned in NBO. 3172 * Return EAFNOSUPPORT, in case this jail does not allow IPv6. 3173 */ 3174 int 3175 prison_saddrsel_ip6(struct ucred *cred, struct in6_addr *ia6) 3176 { 3177 struct prison *pr; 3178 struct in6_addr lia6; 3179 int error; 3180 3181 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3182 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3183 3184 if (!jailed(cred)) 3185 return (1); 3186 3187 pr = cred->cr_prison; 3188 if (pr->pr_flags & PR_IP6_SADDRSEL) 3189 return (1); 3190 3191 lia6 = in6addr_any; 3192 error = prison_get_ip6(cred, &lia6); 3193 if (error) 3194 return (error); 3195 if (IN6_IS_ADDR_UNSPECIFIED(&lia6)) 3196 return (1); 3197 3198 bcopy(&lia6, ia6, sizeof(struct in6_addr)); 3199 return (0); 3200 } 3201 3202 /* 3203 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 
3204 */ 3205 int 3206 prison_equal_ip6(struct prison *pr1, struct prison *pr2) 3207 { 3208 3209 if (pr1 == pr2) 3210 return (1); 3211 3212 while (pr1 != &prison0 && 3213 #ifdef VIMAGE 3214 !(pr1->pr_flags & PR_VNET) && 3215 #endif 3216 !(pr1->pr_flags & PR_IP6_USER)) 3217 pr1 = pr1->pr_parent; 3218 while (pr2 != &prison0 && 3219 #ifdef VIMAGE 3220 !(pr2->pr_flags & PR_VNET) && 3221 #endif 3222 !(pr2->pr_flags & PR_IP6_USER)) 3223 pr2 = pr2->pr_parent; 3224 return (pr1 == pr2); 3225 } 3226 3227 /* 3228 * Make sure our (source) address is set to something meaningful to this jail. 3229 * 3230 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 3231 * when needed while binding. 3232 * 3233 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3234 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3235 * doesn't allow IPv6. 3236 */ 3237 int 3238 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 3239 { 3240 struct prison *pr; 3241 int error; 3242 3243 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3244 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3245 3246 pr = cred->cr_prison; 3247 if (!(pr->pr_flags & PR_IP6)) 3248 return (0); 3249 mtx_lock(&pr->pr_mtx); 3250 if (!(pr->pr_flags & PR_IP6)) { 3251 mtx_unlock(&pr->pr_mtx); 3252 return (0); 3253 } 3254 if (pr->pr_ip6 == NULL) { 3255 mtx_unlock(&pr->pr_mtx); 3256 return (EAFNOSUPPORT); 3257 } 3258 3259 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3260 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3261 mtx_unlock(&pr->pr_mtx); 3262 return (0); 3263 } 3264 3265 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 3266 /* 3267 * In case there is only 1 IPv6 address, and v6only is true, 3268 * then bind directly. 
3269 */ 3270 if (v6only != 0 && pr->pr_ip6s == 1) 3271 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3272 mtx_unlock(&pr->pr_mtx); 3273 return (0); 3274 } 3275 3276 error = _prison_check_ip6(pr, ia6); 3277 mtx_unlock(&pr->pr_mtx); 3278 return (error); 3279 } 3280 3281 /* 3282 * Rewrite destination address in case we will connect to loopback address. 3283 * 3284 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 3285 */ 3286 int 3287 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 3288 { 3289 struct prison *pr; 3290 3291 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3292 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3293 3294 pr = cred->cr_prison; 3295 if (!(pr->pr_flags & PR_IP6)) 3296 return (0); 3297 mtx_lock(&pr->pr_mtx); 3298 if (!(pr->pr_flags & PR_IP6)) { 3299 mtx_unlock(&pr->pr_mtx); 3300 return (0); 3301 } 3302 if (pr->pr_ip6 == NULL) { 3303 mtx_unlock(&pr->pr_mtx); 3304 return (EAFNOSUPPORT); 3305 } 3306 3307 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 3308 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 3309 mtx_unlock(&pr->pr_mtx); 3310 return (0); 3311 } 3312 3313 /* 3314 * Return success because nothing had to be changed. 3315 */ 3316 mtx_unlock(&pr->pr_mtx); 3317 return (0); 3318 } 3319 3320 /* 3321 * Check if given address belongs to the jail referenced by cred/prison. 3322 * 3323 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3324 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3325 * doesn't allow IPv6. 3326 */ 3327 static int 3328 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3329 { 3330 int i, a, z, d; 3331 3332 /* 3333 * Check the primary IP. 3334 */ 3335 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3336 return (0); 3337 3338 /* 3339 * All the other IPs are sorted so we can do a binary search. 
3340 */ 3341 a = 0; 3342 z = pr->pr_ip6s - 2; 3343 while (a <= z) { 3344 i = (a + z) / 2; 3345 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3346 if (d > 0) 3347 z = i - 1; 3348 else if (d < 0) 3349 a = i + 1; 3350 else 3351 return (0); 3352 } 3353 3354 return (EADDRNOTAVAIL); 3355 } 3356 3357 int 3358 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3359 { 3360 struct prison *pr; 3361 int error; 3362 3363 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3364 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3365 3366 pr = cred->cr_prison; 3367 if (!(pr->pr_flags & PR_IP6)) 3368 return (0); 3369 mtx_lock(&pr->pr_mtx); 3370 if (!(pr->pr_flags & PR_IP6)) { 3371 mtx_unlock(&pr->pr_mtx); 3372 return (0); 3373 } 3374 if (pr->pr_ip6 == NULL) { 3375 mtx_unlock(&pr->pr_mtx); 3376 return (EAFNOSUPPORT); 3377 } 3378 3379 error = _prison_check_ip6(pr, ia6); 3380 mtx_unlock(&pr->pr_mtx); 3381 return (error); 3382 } 3383 #endif 3384 3385 /* 3386 * Check if a jail supports the given address family. 3387 * 3388 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3389 * if not. 3390 */ 3391 int 3392 prison_check_af(struct ucred *cred, int af) 3393 { 3394 struct prison *pr; 3395 int error; 3396 3397 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3398 3399 pr = cred->cr_prison; 3400 #ifdef VIMAGE 3401 /* Prisons with their own network stack are not limited. 
*/ 3402 if (prison_owns_vnet(cred)) 3403 return (0); 3404 #endif 3405 3406 error = 0; 3407 switch (af) 3408 { 3409 #ifdef INET 3410 case AF_INET: 3411 if (pr->pr_flags & PR_IP4) 3412 { 3413 mtx_lock(&pr->pr_mtx); 3414 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3415 error = EAFNOSUPPORT; 3416 mtx_unlock(&pr->pr_mtx); 3417 } 3418 break; 3419 #endif 3420 #ifdef INET6 3421 case AF_INET6: 3422 if (pr->pr_flags & PR_IP6) 3423 { 3424 mtx_lock(&pr->pr_mtx); 3425 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3426 error = EAFNOSUPPORT; 3427 mtx_unlock(&pr->pr_mtx); 3428 } 3429 break; 3430 #endif 3431 case AF_LOCAL: 3432 case AF_ROUTE: 3433 break; 3434 default: 3435 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3436 error = EAFNOSUPPORT; 3437 } 3438 return (error); 3439 } 3440 3441 /* 3442 * Check if given address belongs to the jail referenced by cred (wrapper to 3443 * prison_check_ip[46]). 3444 * 3445 * Returns 0 if jail doesn't restrict the address family or if address belongs 3446 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3447 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 
3448 */ 3449 int 3450 prison_if(struct ucred *cred, struct sockaddr *sa) 3451 { 3452 #ifdef INET 3453 struct sockaddr_in *sai; 3454 #endif 3455 #ifdef INET6 3456 struct sockaddr_in6 *sai6; 3457 #endif 3458 int error; 3459 3460 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3461 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3462 3463 #ifdef VIMAGE 3464 if (prison_owns_vnet(cred)) 3465 return (0); 3466 #endif 3467 3468 error = 0; 3469 switch (sa->sa_family) 3470 { 3471 #ifdef INET 3472 case AF_INET: 3473 sai = (struct sockaddr_in *)sa; 3474 error = prison_check_ip4(cred, &sai->sin_addr); 3475 break; 3476 #endif 3477 #ifdef INET6 3478 case AF_INET6: 3479 sai6 = (struct sockaddr_in6 *)sa; 3480 error = prison_check_ip6(cred, &sai6->sin6_addr); 3481 break; 3482 #endif 3483 default: 3484 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3485 error = EAFNOSUPPORT; 3486 } 3487 return (error); 3488 } 3489 3490 /* 3491 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3492 */ 3493 int 3494 prison_check(struct ucred *cred1, struct ucred *cred2) 3495 { 3496 3497 return ((cred1->cr_prison == cred2->cr_prison || 3498 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3499 } 3500 3501 /* 3502 * Return 1 if p2 is a child of p1, otherwise 0. 3503 */ 3504 int 3505 prison_ischild(struct prison *pr1, struct prison *pr2) 3506 { 3507 3508 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3509 if (pr1 == pr2) 3510 return (1); 3511 return (0); 3512 } 3513 3514 /* 3515 * Return 1 if the passed credential is in a jail, otherwise 0. 3516 */ 3517 int 3518 jailed(struct ucred *cred) 3519 { 3520 3521 return (cred->cr_prison != &prison0); 3522 } 3523 3524 /* 3525 * Return 1 if the passed credential is in a jail and that jail does not 3526 * have its own virtual network stack, otherwise 0. 
3527 */ 3528 int 3529 jailed_without_vnet(struct ucred *cred) 3530 { 3531 3532 if (!jailed(cred)) 3533 return (0); 3534 #ifdef VIMAGE 3535 if (prison_owns_vnet(cred)) 3536 return (0); 3537 #endif 3538 3539 return (1); 3540 } 3541 3542 /* 3543 * Return the correct hostname (domainname, et al) for the passed credential. 3544 */ 3545 void 3546 getcredhostname(struct ucred *cred, char *buf, size_t size) 3547 { 3548 struct prison *pr; 3549 3550 /* 3551 * A NULL credential can be used to shortcut to the physical 3552 * system's hostname. 3553 */ 3554 pr = (cred != NULL) ? cred->cr_prison : &prison0; 3555 mtx_lock(&pr->pr_mtx); 3556 strlcpy(buf, pr->pr_hostname, size); 3557 mtx_unlock(&pr->pr_mtx); 3558 } 3559 3560 void 3561 getcreddomainname(struct ucred *cred, char *buf, size_t size) 3562 { 3563 3564 mtx_lock(&cred->cr_prison->pr_mtx); 3565 strlcpy(buf, cred->cr_prison->pr_domainname, size); 3566 mtx_unlock(&cred->cr_prison->pr_mtx); 3567 } 3568 3569 void 3570 getcredhostuuid(struct ucred *cred, char *buf, size_t size) 3571 { 3572 3573 mtx_lock(&cred->cr_prison->pr_mtx); 3574 strlcpy(buf, cred->cr_prison->pr_hostuuid, size); 3575 mtx_unlock(&cred->cr_prison->pr_mtx); 3576 } 3577 3578 void 3579 getcredhostid(struct ucred *cred, unsigned long *hostid) 3580 { 3581 3582 mtx_lock(&cred->cr_prison->pr_mtx); 3583 *hostid = cred->cr_prison->pr_hostid; 3584 mtx_unlock(&cred->cr_prison->pr_mtx); 3585 } 3586 3587 #ifdef VIMAGE 3588 /* 3589 * Determine whether the prison represented by cred owns 3590 * its vnet rather than having it inherited. 3591 * 3592 * Returns 1 in case the prison owns the vnet, 0 otherwise. 3593 */ 3594 int 3595 prison_owns_vnet(struct ucred *cred) 3596 { 3597 3598 /* 3599 * vnets cannot be added/removed after jail creation, 3600 * so no need to lock here. 3601 */ 3602 return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); 3603 } 3604 #endif 3605 3606 /* 3607 * Determine whether the subject represented by cred can "see" 3608 * status of a mount point. 
3609 * Returns: 0 for permitted, ENOENT otherwise. 3610 * XXX: This function should be called cr_canseemount() and should be 3611 * placed in kern_prot.c. 3612 */ 3613 int 3614 prison_canseemount(struct ucred *cred, struct mount *mp) 3615 { 3616 struct prison *pr; 3617 struct statfs *sp; 3618 size_t len; 3619 3620 pr = cred->cr_prison; 3621 if (pr->pr_enforce_statfs == 0) 3622 return (0); 3623 if (pr->pr_root->v_mount == mp) 3624 return (0); 3625 if (pr->pr_enforce_statfs == 2) 3626 return (ENOENT); 3627 /* 3628 * If jail's chroot directory is set to "/" we should be able to see 3629 * all mount-points from inside a jail. 3630 * This is ugly check, but this is the only situation when jail's 3631 * directory ends with '/'. 3632 */ 3633 if (strcmp(pr->pr_path, "/") == 0) 3634 return (0); 3635 len = strlen(pr->pr_path); 3636 sp = &mp->mnt_stat; 3637 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3638 return (ENOENT); 3639 /* 3640 * Be sure that we don't have situation where jail's root directory 3641 * is "/some/path" and mount point is "/some/pathpath". 3642 */ 3643 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3644 return (ENOENT); 3645 return (0); 3646 } 3647 3648 void 3649 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3650 { 3651 char jpath[MAXPATHLEN]; 3652 struct prison *pr; 3653 size_t len; 3654 3655 pr = cred->cr_prison; 3656 if (pr->pr_enforce_statfs == 0) 3657 return; 3658 if (prison_canseemount(cred, mp) != 0) { 3659 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3660 strlcpy(sp->f_mntonname, "[restricted]", 3661 sizeof(sp->f_mntonname)); 3662 return; 3663 } 3664 if (pr->pr_root->v_mount == mp) { 3665 /* 3666 * Clear current buffer data, so we are sure nothing from 3667 * the valid path left there. 
3668 */ 3669 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3670 *sp->f_mntonname = '/'; 3671 return; 3672 } 3673 /* 3674 * If jail's chroot directory is set to "/" we should be able to see 3675 * all mount-points from inside a jail. 3676 */ 3677 if (strcmp(pr->pr_path, "/") == 0) 3678 return; 3679 len = strlen(pr->pr_path); 3680 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3681 /* 3682 * Clear current buffer data, so we are sure nothing from 3683 * the valid path left there. 3684 */ 3685 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3686 if (*jpath == '\0') { 3687 /* Should never happen. */ 3688 *sp->f_mntonname = '/'; 3689 } else { 3690 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3691 } 3692 } 3693 3694 /* 3695 * Check with permission for a specific privilege is granted within jail. We 3696 * have a specific list of accepted privileges; the rest are denied. 3697 */ 3698 int 3699 prison_priv_check(struct ucred *cred, int priv) 3700 { 3701 3702 if (!jailed(cred)) 3703 return (0); 3704 3705 #ifdef VIMAGE 3706 /* 3707 * Privileges specific to prisons with a virtual network stack. 3708 * There might be a duplicate entry here in case the privilege 3709 * is only granted conditionally in the legacy jail case. 3710 */ 3711 switch (priv) { 3712 #ifdef notyet 3713 /* 3714 * NFS-specific privileges. 3715 */ 3716 case PRIV_NFS_DAEMON: 3717 case PRIV_NFS_LOCKD: 3718 #endif 3719 /* 3720 * Network stack privileges. 3721 */ 3722 case PRIV_NET_BRIDGE: 3723 case PRIV_NET_GRE: 3724 case PRIV_NET_BPF: 3725 case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. 
*/ 3726 case PRIV_NET_ROUTE: 3727 case PRIV_NET_TAP: 3728 case PRIV_NET_SETIFMTU: 3729 case PRIV_NET_SETIFFLAGS: 3730 case PRIV_NET_SETIFCAP: 3731 case PRIV_NET_SETIFDESCR: 3732 case PRIV_NET_SETIFNAME : 3733 case PRIV_NET_SETIFMETRIC: 3734 case PRIV_NET_SETIFPHYS: 3735 case PRIV_NET_SETIFMAC: 3736 case PRIV_NET_ADDMULTI: 3737 case PRIV_NET_DELMULTI: 3738 case PRIV_NET_HWIOCTL: 3739 case PRIV_NET_SETLLADDR: 3740 case PRIV_NET_ADDIFGROUP: 3741 case PRIV_NET_DELIFGROUP: 3742 case PRIV_NET_IFCREATE: 3743 case PRIV_NET_IFDESTROY: 3744 case PRIV_NET_ADDIFADDR: 3745 case PRIV_NET_DELIFADDR: 3746 case PRIV_NET_LAGG: 3747 case PRIV_NET_GIF: 3748 case PRIV_NET_SETIFVNET: 3749 case PRIV_NET_SETIFFIB: 3750 3751 /* 3752 * 802.11-related privileges. 3753 */ 3754 case PRIV_NET80211_GETKEY: 3755 #ifdef notyet 3756 case PRIV_NET80211_MANAGE: /* XXX-BZ discuss with sam@ */ 3757 #endif 3758 3759 #ifdef notyet 3760 /* 3761 * ATM privileges. 3762 */ 3763 case PRIV_NETATM_CFG: 3764 case PRIV_NETATM_ADD: 3765 case PRIV_NETATM_DEL: 3766 case PRIV_NETATM_SET: 3767 3768 /* 3769 * Bluetooth privileges. 3770 */ 3771 case PRIV_NETBLUETOOTH_RAW: 3772 #endif 3773 3774 /* 3775 * Netgraph and netgraph module privileges. 3776 */ 3777 case PRIV_NETGRAPH_CONTROL: 3778 #ifdef notyet 3779 case PRIV_NETGRAPH_TTY: 3780 #endif 3781 3782 /* 3783 * IPv4 and IPv6 privileges. 3784 */ 3785 case PRIV_NETINET_IPFW: 3786 case PRIV_NETINET_DIVERT: 3787 case PRIV_NETINET_PF: 3788 case PRIV_NETINET_DUMMYNET: 3789 case PRIV_NETINET_CARP: 3790 case PRIV_NETINET_MROUTE: 3791 case PRIV_NETINET_RAW: 3792 case PRIV_NETINET_ADDRCTRL6: 3793 case PRIV_NETINET_ND6: 3794 case PRIV_NETINET_SCOPE6: 3795 case PRIV_NETINET_ALIFETIME6: 3796 case PRIV_NETINET_IPSEC: 3797 case PRIV_NETINET_BINDANY: 3798 3799 #ifdef notyet 3800 /* 3801 * NCP privileges. 3802 */ 3803 case PRIV_NETNCP: 3804 3805 /* 3806 * SMB privileges. 3807 */ 3808 case PRIV_NETSMB: 3809 #endif 3810 3811 /* 3812 * No default: or deny here. 
3813 * In case of no permit fall through to next switch(). 3814 */ 3815 if (cred->cr_prison->pr_flags & PR_VNET) 3816 return (0); 3817 } 3818 #endif /* VIMAGE */ 3819 3820 switch (priv) { 3821 3822 /* 3823 * Allow ktrace privileges for root in jail. 3824 */ 3825 case PRIV_KTRACE: 3826 3827 #if 0 3828 /* 3829 * Allow jailed processes to configure audit identity and 3830 * submit audit records (login, etc). In the future we may 3831 * want to further refine the relationship between audit and 3832 * jail. 3833 */ 3834 case PRIV_AUDIT_GETAUDIT: 3835 case PRIV_AUDIT_SETAUDIT: 3836 case PRIV_AUDIT_SUBMIT: 3837 #endif 3838 3839 /* 3840 * Allow jailed processes to manipulate process UNIX 3841 * credentials in any way they see fit. 3842 */ 3843 case PRIV_CRED_SETUID: 3844 case PRIV_CRED_SETEUID: 3845 case PRIV_CRED_SETGID: 3846 case PRIV_CRED_SETEGID: 3847 case PRIV_CRED_SETGROUPS: 3848 case PRIV_CRED_SETREUID: 3849 case PRIV_CRED_SETREGID: 3850 case PRIV_CRED_SETRESUID: 3851 case PRIV_CRED_SETRESGID: 3852 3853 /* 3854 * Jail implements visibility constraints already, so allow 3855 * jailed root to override uid/gid-based constraints. 3856 */ 3857 case PRIV_SEEOTHERGIDS: 3858 case PRIV_SEEOTHERUIDS: 3859 3860 /* 3861 * Jail implements inter-process debugging limits already, so 3862 * allow jailed root various debugging privileges. 3863 */ 3864 case PRIV_DEBUG_DIFFCRED: 3865 case PRIV_DEBUG_SUGID: 3866 case PRIV_DEBUG_UNPRIV: 3867 3868 /* 3869 * Allow jail to set various resource limits and login 3870 * properties, and for now, exceed process resource limits. 3871 */ 3872 case PRIV_PROC_LIMIT: 3873 case PRIV_PROC_SETLOGIN: 3874 case PRIV_PROC_SETRLIMIT: 3875 3876 /* 3877 * System V and POSIX IPC privileges are granted in jail. 3878 */ 3879 case PRIV_IPC_READ: 3880 case PRIV_IPC_WRITE: 3881 case PRIV_IPC_ADMIN: 3882 case PRIV_IPC_MSGSIZE: 3883 case PRIV_MQ_ADMIN: 3884 3885 /* 3886 * Jail operations within a jail work on child jails. 
3887 */ 3888 case PRIV_JAIL_ATTACH: 3889 case PRIV_JAIL_SET: 3890 case PRIV_JAIL_REMOVE: 3891 3892 /* 3893 * Jail implements its own inter-process limits, so allow 3894 * root processes in jail to change scheduling on other 3895 * processes in the same jail. Likewise for signalling. 3896 */ 3897 case PRIV_SCHED_DIFFCRED: 3898 case PRIV_SCHED_CPUSET: 3899 case PRIV_SIGNAL_DIFFCRED: 3900 case PRIV_SIGNAL_SUGID: 3901 3902 /* 3903 * Allow jailed processes to write to sysctls marked as jail 3904 * writable. 3905 */ 3906 case PRIV_SYSCTL_WRITEJAIL: 3907 3908 /* 3909 * Allow root in jail to manage a variety of quota 3910 * properties. These should likely be conditional on a 3911 * configuration option. 3912 */ 3913 case PRIV_VFS_GETQUOTA: 3914 case PRIV_VFS_SETQUOTA: 3915 3916 /* 3917 * Since Jail relies on chroot() to implement file system 3918 * protections, grant many VFS privileges to root in jail. 3919 * Be careful to exclude mount-related and NFS-related 3920 * privileges. 3921 */ 3922 case PRIV_VFS_READ: 3923 case PRIV_VFS_WRITE: 3924 case PRIV_VFS_ADMIN: 3925 case PRIV_VFS_EXEC: 3926 case PRIV_VFS_LOOKUP: 3927 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3928 case PRIV_VFS_CHFLAGS_DEV: 3929 case PRIV_VFS_CHOWN: 3930 case PRIV_VFS_CHROOT: 3931 case PRIV_VFS_RETAINSUGID: 3932 case PRIV_VFS_FCHROOT: 3933 case PRIV_VFS_LINK: 3934 case PRIV_VFS_SETGID: 3935 case PRIV_VFS_STAT: 3936 case PRIV_VFS_STICKYFILE: 3937 3938 /* 3939 * As in the non-jail case, non-root users are expected to be 3940 * able to read kernel/phyiscal memory (provided /dev/[k]mem 3941 * exists in the jail and they have permission to access it). 3942 */ 3943 case PRIV_KMEM_READ: 3944 return (0); 3945 3946 /* 3947 * Depending on the global setting, allow privilege of 3948 * setting system flags. 
3949 */ 3950 case PRIV_VFS_SYSFLAGS: 3951 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3952 return (0); 3953 else 3954 return (EPERM); 3955 3956 /* 3957 * Depending on the global setting, allow privilege of 3958 * mounting/unmounting file systems. 3959 */ 3960 case PRIV_VFS_MOUNT: 3961 case PRIV_VFS_UNMOUNT: 3962 case PRIV_VFS_MOUNT_NONUSER: 3963 case PRIV_VFS_MOUNT_OWNER: 3964 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT && 3965 cred->cr_prison->pr_enforce_statfs < 2) 3966 return (0); 3967 else 3968 return (EPERM); 3969 3970 /* 3971 * Allow jailed root to bind reserved ports and reuse in-use 3972 * ports. 3973 */ 3974 case PRIV_NETINET_RESERVEDPORT: 3975 case PRIV_NETINET_REUSEPORT: 3976 return (0); 3977 3978 /* 3979 * Allow jailed root to set certian IPv4/6 (option) headers. 3980 */ 3981 case PRIV_NETINET_SETHDROPTS: 3982 return (0); 3983 3984 /* 3985 * Conditionally allow creating raw sockets in jail. 3986 */ 3987 case PRIV_NETINET_RAW: 3988 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3989 return (0); 3990 else 3991 return (EPERM); 3992 3993 /* 3994 * Since jail implements its own visibility limits on netstat 3995 * sysctls, allow getcred. This allows identd to work in 3996 * jail. 3997 */ 3998 case PRIV_NETINET_GETCRED: 3999 return (0); 4000 4001 /* 4002 * Allow jailed root to set loginclass. 4003 */ 4004 case PRIV_PROC_SETLOGINCLASS: 4005 return (0); 4006 4007 default: 4008 /* 4009 * In all remaining cases, deny the privilege request. This 4010 * includes almost all network privileges, many system 4011 * configuration privileges. 4012 */ 4013 return (EPERM); 4014 } 4015 } 4016 4017 /* 4018 * Return the part of pr2's name that is relative to pr1, or the whole name 4019 * if it does not directly follow. 4020 */ 4021 4022 char * 4023 prison_name(struct prison *pr1, struct prison *pr2) 4024 { 4025 char *name; 4026 4027 /* Jails see themselves as "0" (if they see themselves at all). 
*/ 4028 if (pr1 == pr2) 4029 return "0"; 4030 name = pr2->pr_name; 4031 if (prison_ischild(pr1, pr2)) { 4032 /* 4033 * pr1 isn't locked (and allprison_lock may not be either) 4034 * so its length can't be counted on. But the number of dots 4035 * can be counted on - and counted. 4036 */ 4037 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 4038 name = strchr(name, '.') + 1; 4039 } 4040 return (name); 4041 } 4042 4043 /* 4044 * Return the part of pr2's path that is relative to pr1, or the whole path 4045 * if it does not directly follow. 4046 */ 4047 static char * 4048 prison_path(struct prison *pr1, struct prison *pr2) 4049 { 4050 char *path1, *path2; 4051 int len1; 4052 4053 path1 = pr1->pr_path; 4054 path2 = pr2->pr_path; 4055 if (!strcmp(path1, "/")) 4056 return (path2); 4057 len1 = strlen(path1); 4058 if (strncmp(path1, path2, len1)) 4059 return (path2); 4060 if (path2[len1] == '\0') 4061 return "/"; 4062 if (path2[len1] == '/') 4063 return (path2 + len1); 4064 return (path2); 4065 } 4066 4067 4068 /* 4069 * Jail-related sysctls. 
4070 */ 4071 static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 4072 "Jails"); 4073 4074 static int 4075 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 4076 { 4077 struct xprison *xp; 4078 struct prison *pr, *cpr; 4079 #ifdef INET 4080 struct in_addr *ip4 = NULL; 4081 int ip4s = 0; 4082 #endif 4083 #ifdef INET6 4084 struct in6_addr *ip6 = NULL; 4085 int ip6s = 0; 4086 #endif 4087 int descend, error; 4088 4089 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 4090 pr = req->td->td_ucred->cr_prison; 4091 error = 0; 4092 sx_slock(&allprison_lock); 4093 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 4094 #if defined(INET) || defined(INET6) 4095 again: 4096 #endif 4097 mtx_lock(&cpr->pr_mtx); 4098 #ifdef INET 4099 if (cpr->pr_ip4s > 0) { 4100 if (ip4s < cpr->pr_ip4s) { 4101 ip4s = cpr->pr_ip4s; 4102 mtx_unlock(&cpr->pr_mtx); 4103 ip4 = realloc(ip4, ip4s * 4104 sizeof(struct in_addr), M_TEMP, M_WAITOK); 4105 goto again; 4106 } 4107 bcopy(cpr->pr_ip4, ip4, 4108 cpr->pr_ip4s * sizeof(struct in_addr)); 4109 } 4110 #endif 4111 #ifdef INET6 4112 if (cpr->pr_ip6s > 0) { 4113 if (ip6s < cpr->pr_ip6s) { 4114 ip6s = cpr->pr_ip6s; 4115 mtx_unlock(&cpr->pr_mtx); 4116 ip6 = realloc(ip6, ip6s * 4117 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 4118 goto again; 4119 } 4120 bcopy(cpr->pr_ip6, ip6, 4121 cpr->pr_ip6s * sizeof(struct in6_addr)); 4122 } 4123 #endif 4124 if (cpr->pr_ref == 0) { 4125 mtx_unlock(&cpr->pr_mtx); 4126 continue; 4127 } 4128 bzero(xp, sizeof(*xp)); 4129 xp->pr_version = XPRISON_VERSION; 4130 xp->pr_id = cpr->pr_id; 4131 xp->pr_state = cpr->pr_uref > 0 4132 ? 
PRISON_STATE_ALIVE : PRISON_STATE_DYING; 4133 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 4134 strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); 4135 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 4136 #ifdef INET 4137 xp->pr_ip4s = cpr->pr_ip4s; 4138 #endif 4139 #ifdef INET6 4140 xp->pr_ip6s = cpr->pr_ip6s; 4141 #endif 4142 mtx_unlock(&cpr->pr_mtx); 4143 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 4144 if (error) 4145 break; 4146 #ifdef INET 4147 if (xp->pr_ip4s > 0) { 4148 error = SYSCTL_OUT(req, ip4, 4149 xp->pr_ip4s * sizeof(struct in_addr)); 4150 if (error) 4151 break; 4152 } 4153 #endif 4154 #ifdef INET6 4155 if (xp->pr_ip6s > 0) { 4156 error = SYSCTL_OUT(req, ip6, 4157 xp->pr_ip6s * sizeof(struct in6_addr)); 4158 if (error) 4159 break; 4160 } 4161 #endif 4162 } 4163 sx_sunlock(&allprison_lock); 4164 free(xp, M_TEMP); 4165 #ifdef INET 4166 free(ip4, M_TEMP); 4167 #endif 4168 #ifdef INET6 4169 free(ip6, M_TEMP); 4170 #endif 4171 return (error); 4172 } 4173 4174 SYSCTL_OID(_security_jail, OID_AUTO, list, 4175 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4176 sysctl_jail_list, "S", "List of active jails"); 4177 4178 static int 4179 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 4180 { 4181 int error, injail; 4182 4183 injail = jailed(req->td->td_ucred); 4184 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 4185 4186 return (error); 4187 } 4188 4189 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 4190 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4191 sysctl_jail_jailed, "I", "Process in jail?"); 4192 4193 static int 4194 sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) 4195 { 4196 int error, havevnet; 4197 #ifdef VIMAGE 4198 struct ucred *cred = req->td->td_ucred; 4199 4200 havevnet = jailed(cred) && prison_owns_vnet(cred); 4201 #else 4202 havevnet = 0; 4203 #endif 4204 error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet)); 4205 4206 return (error); 4207 } 4208 4209 SYSCTL_PROC(_security_jail, OID_AUTO, vnet, 4210 
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 4211 sysctl_jail_vnet, "I", "Jail owns VNET?"); 4212 4213 #if defined(INET) || defined(INET6) 4214 SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 4215 &jail_max_af_ips, 0, 4216 "Number of IP addresses a jail may have at most per address family"); 4217 #endif 4218 4219 /* 4220 * Default parameters for jail(2) compatability. For historical reasons, 4221 * the sysctl names have varying similarity to the parameter names. Prisons 4222 * just see their own parameters, and can't change them. 4223 */ 4224 static int 4225 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 4226 { 4227 struct prison *pr; 4228 int allow, error, i; 4229 4230 pr = req->td->td_ucred->cr_prison; 4231 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 4232 4233 /* Get the current flag value, and convert it to a boolean. */ 4234 i = (allow & arg2) ? 1 : 0; 4235 if (arg1 != NULL) 4236 i = !i; 4237 error = sysctl_handle_int(oidp, &i, 0, req); 4238 if (error || !req->newptr) 4239 return (error); 4240 i = i ? arg2 : 0; 4241 if (arg1 != NULL) 4242 i ^= arg2; 4243 /* 4244 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 4245 * for writing. 
4246 */ 4247 mtx_lock(&prison0.pr_mtx); 4248 jail_default_allow = (jail_default_allow & ~arg2) | i; 4249 mtx_unlock(&prison0.pr_mtx); 4250 return (0); 4251 } 4252 4253 SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 4254 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4255 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 4256 "Processes in jail can set their hostnames"); 4257 SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 4258 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4259 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 4260 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 4261 SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 4262 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4263 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 4264 "Processes in jail can use System V IPC primitives"); 4265 SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 4266 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4267 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 4268 "Prison root can create raw sockets"); 4269 SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 4270 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4271 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 4272 "Processes in jail can alter system file flags"); 4273 SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 4274 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4275 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 4276 "Processes in jail can mount/unmount jail-friendly file systems"); 4277 SYSCTL_PROC(_security_jail, OID_AUTO, mount_devfs_allowed, 4278 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4279 NULL, PR_ALLOW_MOUNT_DEVFS, sysctl_jail_default_allow, "I", 4280 "Processes in jail can mount the devfs file system"); 4281 SYSCTL_PROC(_security_jail, OID_AUTO, mount_fdescfs_allowed, 4282 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4283 NULL, PR_ALLOW_MOUNT_FDESCFS, sysctl_jail_default_allow, "I", 4284 "Processes in 
jail can mount the fdescfs file system"); 4285 SYSCTL_PROC(_security_jail, OID_AUTO, mount_nullfs_allowed, 4286 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4287 NULL, PR_ALLOW_MOUNT_NULLFS, sysctl_jail_default_allow, "I", 4288 "Processes in jail can mount the nullfs file system"); 4289 SYSCTL_PROC(_security_jail, OID_AUTO, mount_procfs_allowed, 4290 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4291 NULL, PR_ALLOW_MOUNT_PROCFS, sysctl_jail_default_allow, "I", 4292 "Processes in jail can mount the procfs file system"); 4293 SYSCTL_PROC(_security_jail, OID_AUTO, mount_tmpfs_allowed, 4294 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4295 NULL, PR_ALLOW_MOUNT_TMPFS, sysctl_jail_default_allow, "I", 4296 "Processes in jail can mount the tmpfs file system"); 4297 SYSCTL_PROC(_security_jail, OID_AUTO, mount_zfs_allowed, 4298 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4299 NULL, PR_ALLOW_MOUNT_ZFS, sysctl_jail_default_allow, "I", 4300 "Processes in jail can mount the zfs file system"); 4301 4302 static int 4303 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) 4304 { 4305 struct prison *pr; 4306 int level, error; 4307 4308 pr = req->td->td_ucred->cr_prison; 4309 level = (pr == &prison0) ? 
*(int *)arg1 : *(int *)((char *)pr + arg2); 4310 error = sysctl_handle_int(oidp, &level, 0, req); 4311 if (error || !req->newptr) 4312 return (error); 4313 *(int *)arg1 = level; 4314 return (0); 4315 } 4316 4317 SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, 4318 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 4319 &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), 4320 sysctl_jail_default_level, "I", 4321 "Processes in jail cannot see all mounted file systems"); 4322 4323 SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset, 4324 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 4325 &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum), 4326 sysctl_jail_default_level, "I", 4327 "Ruleset for the devfs filesystem in jail"); 4328 4329 /* 4330 * Nodes to describe jail parameters. Maximum length of string parameters 4331 * is returned in the string itself, and the other parameters exist merely 4332 * to make themselves and their types known. 4333 */ 4334 SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 4335 "Jail parameters"); 4336 4337 int 4338 sysctl_jail_param(SYSCTL_HANDLER_ARGS) 4339 { 4340 int i; 4341 long l; 4342 size_t s; 4343 char numbuf[12]; 4344 4345 switch (oidp->oid_kind & CTLTYPE) 4346 { 4347 case CTLTYPE_LONG: 4348 case CTLTYPE_ULONG: 4349 l = 0; 4350 #ifdef SCTL_MASK32 4351 if (!(req->flags & SCTL_MASK32)) 4352 #endif 4353 return (SYSCTL_OUT(req, &l, sizeof(l))); 4354 case CTLTYPE_INT: 4355 case CTLTYPE_UINT: 4356 i = 0; 4357 return (SYSCTL_OUT(req, &i, sizeof(i))); 4358 case CTLTYPE_STRING: 4359 snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2); 4360 return 4361 (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 4362 case CTLTYPE_STRUCT: 4363 s = (size_t)arg2; 4364 return (SYSCTL_OUT(req, &s, sizeof(s))); 4365 } 4366 return (0); 4367 } 4368 4369 /* 4370 * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at 4371 * jail creation time but cannot be changed in an existing jail. 
4372 */ 4373 SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); 4374 SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); 4375 SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 4376 SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); 4377 SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 4378 "I", "Jail secure level"); 4379 SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", 4380 "Jail value for kern.osreldate and uname -K"); 4381 SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, 4382 "Jail value for kern.osrelease and uname -r"); 4383 SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, 4384 "I", "Jail cannot see all mounted file systems"); 4385 SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, 4386 "I", "Ruleset for in-jail devfs mounts"); 4387 SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 4388 "B", "Jail persistence"); 4389 #ifdef VIMAGE 4390 SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, 4391 "E,jailsys", "Virtual network stack"); 4392 #endif 4393 SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 4394 "B", "Jail is in the process of shutting down"); 4395 4396 SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); 4397 SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, 4398 "I", "Current number of child jails"); 4399 SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, 4400 "I", "Maximum number of child jails"); 4401 4402 SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); 4403 SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 4404 "Jail hostname"); 4405 SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, 4406 "Jail NIS domainname"); 4407 SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, 4408 "Jail host UUID"); 4409 SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, 4410 "LU", "Jail host 
ID"); 4411 4412 SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 4413 SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 4414 4415 #ifdef INET 4416 SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, 4417 "Jail IPv4 address virtualization"); 4418 SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 4419 "S,in_addr,a", "Jail IPv4 addresses"); 4420 SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, 4421 "B", "Do (not) use IPv4 source address selection rather than the " 4422 "primary jail IPv4 address."); 4423 #endif 4424 #ifdef INET6 4425 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, 4426 "Jail IPv6 address virtualization"); 4427 SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 4428 "S,in6_addr,a", "Jail IPv6 addresses"); 4429 SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, 4430 "B", "Do (not) use IPv6 source address selection rather than the " 4431 "primary jail IPv6 address."); 4432 #endif 4433 4434 SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); 4435 SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, 4436 "B", "Jail may set hostname"); 4437 SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, 4438 "B", "Jail may use SYSV IPC"); 4439 SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, 4440 "B", "Jail may create raw sockets"); 4441 SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, 4442 "B", "Jail may alter system file flags"); 4443 SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, 4444 "B", "Jail may set file quotas"); 4445 SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, 4446 "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); 4447 4448 SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); 4449 SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, 4450 "B", "Jail may mount/unmount jail-friendly file systems in general"); 4451 
SYSCTL_JAIL_PARAM(_allow_mount, devfs, CTLTYPE_INT | CTLFLAG_RW, 4452 "B", "Jail may mount the devfs file system"); 4453 SYSCTL_JAIL_PARAM(_allow_mount, fdescfs, CTLTYPE_INT | CTLFLAG_RW, 4454 "B", "Jail may mount the fdescfs file system"); 4455 SYSCTL_JAIL_PARAM(_allow_mount, nullfs, CTLTYPE_INT | CTLFLAG_RW, 4456 "B", "Jail may mount the nullfs file system"); 4457 SYSCTL_JAIL_PARAM(_allow_mount, procfs, CTLTYPE_INT | CTLFLAG_RW, 4458 "B", "Jail may mount the procfs file system"); 4459 SYSCTL_JAIL_PARAM(_allow_mount, tmpfs, CTLTYPE_INT | CTLFLAG_RW, 4460 "B", "Jail may mount the tmpfs file system"); 4461 SYSCTL_JAIL_PARAM(_allow_mount, zfs, CTLTYPE_INT | CTLFLAG_RW, 4462 "B", "Jail may mount the zfs file system"); 4463 4464 #ifdef RACCT 4465 void 4466 prison_racct_foreach(void (*callback)(struct racct *racct, 4467 void *arg2, void *arg3), void *arg2, void *arg3) 4468 { 4469 struct prison_racct *prr; 4470 4471 ASSERT_RACCT_ENABLED(); 4472 4473 sx_slock(&allprison_lock); 4474 LIST_FOREACH(prr, &allprison_racct, prr_next) 4475 (callback)(prr->prr_racct, arg2, arg3); 4476 sx_sunlock(&allprison_lock); 4477 } 4478 4479 static struct prison_racct * 4480 prison_racct_find_locked(const char *name) 4481 { 4482 struct prison_racct *prr; 4483 4484 ASSERT_RACCT_ENABLED(); 4485 sx_assert(&allprison_lock, SA_XLOCKED); 4486 4487 if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) 4488 return (NULL); 4489 4490 LIST_FOREACH(prr, &allprison_racct, prr_next) { 4491 if (strcmp(name, prr->prr_name) != 0) 4492 continue; 4493 4494 /* Found prison_racct with a matching name? */ 4495 prison_racct_hold(prr); 4496 return (prr); 4497 } 4498 4499 /* Add new prison_racct. 
*/ 4500 prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK); 4501 racct_create(&prr->prr_racct); 4502 4503 strcpy(prr->prr_name, name); 4504 refcount_init(&prr->prr_refcount, 1); 4505 LIST_INSERT_HEAD(&allprison_racct, prr, prr_next); 4506 4507 return (prr); 4508 } 4509 4510 struct prison_racct * 4511 prison_racct_find(const char *name) 4512 { 4513 struct prison_racct *prr; 4514 4515 ASSERT_RACCT_ENABLED(); 4516 4517 sx_xlock(&allprison_lock); 4518 prr = prison_racct_find_locked(name); 4519 sx_xunlock(&allprison_lock); 4520 return (prr); 4521 } 4522 4523 void 4524 prison_racct_hold(struct prison_racct *prr) 4525 { 4526 4527 ASSERT_RACCT_ENABLED(); 4528 4529 refcount_acquire(&prr->prr_refcount); 4530 } 4531 4532 static void 4533 prison_racct_free_locked(struct prison_racct *prr) 4534 { 4535 4536 ASSERT_RACCT_ENABLED(); 4537 sx_assert(&allprison_lock, SA_XLOCKED); 4538 4539 if (refcount_release(&prr->prr_refcount)) { 4540 racct_destroy(&prr->prr_racct); 4541 LIST_REMOVE(prr, prr_next); 4542 free(prr, M_PRISON_RACCT); 4543 } 4544 } 4545 4546 void 4547 prison_racct_free(struct prison_racct *prr) 4548 { 4549 int old; 4550 4551 ASSERT_RACCT_ENABLED(); 4552 sx_assert(&allprison_lock, SA_UNLOCKED); 4553 4554 old = prr->prr_refcount; 4555 if (old > 1 && atomic_cmpset_int(&prr->prr_refcount, old, old - 1)) 4556 return; 4557 4558 sx_xlock(&allprison_lock); 4559 prison_racct_free_locked(prr); 4560 sx_xunlock(&allprison_lock); 4561 } 4562 4563 static void 4564 prison_racct_attach(struct prison *pr) 4565 { 4566 struct prison_racct *prr; 4567 4568 ASSERT_RACCT_ENABLED(); 4569 sx_assert(&allprison_lock, SA_XLOCKED); 4570 4571 prr = prison_racct_find_locked(pr->pr_name); 4572 KASSERT(prr != NULL, ("cannot find prison_racct")); 4573 4574 pr->pr_prison_racct = prr; 4575 } 4576 4577 /* 4578 * Handle jail renaming. From the racct point of view, renaming means 4579 * moving from one prison_racct to another. 
4580 */ 4581 static void 4582 prison_racct_modify(struct prison *pr) 4583 { 4584 struct proc *p; 4585 struct ucred *cred; 4586 struct prison_racct *oldprr; 4587 4588 ASSERT_RACCT_ENABLED(); 4589 4590 sx_slock(&allproc_lock); 4591 sx_xlock(&allprison_lock); 4592 4593 if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) { 4594 sx_xunlock(&allprison_lock); 4595 sx_sunlock(&allproc_lock); 4596 return; 4597 } 4598 4599 oldprr = pr->pr_prison_racct; 4600 pr->pr_prison_racct = NULL; 4601 4602 prison_racct_attach(pr); 4603 4604 /* 4605 * Move resource utilisation records. 4606 */ 4607 racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct); 4608 4609 /* 4610 * Force rctl to reattach rules to processes. 4611 */ 4612 FOREACH_PROC_IN_SYSTEM(p) { 4613 PROC_LOCK(p); 4614 cred = crhold(p->p_ucred); 4615 PROC_UNLOCK(p); 4616 racct_proc_ucred_changed(p, cred, cred); 4617 crfree(cred); 4618 } 4619 4620 sx_sunlock(&allproc_lock); 4621 prison_racct_free_locked(oldprr); 4622 sx_xunlock(&allprison_lock); 4623 } 4624 4625 static void 4626 prison_racct_detach(struct prison *pr) 4627 { 4628 4629 ASSERT_RACCT_ENABLED(); 4630 sx_assert(&allprison_lock, SA_UNLOCKED); 4631 4632 if (pr->pr_prison_racct == NULL) 4633 return; 4634 prison_racct_free(pr->pr_prison_racct); 4635 pr->pr_prison_racct = NULL; 4636 } 4637 #endif /* RACCT */ 4638 4639 #ifdef DDB 4640 4641 static void 4642 db_show_prison(struct prison *pr) 4643 { 4644 int fi; 4645 #if defined(INET) || defined(INET6) 4646 int ii; 4647 #endif 4648 unsigned jsf; 4649 #ifdef INET6 4650 char ip6buf[INET6_ADDRSTRLEN]; 4651 #endif 4652 4653 db_printf("prison %p:\n", pr); 4654 db_printf(" jid = %d\n", pr->pr_id); 4655 db_printf(" name = %s\n", pr->pr_name); 4656 db_printf(" parent = %p\n", pr->pr_parent); 4657 db_printf(" ref = %d\n", pr->pr_ref); 4658 db_printf(" uref = %d\n", pr->pr_uref); 4659 db_printf(" path = %s\n", pr->pr_path); 4660 db_printf(" cpuset = %d\n", pr->pr_cpuset 4661 ? 
pr->pr_cpuset->cs_id : -1); 4662 #ifdef VIMAGE 4663 db_printf(" vnet = %p\n", pr->pr_vnet); 4664 #endif 4665 db_printf(" root = %p\n", pr->pr_root); 4666 db_printf(" securelevel = %d\n", pr->pr_securelevel); 4667 db_printf(" devfs_rsnum = %d\n", pr->pr_devfs_rsnum); 4668 db_printf(" children.max = %d\n", pr->pr_childmax); 4669 db_printf(" children.cur = %d\n", pr->pr_childcount); 4670 db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); 4671 db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); 4672 db_printf(" flags = 0x%x", pr->pr_flags); 4673 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 4674 fi++) 4675 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) 4676 db_printf(" %s", pr_flag_names[fi]); 4677 for (fi = 0; fi < sizeof(pr_flag_jailsys) / sizeof(pr_flag_jailsys[0]); 4678 fi++) { 4679 jsf = pr->pr_flags & 4680 (pr_flag_jailsys[fi].disable | pr_flag_jailsys[fi].new); 4681 db_printf(" %-16s= %s\n", pr_flag_jailsys[fi].name, 4682 pr_flag_jailsys[fi].disable && 4683 (jsf == pr_flag_jailsys[fi].disable) ? "disable" 4684 : (jsf == pr_flag_jailsys[fi].new) ? "new" 4685 : "inherit"); 4686 } 4687 db_printf(" allow = 0x%x", pr->pr_allow); 4688 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 4689 fi++) 4690 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) 4691 db_printf(" %s", pr_allow_names[fi]); 4692 db_printf("\n"); 4693 db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); 4694 db_printf(" host.hostname = %s\n", pr->pr_hostname); 4695 db_printf(" host.domainname = %s\n", pr->pr_domainname); 4696 db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); 4697 db_printf(" host.hostid = %lu\n", pr->pr_hostid); 4698 #ifdef INET 4699 db_printf(" ip4s = %d\n", pr->pr_ip4s); 4700 for (ii = 0; ii < pr->pr_ip4s; ii++) 4701 db_printf(" %s %s\n", 4702 ii == 0 ? 
"ip4.addr =" : " ", 4703 inet_ntoa(pr->pr_ip4[ii])); 4704 #endif 4705 #ifdef INET6 4706 db_printf(" ip6s = %d\n", pr->pr_ip6s); 4707 for (ii = 0; ii < pr->pr_ip6s; ii++) 4708 db_printf(" %s %s\n", 4709 ii == 0 ? "ip6.addr =" : " ", 4710 ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 4711 #endif 4712 } 4713 4714 DB_SHOW_COMMAND(prison, db_show_prison_command) 4715 { 4716 struct prison *pr; 4717 4718 if (!have_addr) { 4719 /* 4720 * Show all prisons in the list, and prison0 which is not 4721 * listed. 4722 */ 4723 db_show_prison(&prison0); 4724 if (!db_pager_quit) { 4725 TAILQ_FOREACH(pr, &allprison, pr_list) { 4726 db_show_prison(pr); 4727 if (db_pager_quit) 4728 break; 4729 } 4730 } 4731 return; 4732 } 4733 4734 if (addr == 0) 4735 pr = &prison0; 4736 else { 4737 /* Look for a prison with the ID and with references. */ 4738 TAILQ_FOREACH(pr, &allprison, pr_list) 4739 if (pr->pr_id == addr && pr->pr_ref > 0) 4740 break; 4741 if (pr == NULL) 4742 /* Look again, without requiring a reference. */ 4743 TAILQ_FOREACH(pr, &allprison, pr_list) 4744 if (pr->pr_id == addr) 4745 break; 4746 if (pr == NULL) 4747 /* Assume address points to a valid prison. */ 4748 pr = (struct prison *)addr; 4749 } 4750 db_show_prison(pr); 4751 } 4752 4753 #endif /* DDB */ 4754