1 /*- 2 * Copyright (c) 1999 Poul-Henning Kamp. 3 * Copyright (c) 2008 Bjoern A. Zeeb. 4 * Copyright (c) 2009 James Gritton. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <netinet/in.h>
#ifdef DDB
#include <ddb/ddb.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#endif /* INET6 */
#endif /* DDB */

#include <security/mac/mac_framework.h>

MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/*
 * prison0 describes what is "real" about the system.
 *
 * It is statically initialised with one reference and one user reference,
 * carries the PR_HOST flag and is granted every allow.* permission
 * (PR_ALLOW_ALL).  kern_jail_set() treats it as the top of the hierarchy:
 * names of its direct children carry no parent prefix.
 */
struct prison prison0 = {
	.pr_id		= 0,
	.pr_name	= "0",
	.pr_ref		= 1,
	.pr_uref	= 1,
	.pr_path	= "/",
	.pr_securelevel	= -1,
	.pr_uuid	= "00000000-0000-0000-0000-000000000000",
	.pr_children	= LIST_HEAD_INITIALIZER(&prison0.pr_children),
	.pr_flags	= PR_HOST,
	.pr_allow	= PR_ALLOW_ALL,
};
MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);

/* allprison and lastprid are protected by allprison_lock. */
struct	sx allprison_lock;
SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
/* List of all prisons; kern_jail_set() keeps it ordered by ascending pr_id. */
struct	prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
/* Most recently auto-assigned jail ID; the search for a free ID starts after it. */
int	lastprid = 0;

/* Forward declarations of file-local helpers (defined later in this file). */
static int do_jail_attach(struct thread *td, struct prison *pr);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static char *prison_path(struct prison *pr1, struct prison *pr2);
static void prison_remove_one(struct prison *pr);
#ifdef INET
static int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4);
#endif
#ifdef INET6
static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6);
#endif

/*
 * Flags for prison_deref
 *
 * NOTE(review): meanings below are inferred from the callers in
 * kern_jail_set(), not from prison_deref() itself -- confirm there.
 * PD_DEREF / PD_DEUREF are passed to undo a pr_ref / pr_uref grab,
 * PD_LOCKED is passed while the prison's pr_mtx is held, and
 * PD_LIST_SLOCKED / PD_LIST_XLOCKED presumably describe how the caller
 * holds allprison_lock (shared / exclusive).
 */
#define	PD_DEREF	0x01
#define	PD_DEUREF	0x02
#define	PD_LOCKED	0x04
#define	PD_LIST_SLOCKED	0x08
#define	PD_LIST_XLOCKED	0x10

/*
 * Parameter names corresponding to PR_* flag values
 *
 * The position of a name in the table is significant: kern_jail_set()
 * maps entry fi to flag bit (1 << fi).  "ip6" is pinned to index 3 with a
 * designated initializer so that its bit does not move when INET is not
 * compiled in; the resulting NULL hole at index 2 is skipped by the
 * lookup loop.  pr_flag_nonames[] must mirror pr_flag_names[] entry for
 * entry.
 */
static char *pr_flag_names[] = {
	[0] = "persist",
	"host",
#ifdef INET
	"ip4",
#endif
#ifdef INET6
	[3] = "ip6",
#endif
};

/* Negated ("no"-prefixed) spellings, index-aligned with pr_flag_names[]. */
static char *pr_flag_nonames[] = {
	[0] = "nopersist",
	"nohost",
#ifdef INET
	"noip4",
#endif
#ifdef INET6
	[3] = "noip6",
#endif
};

/*
 * Parameter names for the allow.* permission bits.  As with the flag
 * tables, entry fi corresponds to bit (1 << fi), both in the pr_allow mask
 * built by kern_jail_set() and in jail_default_allow as read by
 * kern_jail().  kern_jail() emits one option per entry, so adding an entry
 * here increases the number of iovecs it needs.
 */
static char *pr_allow_names[] = {
	"allow.set_hostname",
	"allow.sysvipc",
	"allow.raw_sockets",
	"allow.chflags",
	"allow.mount",
	"allow.quotas",
	"allow.jails",
	"allow.socket_af",
};

/* Negated spellings, index-aligned with pr_allow_names[]. */
static char *pr_allow_nonames[] = {
	"allow.noset_hostname",
	"allow.nosysvipc",
	"allow.noraw_sockets",
	"allow.nochflags",
	"allow.nomount",
	"allow.noquotas",
	"allow.nojails",
	"allow.nosocket_af",
};

/* Permission mask used as the initial value of jail_default_allow. */
#define	JAIL_DEFAULT_ALLOW		PR_ALLOW_SET_HOSTNAME
static unsigned
jail_default_allow = JAIL_DEFAULT_ALLOW; 167 static int jail_default_enforce_statfs = 2; 168 #if defined(INET) || defined(INET6) 169 static int jail_max_af_ips = 255; 170 #endif 171 172 #ifdef INET 173 static int 174 qcmp_v4(const void *ip1, const void *ip2) 175 { 176 in_addr_t iaa, iab; 177 178 /* 179 * We need to compare in HBO here to get the list sorted as expected 180 * by the result of the code. Sorting NBO addresses gives you 181 * interesting results. If you do not understand, do not try. 182 */ 183 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 184 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 185 186 /* 187 * Do not simply return the difference of the two numbers, the int is 188 * not wide enough. 189 */ 190 if (iaa > iab) 191 return (1); 192 else if (iaa < iab) 193 return (-1); 194 else 195 return (0); 196 } 197 #endif 198 199 #ifdef INET6 200 static int 201 qcmp_v6(const void *ip1, const void *ip2) 202 { 203 const struct in6_addr *ia6a, *ia6b; 204 int i, rc; 205 206 ia6a = (const struct in6_addr *)ip1; 207 ia6b = (const struct in6_addr *)ip2; 208 209 rc = 0; 210 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 211 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 212 rc = 1; 213 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 214 rc = -1; 215 } 216 return (rc); 217 } 218 #endif 219 220 /* 221 * struct jail_args { 222 * struct jail *jail; 223 * }; 224 */ 225 int 226 jail(struct thread *td, struct jail_args *uap) 227 { 228 uint32_t version; 229 int error; 230 struct jail j; 231 232 error = copyin(uap->jail, &version, sizeof(uint32_t)); 233 if (error) 234 return (error); 235 236 switch (version) { 237 case 0: 238 { 239 struct jail_v0 j0; 240 241 /* FreeBSD single IPv4 jails. 
*/ 242 bzero(&j, sizeof(struct jail)); 243 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 244 if (error) 245 return (error); 246 j.version = j0.version; 247 j.path = j0.path; 248 j.hostname = j0.hostname; 249 j.ip4s = j0.ip_number; 250 break; 251 } 252 253 case 1: 254 /* 255 * Version 1 was used by multi-IPv4 jail implementations 256 * that never made it into the official kernel. 257 */ 258 return (EINVAL); 259 260 case 2: /* JAIL_API_VERSION */ 261 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 262 error = copyin(uap->jail, &j, sizeof(struct jail)); 263 if (error) 264 return (error); 265 break; 266 267 default: 268 /* Sci-Fi jails are not supported, sorry. */ 269 return (EINVAL); 270 } 271 return (kern_jail(td, &j)); 272 } 273 274 int 275 kern_jail(struct thread *td, struct jail *j) 276 { 277 struct iovec optiov[24]; 278 struct uio opt; 279 char *u_path, *u_hostname, *u_name; 280 #ifdef INET 281 int ip4s; 282 struct in_addr *u_ip4; 283 #endif 284 #ifdef INET6 285 struct in6_addr *u_ip6; 286 #endif 287 size_t tmplen; 288 int error, enforce_statfs, fi; 289 290 bzero(&optiov, sizeof(optiov)); 291 opt.uio_iov = optiov; 292 opt.uio_iovcnt = 0; 293 opt.uio_offset = -1; 294 opt.uio_resid = -1; 295 opt.uio_segflg = UIO_SYSSPACE; 296 opt.uio_rw = UIO_READ; 297 opt.uio_td = td; 298 299 /* Set permissions for top-level jails from sysctls. */ 300 if (!jailed(td->td_ucred)) { 301 for (fi = 0; fi < sizeof(pr_allow_names) / 302 sizeof(pr_allow_names[0]); fi++) { 303 optiov[opt.uio_iovcnt].iov_base = 304 (jail_default_allow & (1 << fi)) 305 ? 
pr_allow_names[fi] : pr_allow_nonames[fi]; 306 optiov[opt.uio_iovcnt].iov_len = 307 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 308 opt.uio_iovcnt += 2; 309 } 310 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 311 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 312 opt.uio_iovcnt++; 313 enforce_statfs = jail_default_enforce_statfs; 314 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 315 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 316 opt.uio_iovcnt++; 317 } 318 319 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 320 #ifdef INET 321 ip4s = (j->version == 0) ? 1 : j->ip4s; 322 if (ip4s > jail_max_af_ips) 323 return (EINVAL); 324 tmplen += ip4s * sizeof(struct in_addr); 325 #else 326 if (j->ip4s > 0) 327 return (EINVAL); 328 #endif 329 #ifdef INET6 330 if (j->ip6s > jail_max_af_ips) 331 return (EINVAL); 332 tmplen += j->ip6s * sizeof(struct in6_addr); 333 #else 334 if (j->ip6s > 0) 335 return (EINVAL); 336 #endif 337 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 338 u_hostname = u_path + MAXPATHLEN; 339 u_name = u_hostname + MAXHOSTNAMELEN; 340 #ifdef INET 341 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 342 #endif 343 #ifdef INET6 344 #ifdef INET 345 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 346 #else 347 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 348 #endif 349 #endif 350 optiov[opt.uio_iovcnt].iov_base = "path"; 351 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 352 opt.uio_iovcnt++; 353 optiov[opt.uio_iovcnt].iov_base = u_path; 354 error = copyinstr(j->path, u_path, MAXPATHLEN, 355 &optiov[opt.uio_iovcnt].iov_len); 356 if (error) { 357 free(u_path, M_TEMP); 358 return (error); 359 } 360 opt.uio_iovcnt++; 361 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 362 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 363 opt.uio_iovcnt++; 364 optiov[opt.uio_iovcnt].iov_base = u_hostname; 365 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 366 &optiov[opt.uio_iovcnt].iov_len); 367 if (error) { 
368 free(u_path, M_TEMP); 369 return (error); 370 } 371 opt.uio_iovcnt++; 372 if (j->jailname != NULL) { 373 optiov[opt.uio_iovcnt].iov_base = "name"; 374 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 375 opt.uio_iovcnt++; 376 optiov[opt.uio_iovcnt].iov_base = u_name; 377 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 378 &optiov[opt.uio_iovcnt].iov_len); 379 if (error) { 380 free(u_path, M_TEMP); 381 return (error); 382 } 383 opt.uio_iovcnt++; 384 } 385 #ifdef INET 386 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 387 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 388 opt.uio_iovcnt++; 389 optiov[opt.uio_iovcnt].iov_base = u_ip4; 390 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 391 if (j->version == 0) 392 u_ip4->s_addr = j->ip4s; 393 else { 394 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 395 if (error) { 396 free(u_path, M_TEMP); 397 return (error); 398 } 399 } 400 opt.uio_iovcnt++; 401 #endif 402 #ifdef INET6 403 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 404 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 405 opt.uio_iovcnt++; 406 optiov[opt.uio_iovcnt].iov_base = u_ip6; 407 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 408 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 409 if (error) { 410 free(u_path, M_TEMP); 411 return (error); 412 } 413 opt.uio_iovcnt++; 414 #endif 415 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 416 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 417 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 418 free(u_path, M_TEMP); 419 return (error); 420 } 421 422 423 /* 424 * struct jail_set_args { 425 * struct iovec *iovp; 426 * unsigned int iovcnt; 427 * int flags; 428 * }; 429 */ 430 int 431 jail_set(struct thread *td, struct jail_set_args *uap) 432 { 433 struct uio *auio; 434 int error; 435 436 /* Check that we have an even number of iovecs. 
*/ 437 if (uap->iovcnt & 1) 438 return (EINVAL); 439 440 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 441 if (error) 442 return (error); 443 error = kern_jail_set(td, auio, uap->flags); 444 free(auio, M_IOV); 445 return (error); 446 } 447 448 int 449 kern_jail_set(struct thread *td, struct uio *optuio, int flags) 450 { 451 struct nameidata nd; 452 #ifdef INET 453 struct in_addr *ip4; 454 #endif 455 #ifdef INET6 456 struct in6_addr *ip6; 457 #endif 458 struct vfsopt *opt; 459 struct vfsoptlist *opts; 460 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 461 struct vnode *root; 462 char *domain, *errmsg, *host, *name, *p, *path, *uuid; 463 #if defined(INET) || defined(INET6) 464 void *op; 465 #endif 466 unsigned long hid; 467 size_t namelen, onamelen; 468 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 469 int gotenforce, gothid, gotslevel, fi, jid, len; 470 int slevel, vfslocked; 471 #if defined(INET) || defined(INET6) 472 int ii, ij; 473 #endif 474 #ifdef INET 475 int ip4s, ip4a, redo_ip4; 476 #endif 477 #ifdef INET6 478 int ip6s, ip6a, redo_ip6; 479 #endif 480 unsigned pr_flags, ch_flags; 481 unsigned pr_allow, ch_allow, tallow; 482 char numbuf[12]; 483 484 error = priv_check(td, PRIV_JAIL_SET); 485 if (!error && (flags & JAIL_ATTACH)) 486 error = priv_check(td, PRIV_JAIL_ATTACH); 487 if (error) 488 return (error); 489 mypr = ppr = td->td_ucred->cr_prison; 490 if ((flags & JAIL_CREATE) && !(mypr->pr_allow & PR_ALLOW_JAILS)) 491 return (EPERM); 492 if (flags & ~JAIL_SET_MASK) 493 return (EINVAL); 494 495 /* 496 * Check all the parameters before committing to anything. Not all 497 * errors can be caught early, but we may as well try. Also, this 498 * takes care of some expensive stuff (path lookup) before getting 499 * the allprison lock. 500 * 501 * XXX Jails are not filesystems, and jail parameters are not mount 502 * options. But it makes more sense to re-use the vfsopt code 503 * than duplicate it under a different name. 
504 */ 505 error = vfs_buildopts(optuio, &opts); 506 if (error) 507 return (error); 508 #ifdef INET 509 ip4a = 0; 510 ip4 = NULL; 511 #endif 512 #ifdef INET6 513 ip6a = 0; 514 ip6 = NULL; 515 #endif 516 517 #if defined(INET) || defined(INET6) 518 again: 519 #endif 520 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 521 if (error == ENOENT) 522 jid = 0; 523 else if (error != 0) 524 goto done_free; 525 526 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 527 if (error == ENOENT) 528 gotslevel = 0; 529 else if (error != 0) 530 goto done_free; 531 else 532 gotslevel = 1; 533 534 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 535 gotenforce = (error == 0); 536 if (gotenforce) { 537 if (enforce < 0 || enforce > 2) 538 return (EINVAL); 539 } else if (error != ENOENT) 540 goto done_free; 541 542 pr_flags = ch_flags = 0; 543 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 544 fi++) { 545 if (pr_flag_names[fi] == NULL) 546 continue; 547 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 548 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 549 } 550 ch_flags |= pr_flags; 551 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 552 && !(pr_flags & PR_PERSIST)) { 553 error = EINVAL; 554 vfs_opterror(opts, "new jail must persist or attach"); 555 goto done_errmsg; 556 } 557 558 pr_allow = ch_allow = 0; 559 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 560 fi++) { 561 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 562 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 563 } 564 ch_allow |= pr_allow; 565 566 error = vfs_getopt(opts, "name", (void **)&name, &len); 567 if (error == ENOENT) 568 name = NULL; 569 else if (error != 0) 570 goto done_free; 571 else { 572 if (len == 0 || name[len - 1] != '\0') { 573 error = EINVAL; 574 goto done_free; 575 } 576 if (len > MAXHOSTNAMELEN) { 577 error = ENAMETOOLONG; 578 goto done_free; 579 } 580 
} 581 582 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 583 if (error == ENOENT) 584 host = NULL; 585 else if (error != 0) 586 goto done_free; 587 else { 588 ch_flags |= PR_HOST; 589 pr_flags |= PR_HOST; 590 if (len == 0 || host[len - 1] != '\0') { 591 error = EINVAL; 592 goto done_free; 593 } 594 if (len > MAXHOSTNAMELEN) { 595 error = ENAMETOOLONG; 596 goto done_free; 597 } 598 } 599 600 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 601 if (error == ENOENT) 602 domain = NULL; 603 else if (error != 0) 604 goto done_free; 605 else { 606 ch_flags |= PR_HOST; 607 pr_flags |= PR_HOST; 608 if (len == 0 || domain[len - 1] != '\0') { 609 error = EINVAL; 610 goto done_free; 611 } 612 if (len > MAXHOSTNAMELEN) { 613 error = ENAMETOOLONG; 614 goto done_free; 615 } 616 } 617 618 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 619 if (error == ENOENT) 620 uuid = NULL; 621 else if (error != 0) 622 goto done_free; 623 else { 624 ch_flags |= PR_HOST; 625 pr_flags |= PR_HOST; 626 if (len == 0 || uuid[len - 1] != '\0') { 627 error = EINVAL; 628 goto done_free; 629 } 630 if (len > HOSTUUIDLEN) { 631 error = ENAMETOOLONG; 632 goto done_free; 633 } 634 } 635 636 #ifdef COMPAT_IA32 637 if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 638 uint32_t hid32; 639 640 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 641 hid = hid32; 642 } else 643 #endif 644 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 645 if (error == ENOENT) 646 gothid = 0; 647 else if (error != 0) 648 goto done_free; 649 else { 650 gothid = 1; 651 ch_flags |= PR_HOST; 652 pr_flags |= PR_HOST; 653 } 654 655 /* This might be the second time around for this option. 
*/ 656 #ifdef INET 657 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 658 if (error == ENOENT) 659 ip4s = -1; 660 else if (error != 0) 661 goto done_free; 662 else if (ip4s & (sizeof(*ip4) - 1)) { 663 error = EINVAL; 664 goto done_free; 665 } else { 666 ch_flags |= PR_IP4_USER; 667 pr_flags |= PR_IP4_USER; 668 if (ip4s > 0) { 669 ip4s /= sizeof(*ip4); 670 if (ip4s > jail_max_af_ips) { 671 error = EINVAL; 672 vfs_opterror(opts, "too many IPv4 addresses"); 673 goto done_errmsg; 674 } 675 if (ip4a < ip4s) { 676 ip4a = ip4s; 677 free(ip4, M_PRISON); 678 ip4 = NULL; 679 } 680 if (ip4 == NULL) 681 ip4 = malloc(ip4a * sizeof(*ip4), M_PRISON, 682 M_WAITOK); 683 bcopy(op, ip4, ip4s * sizeof(*ip4)); 684 /* 685 * IP addresses are all sorted but ip[0] to preserve 686 * the primary IP address as given from userland. 687 * This special IP is used for unbound outgoing 688 * connections as well for "loopback" traffic. 689 */ 690 if (ip4s > 1) 691 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 692 /* 693 * Check for duplicate addresses and do some simple 694 * zero and broadcast checks. If users give other bogus 695 * addresses it is their problem. 696 * 697 * We do not have to care about byte order for these 698 * checks so we will do them in NBO. 
699 */ 700 for (ii = 0; ii < ip4s; ii++) { 701 if (ip4[ii].s_addr == INADDR_ANY || 702 ip4[ii].s_addr == INADDR_BROADCAST) { 703 error = EINVAL; 704 goto done_free; 705 } 706 if ((ii+1) < ip4s && 707 (ip4[0].s_addr == ip4[ii+1].s_addr || 708 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 709 error = EINVAL; 710 goto done_free; 711 } 712 } 713 } 714 } 715 #endif 716 717 #ifdef INET6 718 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 719 if (error == ENOENT) 720 ip6s = -1; 721 else if (error != 0) 722 goto done_free; 723 else if (ip6s & (sizeof(*ip6) - 1)) { 724 error = EINVAL; 725 goto done_free; 726 } else { 727 ch_flags |= PR_IP6_USER; 728 pr_flags |= PR_IP6_USER; 729 if (ip6s > 0) { 730 ip6s /= sizeof(*ip6); 731 if (ip6s > jail_max_af_ips) { 732 error = EINVAL; 733 vfs_opterror(opts, "too many IPv6 addresses"); 734 goto done_errmsg; 735 } 736 if (ip6a < ip6s) { 737 ip6a = ip6s; 738 free(ip6, M_PRISON); 739 ip6 = NULL; 740 } 741 if (ip6 == NULL) 742 ip6 = malloc(ip6a * sizeof(*ip6), M_PRISON, 743 M_WAITOK); 744 bcopy(op, ip6, ip6s * sizeof(*ip6)); 745 if (ip6s > 1) 746 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 747 for (ii = 0; ii < ip6s; ii++) { 748 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 749 error = EINVAL; 750 goto done_free; 751 } 752 if ((ii+1) < ip6s && 753 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 754 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 755 { 756 error = EINVAL; 757 goto done_free; 758 } 759 } 760 } 761 } 762 #endif 763 764 root = NULL; 765 error = vfs_getopt(opts, "path", (void **)&path, &len); 766 if (error == ENOENT) 767 path = NULL; 768 else if (error != 0) 769 goto done_free; 770 else { 771 if (flags & JAIL_UPDATE) { 772 error = EINVAL; 773 vfs_opterror(opts, 774 "path cannot be changed after creation"); 775 goto done_errmsg; 776 } 777 if (len == 0 || path[len - 1] != '\0') { 778 error = EINVAL; 779 goto done_free; 780 } 781 if (len < 2 || (len == 2 && path[0] == '/')) 782 path = NULL; 783 else { 784 /* Leave room for a real-root full 
pathname. */ 785 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 786 ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 787 error = ENAMETOOLONG; 788 goto done_free; 789 } 790 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE, 791 path, td); 792 error = namei(&nd); 793 if (error) 794 goto done_free; 795 vfslocked = NDHASGIANT(&nd); 796 root = nd.ni_vp; 797 NDFREE(&nd, NDF_ONLY_PNBUF); 798 if (root->v_type != VDIR) { 799 error = ENOTDIR; 800 vrele(root); 801 VFS_UNLOCK_GIANT(vfslocked); 802 goto done_free; 803 } 804 VFS_UNLOCK_GIANT(vfslocked); 805 } 806 } 807 808 /* 809 * Grab the allprison lock before letting modules check their 810 * parameters. Once we have it, do not let go so we'll have a 811 * consistent view of the OSD list. 812 */ 813 sx_xlock(&allprison_lock); 814 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 815 if (error) 816 goto done_unlock_list; 817 818 /* By now, all parameters should have been noted. */ 819 TAILQ_FOREACH(opt, opts, link) { 820 if (!opt->seen && strcmp(opt->name, "errmsg")) { 821 error = EINVAL; 822 vfs_opterror(opts, "unknown parameter: %s", opt->name); 823 goto done_unlock_list; 824 } 825 } 826 827 /* 828 * See if we are creating a new record or updating an existing one. 829 * This abuses the file error codes ENOENT and EEXIST. 830 */ 831 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 832 if (!cuflags) { 833 error = EINVAL; 834 vfs_opterror(opts, "no valid operation (create or update)"); 835 goto done_unlock_list; 836 } 837 pr = NULL; 838 if (jid != 0) { 839 /* 840 * See if a requested jid already exists. There is an 841 * information leak here if the jid exists but is not within 842 * the caller's jail hierarchy. Jail creators will get EEXIST 843 * even though they cannot see the jail, and CREATE | UPDATE 844 * will return ENOENT which is not normally a valid error. 
845 */ 846 if (jid < 0) { 847 error = EINVAL; 848 vfs_opterror(opts, "negative jid"); 849 goto done_unlock_list; 850 } 851 pr = prison_find(jid); 852 if (pr != NULL) { 853 ppr = pr->pr_parent; 854 /* Create: jid must not exist. */ 855 if (cuflags == JAIL_CREATE) { 856 mtx_unlock(&pr->pr_mtx); 857 error = EEXIST; 858 vfs_opterror(opts, "jail %d already exists", 859 jid); 860 goto done_unlock_list; 861 } 862 if (!prison_ischild(mypr, pr)) { 863 mtx_unlock(&pr->pr_mtx); 864 pr = NULL; 865 } else if (pr->pr_uref == 0) { 866 if (!(flags & JAIL_DYING)) { 867 mtx_unlock(&pr->pr_mtx); 868 error = ENOENT; 869 vfs_opterror(opts, "jail %d is dying", 870 jid); 871 goto done_unlock_list; 872 } else if ((flags & JAIL_ATTACH) || 873 (pr_flags & PR_PERSIST)) { 874 /* 875 * A dying jail might be resurrected 876 * (via attach or persist), but first 877 * it must determine if another jail 878 * has claimed its name. Accomplish 879 * this by implicitly re-setting the 880 * name. 881 */ 882 if (name == NULL) 883 name = prison_name(mypr, pr); 884 } 885 } 886 } 887 if (pr == NULL) { 888 /* Update: jid must exist. */ 889 if (cuflags == JAIL_UPDATE) { 890 error = ENOENT; 891 vfs_opterror(opts, "jail %d not found", jid); 892 goto done_unlock_list; 893 } 894 } 895 } 896 /* 897 * If the caller provided a name, look for a jail by that name. 898 * This has different semantics for creates and updates keyed by jid 899 * (where the name must not already exist in a different jail), 900 * and updates keyed by the name itself (where the name must exist 901 * because that is the jail being updated). 902 */ 903 if (name != NULL) { 904 p = strrchr(name, '.'); 905 if (p != NULL) { 906 /* 907 * This is a hierarchical name. Split it into the 908 * parent and child names, and make sure the parent 909 * exists or matches an already found jail. 
910 */ 911 *p = '\0'; 912 if (pr != NULL) { 913 if (strncmp(name, ppr->pr_name, p - name) || 914 ppr->pr_name[p - name] != '\0') { 915 mtx_unlock(&pr->pr_mtx); 916 error = EINVAL; 917 vfs_opterror(opts, 918 "cannot change jail's parent"); 919 goto done_unlock_list; 920 } 921 } else { 922 ppr = prison_find_name(mypr, name); 923 if (ppr == NULL) { 924 error = ENOENT; 925 vfs_opterror(opts, 926 "jail \"%s\" not found", name); 927 goto done_unlock_list; 928 } 929 mtx_unlock(&ppr->pr_mtx); 930 } 931 name = p + 1; 932 } 933 if (name[0] != '\0') { 934 namelen = 935 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 936 name_again: 937 deadpr = NULL; 938 FOREACH_PRISON_CHILD(ppr, tpr) { 939 if (tpr != pr && tpr->pr_ref > 0 && 940 !strcmp(tpr->pr_name + namelen, name)) { 941 if (pr == NULL && 942 cuflags != JAIL_CREATE) { 943 mtx_lock(&tpr->pr_mtx); 944 if (tpr->pr_ref > 0) { 945 /* 946 * Use this jail 947 * for updates. 948 */ 949 if (tpr->pr_uref > 0) { 950 pr = tpr; 951 break; 952 } 953 deadpr = tpr; 954 } 955 mtx_unlock(&tpr->pr_mtx); 956 } else if (tpr->pr_uref > 0) { 957 /* 958 * Create, or update(jid): 959 * name must not exist in an 960 * active sibling jail. 961 */ 962 error = EEXIST; 963 if (pr != NULL) 964 mtx_unlock(&pr->pr_mtx); 965 vfs_opterror(opts, 966 "jail \"%s\" already exists", 967 name); 968 goto done_unlock_list; 969 } 970 } 971 } 972 /* If no active jail is found, use a dying one. */ 973 if (deadpr != NULL && pr == NULL) { 974 if (flags & JAIL_DYING) { 975 mtx_lock(&deadpr->pr_mtx); 976 if (deadpr->pr_ref == 0) { 977 mtx_unlock(&deadpr->pr_mtx); 978 goto name_again; 979 } 980 pr = deadpr; 981 } else if (cuflags == JAIL_UPDATE) { 982 error = ENOENT; 983 vfs_opterror(opts, 984 "jail \"%s\" is dying", name); 985 goto done_unlock_list; 986 } 987 } 988 /* Update: name must exist if no jid. 
*/ 989 else if (cuflags == JAIL_UPDATE && pr == NULL) { 990 error = ENOENT; 991 vfs_opterror(opts, "jail \"%s\" not found", 992 name); 993 goto done_unlock_list; 994 } 995 } 996 } 997 /* Update: must provide a jid or name. */ 998 else if (cuflags == JAIL_UPDATE && pr == NULL) { 999 error = ENOENT; 1000 vfs_opterror(opts, "update specified no jail"); 1001 goto done_unlock_list; 1002 } 1003 1004 /* If there's no prison to update, create a new one and link it in. */ 1005 if (pr == NULL) { 1006 created = 1; 1007 mtx_lock(&ppr->pr_mtx); 1008 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1009 mtx_unlock(&ppr->pr_mtx); 1010 error = ENOENT; 1011 vfs_opterror(opts, "parent jail went away!"); 1012 goto done_unlock_list; 1013 } 1014 ppr->pr_ref++; 1015 ppr->pr_uref++; 1016 mtx_unlock(&ppr->pr_mtx); 1017 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1018 if (jid == 0) { 1019 /* Find the next free jid. */ 1020 jid = lastprid + 1; 1021 findnext: 1022 if (jid == JAIL_MAX) 1023 jid = 1; 1024 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1025 if (tpr->pr_id < jid) 1026 continue; 1027 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1028 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1029 break; 1030 } 1031 if (jid == lastprid) { 1032 error = EAGAIN; 1033 vfs_opterror(opts, 1034 "no available jail IDs"); 1035 free(pr, M_PRISON); 1036 prison_deref(ppr, PD_DEREF | 1037 PD_DEUREF | PD_LIST_XLOCKED); 1038 goto done_releroot; 1039 } 1040 jid++; 1041 goto findnext; 1042 } 1043 lastprid = jid; 1044 } else { 1045 /* 1046 * The jail already has a jid (that did not yet exist), 1047 * so just find where to insert it. 
1048 */ 1049 TAILQ_FOREACH(tpr, &allprison, pr_list) 1050 if (tpr->pr_id >= jid) { 1051 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1052 break; 1053 } 1054 } 1055 if (tpr == NULL) 1056 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1057 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1058 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1059 tpr->pr_prisoncount++; 1060 1061 pr->pr_parent = ppr; 1062 pr->pr_id = jid; 1063 1064 /* Set some default values, and inherit some from the parent. */ 1065 if (name == NULL) 1066 name = ""; 1067 if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1068 if (host == NULL) 1069 host = ppr->pr_host; 1070 if (domain == NULL) 1071 domain = ppr->pr_domain; 1072 if (uuid == NULL) 1073 uuid = ppr->pr_uuid; 1074 if (!gothid) 1075 hid = ppr->pr_hostid; 1076 } 1077 if (path == NULL) { 1078 path = "/"; 1079 root = mypr->pr_root; 1080 vref(root); 1081 } 1082 #ifdef INET 1083 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1084 pr->pr_ip4s = ppr->pr_ip4s; 1085 if (ppr->pr_ip4 != NULL) { 1086 pr->pr_ip4 = malloc(pr->pr_ip4s * 1087 sizeof(struct in_addr), M_PRISON, M_WAITOK); 1088 bcopy(ppr->pr_ip4, pr->pr_ip4, 1089 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1090 } 1091 #endif 1092 #ifdef INET6 1093 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1094 pr->pr_ip6s = ppr->pr_ip6s; 1095 if (ppr->pr_ip6 != NULL) { 1096 pr->pr_ip6 = malloc(pr->pr_ip6s * 1097 sizeof(struct in6_addr), M_PRISON, M_WAITOK); 1098 bcopy(ppr->pr_ip6, pr->pr_ip6, 1099 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1100 } 1101 #endif 1102 pr->pr_securelevel = ppr->pr_securelevel; 1103 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1104 pr->pr_enforce_statfs = ppr->pr_enforce_statfs; 1105 1106 LIST_INIT(&pr->pr_children); 1107 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1108 1109 /* 1110 * Allocate a dedicated cpuset for each jail. 1111 * Unlike other initial settings, this may return an erorr. 
1112 */ 1113 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1114 if (error) { 1115 prison_deref(pr, PD_LIST_XLOCKED); 1116 goto done_releroot; 1117 } 1118 1119 mtx_lock(&pr->pr_mtx); 1120 /* 1121 * New prisons do not yet have a reference, because we do not 1122 * want other to see the incomplete prison once the 1123 * allprison_lock is downgraded. 1124 */ 1125 } else { 1126 created = 0; 1127 /* 1128 * Grab a reference for existing prisons, to ensure they 1129 * continue to exist for the duration of the call. 1130 */ 1131 pr->pr_ref++; 1132 } 1133 1134 /* Do final error checking before setting anything. */ 1135 if (gotslevel) { 1136 if (slevel < ppr->pr_securelevel) { 1137 error = EPERM; 1138 goto done_deref_locked; 1139 } 1140 } 1141 if (gotenforce) { 1142 if (enforce < ppr->pr_enforce_statfs) { 1143 error = EPERM; 1144 goto done_deref_locked; 1145 } 1146 } 1147 #ifdef INET 1148 if (ch_flags & PR_IP4_USER) { 1149 if (ppr->pr_flags & PR_IP4) { 1150 if (!(pr_flags & PR_IP4_USER)) { 1151 /* 1152 * Silently ignore attempts to make the IP 1153 * addresses unrestricted when the parent is 1154 * restricted; in other words, interpret 1155 * "unrestricted" as "as unrestricted as 1156 * possible". 1157 */ 1158 ip4s = ppr->pr_ip4s; 1159 if (ip4s == 0) { 1160 free(ip4, M_PRISON); 1161 ip4 = NULL; 1162 } else if (ip4s <= ip4a) { 1163 /* Inherit the parent's address(es). */ 1164 bcopy(ppr->pr_ip4, ip4, 1165 ip4s * sizeof(*ip4)); 1166 } else { 1167 /* 1168 * There's no room for the parent's 1169 * address list. Allocate some more. 1170 */ 1171 ip4a = ip4s; 1172 free(ip4, M_PRISON); 1173 ip4 = malloc(ip4a * sizeof(*ip4), 1174 M_PRISON, M_NOWAIT); 1175 if (ip4 != NULL) 1176 bcopy(ppr->pr_ip4, ip4, 1177 ip4s * sizeof(*ip4)); 1178 else { 1179 /* Allocation failed without 1180 * sleeping. Unlocking the 1181 * prison now will invalidate 1182 * some checks and prematurely 1183 * show an unfinished new jail. 1184 * So let go of everything and 1185 * start over. 
1186 */ 1187 prison_deref(pr, created 1188 ? PD_LOCKED | 1189 PD_LIST_XLOCKED 1190 : PD_DEREF | PD_LOCKED | 1191 PD_LIST_XLOCKED); 1192 if (root != NULL) { 1193 vfslocked = 1194 VFS_LOCK_GIANT( 1195 root->v_mount); 1196 vrele(root); 1197 VFS_UNLOCK_GIANT( 1198 vfslocked); 1199 } 1200 ip4 = malloc(ip4a * 1201 sizeof(*ip4), M_PRISON, 1202 M_WAITOK); 1203 goto again; 1204 } 1205 } 1206 } else if (ip4s > 0) { 1207 /* 1208 * Make sure the new set of IP addresses is a 1209 * subset of the parent's list. Don't worry 1210 * about the parent being unlocked, as any 1211 * setting is done with allprison_lock held. 1212 */ 1213 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1214 if (ip4[0].s_addr == 1215 ppr->pr_ip4[ij].s_addr) 1216 break; 1217 if (ij == ppr->pr_ip4s) { 1218 error = EPERM; 1219 goto done_deref_locked; 1220 } 1221 if (ip4s > 1) { 1222 for (ii = ij = 1; ii < ip4s; ii++) { 1223 if (ip4[ii].s_addr == 1224 ppr->pr_ip4[0].s_addr) 1225 continue; 1226 for (; ij < ppr->pr_ip4s; ij++) 1227 if (ip4[ii].s_addr == 1228 ppr->pr_ip4[ij].s_addr) 1229 break; 1230 if (ij == ppr->pr_ip4s) 1231 break; 1232 } 1233 if (ij == ppr->pr_ip4s) { 1234 error = EPERM; 1235 goto done_deref_locked; 1236 } 1237 } 1238 } 1239 } 1240 if (ip4s > 0) { 1241 /* 1242 * Check for conflicting IP addresses. We permit them 1243 * if there is no more than one IP on each jail. If 1244 * there is a duplicate on a jail with more than one 1245 * IP stop checking and return error. 
1246 */ 1247 FOREACH_PRISON_DESCENDANT(&prison0, tpr, descend) { 1248 if (tpr == pr || tpr->pr_uref == 0) { 1249 descend = 0; 1250 continue; 1251 } 1252 if (!(tpr->pr_flags & PR_IP4_USER)) 1253 continue; 1254 descend = 0; 1255 if (tpr->pr_ip4 == NULL || 1256 (ip4s == 1 && tpr->pr_ip4s == 1)) 1257 continue; 1258 for (ii = 0; ii < ip4s; ii++) { 1259 if (_prison_check_ip4(tpr, 1260 &ip4[ii]) == 0) { 1261 error = EADDRINUSE; 1262 vfs_opterror(opts, 1263 "IPv4 addresses clash"); 1264 goto done_deref_locked; 1265 } 1266 } 1267 } 1268 } 1269 } 1270 #endif 1271 #ifdef INET6 1272 if (ch_flags & PR_IP6_USER) { 1273 if (ppr->pr_flags & PR_IP6) { 1274 if (!(pr_flags & PR_IP6_USER)) { 1275 /* 1276 * Silently ignore attempts to make the IP 1277 * addresses unrestricted when the parent is 1278 * restricted. 1279 */ 1280 ip6s = ppr->pr_ip6s; 1281 if (ip6s == 0) { 1282 free(ip6, M_PRISON); 1283 ip6 = NULL; 1284 } else if (ip6s <= ip6a) { 1285 /* Inherit the parent's address(es). */ 1286 bcopy(ppr->pr_ip6, ip6, 1287 ip6s * sizeof(*ip6)); 1288 } else { 1289 /* 1290 * There's no room for the parent's 1291 * address list. 1292 */ 1293 ip6a = ip6s; 1294 free(ip6, M_PRISON); 1295 ip6 = malloc(ip6a * sizeof(*ip6), 1296 M_PRISON, M_NOWAIT); 1297 if (ip6 != NULL) 1298 bcopy(ppr->pr_ip6, ip6, 1299 ip6s * sizeof(*ip6)); 1300 else { 1301 prison_deref(pr, created 1302 ? PD_LOCKED | 1303 PD_LIST_XLOCKED 1304 : PD_DEREF | PD_LOCKED | 1305 PD_LIST_XLOCKED); 1306 if (root != NULL) { 1307 vfslocked = 1308 VFS_LOCK_GIANT( 1309 root->v_mount); 1310 vrele(root); 1311 VFS_UNLOCK_GIANT( 1312 vfslocked); 1313 } 1314 ip6 = malloc(ip6a * 1315 sizeof(*ip6), M_PRISON, 1316 M_WAITOK); 1317 goto again; 1318 } 1319 } 1320 } else if (ip6s > 0) { 1321 /* 1322 * Make sure the new set of IP addresses is a 1323 * subset of the parent's list. 
1324 */ 1325 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1326 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1327 &ppr->pr_ip6[ij])) 1328 break; 1329 if (ij == ppr->pr_ip6s) { 1330 error = EPERM; 1331 goto done_deref_locked; 1332 } 1333 if (ip6s > 1) { 1334 for (ii = ij = 1; ii < ip6s; ii++) { 1335 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1336 &ppr->pr_ip6[0])) 1337 continue; 1338 for (; ij < ppr->pr_ip6s; ij++) 1339 if (IN6_ARE_ADDR_EQUAL( 1340 &ip6[ii], 1341 &ppr->pr_ip6[ij])) 1342 break; 1343 if (ij == ppr->pr_ip6s) 1344 break; 1345 } 1346 if (ij == ppr->pr_ip6s) { 1347 error = EPERM; 1348 goto done_deref_locked; 1349 } 1350 } 1351 } 1352 } 1353 if (ip6s > 0) { 1354 /* Check for conflicting IP addresses. */ 1355 FOREACH_PRISON_DESCENDANT(&prison0, tpr, descend) { 1356 if (tpr == pr || tpr->pr_uref == 0) { 1357 descend = 0; 1358 continue; 1359 } 1360 if (!(tpr->pr_flags & PR_IP6_USER)) 1361 continue; 1362 descend = 0; 1363 if (tpr->pr_ip6 == NULL || 1364 (ip6s == 1 && tpr->pr_ip6s == 1)) 1365 continue; 1366 for (ii = 0; ii < ip6s; ii++) { 1367 if (_prison_check_ip6(tpr, 1368 &ip6[ii]) == 0) { 1369 error = EADDRINUSE; 1370 vfs_opterror(opts, 1371 "IPv6 addresses clash"); 1372 goto done_deref_locked; 1373 } 1374 } 1375 } 1376 } 1377 } 1378 #endif 1379 onamelen = namelen = 0; 1380 if (name != NULL) { 1381 /* Give a default name of the jid. */ 1382 if (name[0] == '\0') 1383 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1384 else if (strtoul(name, &p, 10) != jid && *p == '\0') { 1385 error = EINVAL; 1386 vfs_opterror(opts, "name cannot be numeric"); 1387 goto done_deref_locked; 1388 } 1389 /* 1390 * Make sure the name isn't too long for the prison or its 1391 * children. 
1392 */ 1393 onamelen = strlen(pr->pr_name); 1394 namelen = strlen(name); 1395 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1396 error = ENAMETOOLONG; 1397 goto done_deref_locked; 1398 } 1399 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1400 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1401 sizeof(pr->pr_name)) { 1402 error = ENAMETOOLONG; 1403 goto done_deref_locked; 1404 } 1405 } 1406 } 1407 if (pr_allow & ~ppr->pr_allow) { 1408 error = EPERM; 1409 goto done_deref_locked; 1410 } 1411 1412 /* Set the parameters of the prison. */ 1413 #ifdef INET 1414 redo_ip4 = 0; 1415 if (ch_flags & PR_IP4_USER) { 1416 if (pr_flags & PR_IP4_USER) { 1417 /* Some restriction set. */ 1418 pr->pr_flags |= PR_IP4; 1419 if (ip4s >= 0) { 1420 free(pr->pr_ip4, M_PRISON); 1421 pr->pr_ip4s = ip4s; 1422 pr->pr_ip4 = ip4; 1423 ip4 = NULL; 1424 } 1425 } else if (ppr->pr_flags & PR_IP4) { 1426 /* This restriction cleared, but keep inherited. */ 1427 free(pr->pr_ip4, M_PRISON); 1428 pr->pr_ip4s = ip4s; 1429 pr->pr_ip4 = ip4; 1430 ip4 = NULL; 1431 } else { 1432 /* Restriction cleared, now unrestricted. */ 1433 pr->pr_flags &= ~PR_IP4; 1434 free(pr->pr_ip4, M_PRISON); 1435 pr->pr_ip4s = 0; 1436 } 1437 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1438 if (prison_restrict_ip4(tpr, NULL)) { 1439 redo_ip4 = 1; 1440 descend = 0; 1441 } 1442 } 1443 } 1444 #endif 1445 #ifdef INET6 1446 redo_ip6 = 0; 1447 if (ch_flags & PR_IP6_USER) { 1448 if (pr_flags & PR_IP6_USER) { 1449 /* Some restriction set. */ 1450 pr->pr_flags |= PR_IP6; 1451 if (ip6s >= 0) { 1452 free(pr->pr_ip6, M_PRISON); 1453 pr->pr_ip6s = ip6s; 1454 pr->pr_ip6 = ip6; 1455 ip6 = NULL; 1456 } 1457 } else if (ppr->pr_flags & PR_IP6) { 1458 /* This restriction cleared, but keep inherited. */ 1459 free(pr->pr_ip6, M_PRISON); 1460 pr->pr_ip6s = ip6s; 1461 pr->pr_ip6 = ip6; 1462 ip6 = NULL; 1463 } else { 1464 /* Restriction cleared, now unrestricted. 
*/ 1465 pr->pr_flags &= ~PR_IP6; 1466 free(pr->pr_ip6, M_PRISON); 1467 pr->pr_ip6s = 0; 1468 } 1469 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1470 if (prison_restrict_ip6(tpr, NULL)) { 1471 redo_ip6 = 1; 1472 descend = 0; 1473 } 1474 } 1475 } 1476 #endif 1477 if (gotslevel) { 1478 pr->pr_securelevel = slevel; 1479 /* Set all child jails to be at least this level. */ 1480 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1481 if (tpr->pr_securelevel < slevel) 1482 tpr->pr_securelevel = slevel; 1483 } 1484 if (gotenforce) { 1485 pr->pr_enforce_statfs = enforce; 1486 /* Pass this restriction on to the children. */ 1487 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1488 if (tpr->pr_enforce_statfs < enforce) 1489 tpr->pr_enforce_statfs = enforce; 1490 } 1491 if (name != NULL) { 1492 if (ppr == &prison0) 1493 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1494 else 1495 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1496 ppr->pr_name, name); 1497 /* Change this component of child names. */ 1498 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1499 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1500 strlen(tpr->pr_name + onamelen) + 1); 1501 bcopy(pr->pr_name, tpr->pr_name, namelen); 1502 } 1503 } 1504 if (path != NULL) { 1505 /* Try to keep a real-rooted full pathname. */ 1506 if (path[0] == '/' && strcmp(mypr->pr_path, "/")) 1507 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1508 mypr->pr_path, path); 1509 else 1510 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1511 pr->pr_root = root; 1512 } 1513 if (PR_HOST & ch_flags & ~pr_flags) { 1514 if (pr->pr_flags & PR_HOST) { 1515 /* 1516 * Copy the parent's host info. As with pr_ip4 above, 1517 * the lack of a lock on the parent is not a problem; 1518 * it is always set with allprison_lock at least 1519 * shared, and is held exclusively here. 
1520 */ 1521 strlcpy(pr->pr_host, pr->pr_parent->pr_host, 1522 sizeof(pr->pr_host)); 1523 strlcpy(pr->pr_domain, pr->pr_parent->pr_domain, 1524 sizeof(pr->pr_domain)); 1525 strlcpy(pr->pr_uuid, pr->pr_parent->pr_uuid, 1526 sizeof(pr->pr_uuid)); 1527 pr->pr_hostid = pr->pr_parent->pr_hostid; 1528 } 1529 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1530 /* Set this prison, and any descendants without PR_HOST. */ 1531 if (host != NULL) 1532 strlcpy(pr->pr_host, host, sizeof(pr->pr_host)); 1533 if (domain != NULL) 1534 strlcpy(pr->pr_domain, domain, sizeof(pr->pr_domain)); 1535 if (uuid != NULL) 1536 strlcpy(pr->pr_uuid, uuid, sizeof(pr->pr_uuid)); 1537 if (gothid) 1538 pr->pr_hostid = hid; 1539 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1540 if (tpr->pr_flags & PR_HOST) 1541 descend = 0; 1542 else { 1543 if (host != NULL) 1544 strlcpy(tpr->pr_host, pr->pr_host, 1545 sizeof(tpr->pr_host)); 1546 if (domain != NULL) 1547 strlcpy(tpr->pr_domain, pr->pr_domain, 1548 sizeof(tpr->pr_domain)); 1549 if (uuid != NULL) 1550 strlcpy(tpr->pr_uuid, pr->pr_uuid, 1551 sizeof(tpr->pr_uuid)); 1552 if (gothid) 1553 tpr->pr_hostid = hid; 1554 } 1555 } 1556 } 1557 if ((tallow = ch_allow & ~pr_allow)) { 1558 /* Clear allow bits in all children. */ 1559 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1560 tpr->pr_allow &= ~tallow; 1561 } 1562 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1563 /* 1564 * Persistent prisons get an extra reference, and prisons losing their 1565 * persist flag lose that reference. Only do this for existing prisons 1566 * for now, so new ones will remain unseen until after the module 1567 * handlers have completed. 
1568 */ 1569 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1570 if (pr_flags & PR_PERSIST) { 1571 pr->pr_ref++; 1572 pr->pr_uref++; 1573 } else { 1574 pr->pr_ref--; 1575 pr->pr_uref--; 1576 } 1577 } 1578 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1579 mtx_unlock(&pr->pr_mtx); 1580 1581 /* Locks may have prevented a complete restriction of child IP 1582 * addresses. If so, allocate some more memory and try again. 1583 */ 1584 #ifdef INET 1585 while (redo_ip4) { 1586 ip4s = pr->pr_ip4s; 1587 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1588 mtx_lock(&pr->pr_mtx); 1589 redo_ip4 = 0; 1590 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1591 if (prison_restrict_ip4(tpr, ip4)) { 1592 if (ip4 != NULL) 1593 ip4 = NULL; 1594 else 1595 redo_ip4 = 1; 1596 } 1597 } 1598 mtx_unlock(&pr->pr_mtx); 1599 } 1600 #endif 1601 #ifdef INET6 1602 while (redo_ip6) { 1603 ip6s = pr->pr_ip6s; 1604 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1605 mtx_lock(&pr->pr_mtx); 1606 redo_ip6 = 0; 1607 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1608 if (prison_restrict_ip6(tpr, ip6)) { 1609 if (ip6 != NULL) 1610 ip6 = NULL; 1611 else 1612 redo_ip6 = 1; 1613 } 1614 } 1615 mtx_unlock(&pr->pr_mtx); 1616 } 1617 #endif 1618 1619 /* Let the modules do their work. */ 1620 sx_downgrade(&allprison_lock); 1621 if (created) { 1622 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1623 if (error) { 1624 prison_deref(pr, PD_LIST_SLOCKED); 1625 goto done_errmsg; 1626 } 1627 } 1628 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1629 if (error) { 1630 prison_deref(pr, created 1631 ? PD_LIST_SLOCKED 1632 : PD_DEREF | PD_LIST_SLOCKED); 1633 goto done_errmsg; 1634 } 1635 1636 /* Attach this process to the prison if requested. 
*/ 1637 if (flags & JAIL_ATTACH) { 1638 mtx_lock(&pr->pr_mtx); 1639 error = do_jail_attach(td, pr); 1640 if (error) { 1641 vfs_opterror(opts, "attach failed"); 1642 if (!created) 1643 prison_deref(pr, PD_DEREF); 1644 goto done_errmsg; 1645 } 1646 } 1647 1648 /* 1649 * Now that it is all there, drop the temporary reference from existing 1650 * prisons. Or add a reference to newly created persistent prisons 1651 * (which was not done earlier so that the prison would not be publicly 1652 * visible). 1653 */ 1654 if (!created) { 1655 prison_deref(pr, (flags & JAIL_ATTACH) 1656 ? PD_DEREF 1657 : PD_DEREF | PD_LIST_SLOCKED); 1658 } else { 1659 if (pr_flags & PR_PERSIST) { 1660 mtx_lock(&pr->pr_mtx); 1661 pr->pr_ref++; 1662 pr->pr_uref++; 1663 mtx_unlock(&pr->pr_mtx); 1664 } 1665 if (!(flags & JAIL_ATTACH)) 1666 sx_sunlock(&allprison_lock); 1667 } 1668 td->td_retval[0] = pr->pr_id; 1669 goto done_errmsg; 1670 1671 done_deref_locked: 1672 prison_deref(pr, created 1673 ? PD_LOCKED | PD_LIST_XLOCKED 1674 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1675 goto done_releroot; 1676 done_unlock_list: 1677 sx_xunlock(&allprison_lock); 1678 done_releroot: 1679 if (root != NULL) { 1680 vfslocked = VFS_LOCK_GIANT(root->v_mount); 1681 vrele(root); 1682 VFS_UNLOCK_GIANT(vfslocked); 1683 } 1684 done_errmsg: 1685 if (error) { 1686 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1687 if (errmsg_len > 0) { 1688 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1689 if (errmsg_pos > 0) { 1690 if (optuio->uio_segflg == UIO_SYSSPACE) 1691 bcopy(errmsg, 1692 optuio->uio_iov[errmsg_pos].iov_base, 1693 errmsg_len); 1694 else 1695 copyout(errmsg, 1696 optuio->uio_iov[errmsg_pos].iov_base, 1697 errmsg_len); 1698 } 1699 } 1700 } 1701 done_free: 1702 #ifdef INET 1703 free(ip4, M_PRISON); 1704 #endif 1705 #ifdef INET6 1706 free(ip6, M_PRISON); 1707 #endif 1708 vfs_freeopts(opts); 1709 return (error); 1710 } 1711 1712 1713 /* 1714 * struct jail_get_args { 1715 * struct iovec *iovp; 1716 * 
unsigned int iovcnt; 1717 * int flags; 1718 * }; 1719 */ 1720 int 1721 jail_get(struct thread *td, struct jail_get_args *uap) 1722 { 1723 struct uio *auio; 1724 int error; 1725 1726 /* Check that we have an even number of iovecs. */ 1727 if (uap->iovcnt & 1) 1728 return (EINVAL); 1729 1730 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1731 if (error) 1732 return (error); 1733 error = kern_jail_get(td, auio, uap->flags); 1734 if (error == 0) 1735 error = copyout(auio->uio_iov, uap->iovp, 1736 uap->iovcnt * sizeof (struct iovec)); 1737 free(auio, M_IOV); 1738 return (error); 1739 } 1740 1741 int 1742 kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1743 { 1744 struct prison *pr, *mypr; 1745 struct vfsopt *opt; 1746 struct vfsoptlist *opts; 1747 char *errmsg, *name; 1748 int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1749 1750 if (flags & ~JAIL_GET_MASK) 1751 return (EINVAL); 1752 1753 /* Get the parameter list. */ 1754 error = vfs_buildopts(optuio, &opts); 1755 if (error) 1756 return (error); 1757 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1758 mypr = td->td_ucred->cr_prison; 1759 1760 /* 1761 * Find the prison specified by one of: lastjid, jid, name. 
1762 */ 1763 sx_slock(&allprison_lock); 1764 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1765 if (error == 0) { 1766 TAILQ_FOREACH(pr, &allprison, pr_list) { 1767 if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 1768 mtx_lock(&pr->pr_mtx); 1769 if (pr->pr_ref > 0 && 1770 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1771 break; 1772 mtx_unlock(&pr->pr_mtx); 1773 } 1774 } 1775 if (pr != NULL) 1776 goto found_prison; 1777 error = ENOENT; 1778 vfs_opterror(opts, "no jail after %d", jid); 1779 goto done_unlock_list; 1780 } else if (error != ENOENT) 1781 goto done_unlock_list; 1782 1783 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1784 if (error == 0) { 1785 if (jid != 0) { 1786 pr = prison_find_child(mypr, jid); 1787 if (pr != NULL) { 1788 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1789 mtx_unlock(&pr->pr_mtx); 1790 error = ENOENT; 1791 vfs_opterror(opts, "jail %d is dying", 1792 jid); 1793 goto done_unlock_list; 1794 } 1795 goto found_prison; 1796 } 1797 error = ENOENT; 1798 vfs_opterror(opts, "jail %d not found", jid); 1799 goto done_unlock_list; 1800 } 1801 } else if (error != ENOENT) 1802 goto done_unlock_list; 1803 1804 error = vfs_getopt(opts, "name", (void **)&name, &len); 1805 if (error == 0) { 1806 if (len == 0 || name[len - 1] != '\0') { 1807 error = EINVAL; 1808 goto done_unlock_list; 1809 } 1810 pr = prison_find_name(mypr, name); 1811 if (pr != NULL) { 1812 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1813 mtx_unlock(&pr->pr_mtx); 1814 error = ENOENT; 1815 vfs_opterror(opts, "jail \"%s\" is dying", 1816 name); 1817 goto done_unlock_list; 1818 } 1819 goto found_prison; 1820 } 1821 error = ENOENT; 1822 vfs_opterror(opts, "jail \"%s\" not found", name); 1823 goto done_unlock_list; 1824 } else if (error != ENOENT) 1825 goto done_unlock_list; 1826 1827 vfs_opterror(opts, "no jail specified"); 1828 error = ENOENT; 1829 goto done_unlock_list; 1830 1831 found_prison: 1832 /* Get the parameters of the prison. 
*/ 1833 pr->pr_ref++; 1834 locked = PD_LOCKED; 1835 td->td_retval[0] = pr->pr_id; 1836 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 1837 if (error != 0 && error != ENOENT) 1838 goto done_deref; 1839 i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id; 1840 error = vfs_setopt(opts, "parent", &i, sizeof(i)); 1841 if (error != 0 && error != ENOENT) 1842 goto done_deref; 1843 error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 1844 if (error != 0 && error != ENOENT) 1845 goto done_deref; 1846 error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 1847 sizeof(pr->pr_cpuset->cs_id)); 1848 if (error != 0 && error != ENOENT) 1849 goto done_deref; 1850 error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 1851 if (error != 0 && error != ENOENT) 1852 goto done_deref; 1853 #ifdef INET 1854 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 1855 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1856 if (error != 0 && error != ENOENT) 1857 goto done_deref; 1858 #endif 1859 #ifdef INET6 1860 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 1861 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1862 if (error != 0 && error != ENOENT) 1863 goto done_deref; 1864 #endif 1865 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 1866 sizeof(pr->pr_securelevel)); 1867 if (error != 0 && error != ENOENT) 1868 goto done_deref; 1869 error = vfs_setopts(opts, "host.hostname", pr->pr_host); 1870 if (error != 0 && error != ENOENT) 1871 goto done_deref; 1872 error = vfs_setopts(opts, "host.domainname", pr->pr_domain); 1873 if (error != 0 && error != ENOENT) 1874 goto done_deref; 1875 error = vfs_setopts(opts, "host.hostuuid", pr->pr_uuid); 1876 if (error != 0 && error != ENOENT) 1877 goto done_deref; 1878 #ifdef COMPAT_IA32 1879 if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 1880 uint32_t hid32 = pr->pr_hostid; 1881 1882 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 1883 } else 1884 #endif 1885 error = vfs_setopt(opts, "host.hostid", 
&pr->pr_hostid, 1886 sizeof(pr->pr_hostid)); 1887 if (error != 0 && error != ENOENT) 1888 goto done_deref; 1889 error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 1890 sizeof(pr->pr_enforce_statfs)); 1891 if (error != 0 && error != ENOENT) 1892 goto done_deref; 1893 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 1894 fi++) { 1895 if (pr_flag_names[fi] == NULL) 1896 continue; 1897 i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 1898 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 1899 if (error != 0 && error != ENOENT) 1900 goto done_deref; 1901 i = !i; 1902 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 1903 if (error != 0 && error != ENOENT) 1904 goto done_deref; 1905 } 1906 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 1907 fi++) { 1908 if (pr_allow_names[fi] == NULL) 1909 continue; 1910 i = (pr->pr_allow & (1 << fi)) ? 1 : 0; 1911 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 1912 if (error != 0 && error != ENOENT) 1913 goto done_deref; 1914 i = !i; 1915 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 1916 if (error != 0 && error != ENOENT) 1917 goto done_deref; 1918 } 1919 i = (pr->pr_uref == 0); 1920 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 1921 if (error != 0 && error != ENOENT) 1922 goto done_deref; 1923 i = !i; 1924 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 1925 if (error != 0 && error != ENOENT) 1926 goto done_deref; 1927 1928 /* Get the module parameters. */ 1929 mtx_unlock(&pr->pr_mtx); 1930 locked = 0; 1931 error = osd_jail_call(pr, PR_METHOD_GET, opts); 1932 if (error) 1933 goto done_deref; 1934 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 1935 1936 /* By now, all parameters should have been noted. 
*/ 1937 TAILQ_FOREACH(opt, opts, link) { 1938 if (!opt->seen && strcmp(opt->name, "errmsg")) { 1939 error = EINVAL; 1940 vfs_opterror(opts, "unknown parameter: %s", opt->name); 1941 goto done_errmsg; 1942 } 1943 } 1944 1945 /* Write the fetched parameters back to userspace. */ 1946 error = 0; 1947 TAILQ_FOREACH(opt, opts, link) { 1948 if (opt->pos >= 0 && opt->pos != errmsg_pos) { 1949 pos = 2 * opt->pos + 1; 1950 optuio->uio_iov[pos].iov_len = opt->len; 1951 if (opt->value != NULL) { 1952 if (optuio->uio_segflg == UIO_SYSSPACE) { 1953 bcopy(opt->value, 1954 optuio->uio_iov[pos].iov_base, 1955 opt->len); 1956 } else { 1957 error = copyout(opt->value, 1958 optuio->uio_iov[pos].iov_base, 1959 opt->len); 1960 if (error) 1961 break; 1962 } 1963 } 1964 } 1965 } 1966 goto done_errmsg; 1967 1968 done_deref: 1969 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 1970 goto done_errmsg; 1971 1972 done_unlock_list: 1973 sx_sunlock(&allprison_lock); 1974 done_errmsg: 1975 if (error && errmsg_pos >= 0) { 1976 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1977 errmsg_pos = 2 * errmsg_pos + 1; 1978 if (errmsg_len > 0) { 1979 if (optuio->uio_segflg == UIO_SYSSPACE) 1980 bcopy(errmsg, 1981 optuio->uio_iov[errmsg_pos].iov_base, 1982 errmsg_len); 1983 else 1984 copyout(errmsg, 1985 optuio->uio_iov[errmsg_pos].iov_base, 1986 errmsg_len); 1987 } 1988 } 1989 vfs_freeopts(opts); 1990 return (error); 1991 } 1992 1993 1994 /* 1995 * struct jail_remove_args { 1996 * int jid; 1997 * }; 1998 */ 1999 int 2000 jail_remove(struct thread *td, struct jail_remove_args *uap) 2001 { 2002 struct prison *pr, *cpr, *lpr, *tpr; 2003 int descend, error; 2004 2005 error = priv_check(td, PRIV_JAIL_REMOVE); 2006 if (error) 2007 return (error); 2008 2009 sx_xlock(&allprison_lock); 2010 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2011 if (pr == NULL) { 2012 sx_xunlock(&allprison_lock); 2013 return (EINVAL); 2014 } 2015 2016 /* Remove all descendants of this prison, then remove 
this prison. */ 2017 pr->pr_ref++; 2018 pr->pr_flags |= PR_REMOVE; 2019 if (!LIST_EMPTY(&pr->pr_children)) { 2020 mtx_unlock(&pr->pr_mtx); 2021 lpr = NULL; 2022 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2023 mtx_lock(&cpr->pr_mtx); 2024 if (cpr->pr_ref > 0) { 2025 tpr = cpr; 2026 cpr->pr_ref++; 2027 cpr->pr_flags |= PR_REMOVE; 2028 } else { 2029 /* Already removed - do not do it again. */ 2030 tpr = NULL; 2031 } 2032 mtx_unlock(&cpr->pr_mtx); 2033 if (lpr != NULL) { 2034 mtx_lock(&lpr->pr_mtx); 2035 prison_remove_one(lpr); 2036 sx_xlock(&allprison_lock); 2037 } 2038 lpr = tpr; 2039 } 2040 if (lpr != NULL) { 2041 mtx_lock(&lpr->pr_mtx); 2042 prison_remove_one(lpr); 2043 sx_xlock(&allprison_lock); 2044 } 2045 mtx_lock(&pr->pr_mtx); 2046 } 2047 prison_remove_one(pr); 2048 return (0); 2049 } 2050 2051 static void 2052 prison_remove_one(struct prison *pr) 2053 { 2054 struct proc *p; 2055 int deuref; 2056 2057 /* If the prison was persistent, it is not anymore. */ 2058 deuref = 0; 2059 if (pr->pr_flags & PR_PERSIST) { 2060 pr->pr_ref--; 2061 deuref = PD_DEUREF; 2062 pr->pr_flags &= ~PR_PERSIST; 2063 } 2064 2065 /* 2066 * jail_remove added a reference. If that's the only one, remove 2067 * the prison now. 2068 */ 2069 KASSERT(pr->pr_ref > 0, 2070 ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2071 if (pr->pr_ref == 1) { 2072 prison_deref(pr, 2073 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2074 return; 2075 } 2076 2077 mtx_unlock(&pr->pr_mtx); 2078 sx_xunlock(&allprison_lock); 2079 /* 2080 * Kill all processes unfortunate enough to be attached to this prison. 2081 */ 2082 sx_slock(&allproc_lock); 2083 LIST_FOREACH(p, &allproc, p_list) { 2084 PROC_LOCK(p); 2085 if (p->p_state != PRS_NEW && p->p_ucred && 2086 p->p_ucred->cr_prison == pr) 2087 psignal(p, SIGKILL); 2088 PROC_UNLOCK(p); 2089 } 2090 sx_sunlock(&allproc_lock); 2091 /* Remove the temporary reference added by jail_remove. 
*/ 2092 prison_deref(pr, deuref | PD_DEREF); 2093 } 2094 2095 2096 /* 2097 * struct jail_attach_args { 2098 * int jid; 2099 * }; 2100 */ 2101 int 2102 jail_attach(struct thread *td, struct jail_attach_args *uap) 2103 { 2104 struct prison *pr; 2105 int error; 2106 2107 error = priv_check(td, PRIV_JAIL_ATTACH); 2108 if (error) 2109 return (error); 2110 2111 sx_slock(&allprison_lock); 2112 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2113 if (pr == NULL) { 2114 sx_sunlock(&allprison_lock); 2115 return (EINVAL); 2116 } 2117 2118 /* 2119 * Do not allow a process to attach to a prison that is not 2120 * considered to be "alive". 2121 */ 2122 if (pr->pr_uref == 0) { 2123 mtx_unlock(&pr->pr_mtx); 2124 sx_sunlock(&allprison_lock); 2125 return (EINVAL); 2126 } 2127 2128 return (do_jail_attach(td, pr)); 2129 } 2130 2131 static int 2132 do_jail_attach(struct thread *td, struct prison *pr) 2133 { 2134 struct prison *ppr; 2135 struct proc *p; 2136 struct ucred *newcred, *oldcred; 2137 int vfslocked, error; 2138 2139 /* 2140 * XXX: Note that there is a slight race here if two threads 2141 * in the same privileged process attempt to attach to two 2142 * different jails at the same time. It is important for 2143 * user processes not to do this, or they might end up with 2144 * a process root from one prison, but attached to the jail 2145 * of another. 2146 */ 2147 pr->pr_ref++; 2148 pr->pr_uref++; 2149 mtx_unlock(&pr->pr_mtx); 2150 2151 /* Let modules do whatever they need to prepare for attaching. */ 2152 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2153 if (error) { 2154 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2155 return (error); 2156 } 2157 sx_sunlock(&allprison_lock); 2158 2159 /* 2160 * Reparent the newly attached process to this jail. 
2161 */ 2162 ppr = td->td_ucred->cr_prison; 2163 p = td->td_proc; 2164 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2165 if (error) 2166 goto e_revert_osd; 2167 2168 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2169 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2170 if ((error = change_dir(pr->pr_root, td)) != 0) 2171 goto e_unlock; 2172 #ifdef MAC 2173 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2174 goto e_unlock; 2175 #endif 2176 VOP_UNLOCK(pr->pr_root, 0); 2177 if ((error = change_root(pr->pr_root, td))) 2178 goto e_unlock_giant; 2179 VFS_UNLOCK_GIANT(vfslocked); 2180 2181 newcred = crget(); 2182 PROC_LOCK(p); 2183 oldcred = p->p_ucred; 2184 setsugid(p); 2185 crcopy(newcred, oldcred); 2186 newcred->cr_prison = pr; 2187 p->p_ucred = newcred; 2188 PROC_UNLOCK(p); 2189 crfree(oldcred); 2190 prison_deref(ppr, PD_DEREF | PD_DEUREF); 2191 return (0); 2192 e_unlock: 2193 VOP_UNLOCK(pr->pr_root, 0); 2194 e_unlock_giant: 2195 VFS_UNLOCK_GIANT(vfslocked); 2196 e_revert_osd: 2197 /* Tell modules this thread is still in its old jail after all. */ 2198 (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2199 prison_deref(pr, PD_DEREF | PD_DEUREF); 2200 return (error); 2201 } 2202 2203 2204 /* 2205 * Returns a locked prison instance, or NULL on failure. 2206 */ 2207 struct prison * 2208 prison_find(int prid) 2209 { 2210 struct prison *pr; 2211 2212 sx_assert(&allprison_lock, SX_LOCKED); 2213 TAILQ_FOREACH(pr, &allprison, pr_list) { 2214 if (pr->pr_id == prid) { 2215 mtx_lock(&pr->pr_mtx); 2216 if (pr->pr_ref > 0) 2217 return (pr); 2218 mtx_unlock(&pr->pr_mtx); 2219 } 2220 } 2221 return (NULL); 2222 } 2223 2224 /* 2225 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
2226 */ 2227 struct prison * 2228 prison_find_child(struct prison *mypr, int prid) 2229 { 2230 struct prison *pr; 2231 int descend; 2232 2233 sx_assert(&allprison_lock, SX_LOCKED); 2234 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2235 if (pr->pr_id == prid) { 2236 mtx_lock(&pr->pr_mtx); 2237 if (pr->pr_ref > 0) 2238 return (pr); 2239 mtx_unlock(&pr->pr_mtx); 2240 } 2241 } 2242 return (NULL); 2243 } 2244 2245 /* 2246 * Look for the name relative to mypr. Returns a locked prison or NULL. 2247 */ 2248 struct prison * 2249 prison_find_name(struct prison *mypr, const char *name) 2250 { 2251 struct prison *pr, *deadpr; 2252 size_t mylen; 2253 int descend; 2254 2255 sx_assert(&allprison_lock, SX_LOCKED); 2256 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2257 again: 2258 deadpr = NULL; 2259 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2260 if (!strcmp(pr->pr_name + mylen, name)) { 2261 mtx_lock(&pr->pr_mtx); 2262 if (pr->pr_ref > 0) { 2263 if (pr->pr_uref > 0) 2264 return (pr); 2265 deadpr = pr; 2266 } 2267 mtx_unlock(&pr->pr_mtx); 2268 } 2269 } 2270 /* There was no valid prison - perhaps there was a dying one. */ 2271 if (deadpr != NULL) { 2272 mtx_lock(&deadpr->pr_mtx); 2273 if (deadpr->pr_ref == 0) { 2274 mtx_unlock(&deadpr->pr_mtx); 2275 goto again; 2276 } 2277 } 2278 return (deadpr); 2279 } 2280 2281 /* 2282 * See if a prison has the specific flag set. 2283 */ 2284 int 2285 prison_flag(struct ucred *cred, unsigned flag) 2286 { 2287 2288 /* This is an atomic read, so no locking is necessary. */ 2289 return (cred->cr_prison->pr_flags & flag); 2290 } 2291 2292 int 2293 prison_allow(struct ucred *cred, unsigned flag) 2294 { 2295 2296 /* This is an atomic read, so no locking is necessary. */ 2297 return (cred->cr_prison->pr_allow & flag); 2298 } 2299 2300 /* 2301 * Remove a prison reference. If that was the last reference, remove the 2302 * prison itself - but not in this context in case there are locks held. 
2303 */ 2304 void 2305 prison_free_locked(struct prison *pr) 2306 { 2307 2308 mtx_assert(&pr->pr_mtx, MA_OWNED); 2309 pr->pr_ref--; 2310 if (pr->pr_ref == 0) { 2311 mtx_unlock(&pr->pr_mtx); 2312 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2313 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 2314 return; 2315 } 2316 mtx_unlock(&pr->pr_mtx); 2317 } 2318 2319 void 2320 prison_free(struct prison *pr) 2321 { 2322 2323 mtx_lock(&pr->pr_mtx); 2324 prison_free_locked(pr); 2325 } 2326 2327 static void 2328 prison_complete(void *context, int pending) 2329 { 2330 2331 prison_deref((struct prison *)context, 0); 2332 } 2333 2334 /* 2335 * Remove a prison reference (usually). This internal version assumes no 2336 * mutexes are held, except perhaps the prison itself. If there are no more 2337 * references, release and delist the prison. On completion, the prison lock 2338 * and the allprison lock are both unlocked. 2339 */ 2340 static void 2341 prison_deref(struct prison *pr, int flags) 2342 { 2343 struct prison *ppr, *tpr; 2344 int vfslocked; 2345 2346 if (!(flags & PD_LOCKED)) 2347 mtx_lock(&pr->pr_mtx); 2348 /* Decrement the user references in a separate loop. */ 2349 if (flags & PD_DEUREF) { 2350 for (tpr = pr;; tpr = tpr->pr_parent) { 2351 if (tpr != pr) 2352 mtx_lock(&tpr->pr_mtx); 2353 if (--tpr->pr_uref > 0) 2354 break; 2355 KASSERT(tpr != &prison0, ("prison0 pr_uref=0")); 2356 mtx_unlock(&tpr->pr_mtx); 2357 } 2358 /* Done if there were only user references to remove. */ 2359 if (!(flags & PD_DEREF)) { 2360 mtx_unlock(&tpr->pr_mtx); 2361 if (flags & PD_LIST_SLOCKED) 2362 sx_sunlock(&allprison_lock); 2363 else if (flags & PD_LIST_XLOCKED) 2364 sx_xunlock(&allprison_lock); 2365 return; 2366 } 2367 if (tpr != pr) { 2368 mtx_unlock(&tpr->pr_mtx); 2369 mtx_lock(&pr->pr_mtx); 2370 } 2371 } 2372 2373 for (;;) { 2374 if (flags & PD_DEREF) 2375 pr->pr_ref--; 2376 /* If the prison still has references, nothing else to do. 
*/ 2377 if (pr->pr_ref > 0) { 2378 mtx_unlock(&pr->pr_mtx); 2379 if (flags & PD_LIST_SLOCKED) 2380 sx_sunlock(&allprison_lock); 2381 else if (flags & PD_LIST_XLOCKED) 2382 sx_xunlock(&allprison_lock); 2383 return; 2384 } 2385 2386 mtx_unlock(&pr->pr_mtx); 2387 if (flags & PD_LIST_SLOCKED) { 2388 if (!sx_try_upgrade(&allprison_lock)) { 2389 sx_sunlock(&allprison_lock); 2390 sx_xlock(&allprison_lock); 2391 } 2392 } else if (!(flags & PD_LIST_XLOCKED)) 2393 sx_xlock(&allprison_lock); 2394 2395 TAILQ_REMOVE(&allprison, pr, pr_list); 2396 LIST_REMOVE(pr, pr_sibling); 2397 ppr = pr->pr_parent; 2398 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2399 tpr->pr_prisoncount--; 2400 sx_downgrade(&allprison_lock); 2401 2402 if (pr->pr_root != NULL) { 2403 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2404 vrele(pr->pr_root); 2405 VFS_UNLOCK_GIANT(vfslocked); 2406 } 2407 mtx_destroy(&pr->pr_mtx); 2408 #ifdef INET 2409 free(pr->pr_ip4, M_PRISON); 2410 #endif 2411 #ifdef INET6 2412 free(pr->pr_ip6, M_PRISON); 2413 #endif 2414 if (pr->pr_cpuset != NULL) 2415 cpuset_rel(pr->pr_cpuset); 2416 osd_jail_exit(pr); 2417 free(pr, M_PRISON); 2418 2419 /* Removing a prison frees a reference on its parent. 
*/ 2420 pr = ppr; 2421 mtx_lock(&pr->pr_mtx); 2422 flags = PD_DEREF | PD_LIST_SLOCKED; 2423 } 2424 } 2425 2426 void 2427 prison_hold_locked(struct prison *pr) 2428 { 2429 2430 mtx_assert(&pr->pr_mtx, MA_OWNED); 2431 KASSERT(pr->pr_ref > 0, 2432 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 2433 pr->pr_ref++; 2434 } 2435 2436 void 2437 prison_hold(struct prison *pr) 2438 { 2439 2440 mtx_lock(&pr->pr_mtx); 2441 prison_hold_locked(pr); 2442 mtx_unlock(&pr->pr_mtx); 2443 } 2444 2445 void 2446 prison_proc_hold(struct prison *pr) 2447 { 2448 2449 mtx_lock(&pr->pr_mtx); 2450 KASSERT(pr->pr_uref > 0, 2451 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2452 pr->pr_uref++; 2453 mtx_unlock(&pr->pr_mtx); 2454 } 2455 2456 void 2457 prison_proc_free(struct prison *pr) 2458 { 2459 2460 mtx_lock(&pr->pr_mtx); 2461 KASSERT(pr->pr_uref > 0, 2462 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2463 prison_deref(pr, PD_DEUREF | PD_LOCKED); 2464 } 2465 2466 2467 #ifdef INET 2468 /* 2469 * Restrict a prison's IP address list with its parent's, possibly replacing 2470 * it. Return true if the replacement buffer was used (or would have been). 2471 */ 2472 static int 2473 prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2474 { 2475 int ii, ij, used; 2476 struct prison *ppr; 2477 2478 ppr = pr->pr_parent; 2479 if (!(pr->pr_flags & PR_IP4_USER)) { 2480 /* This has no user settings, so just copy the parent's list. */ 2481 if (pr->pr_ip4s < ppr->pr_ip4s) { 2482 /* 2483 * There's no room for the parent's list. Use the 2484 * new list buffer, which is assumed to be big enough 2485 * (if it was passed). If there's no buffer, try to 2486 * allocate one. 
2487 */ 2488 used = 1; 2489 if (newip4 == NULL) { 2490 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2491 M_PRISON, M_NOWAIT); 2492 if (newip4 != NULL) 2493 used = 0; 2494 } 2495 if (newip4 != NULL) { 2496 bcopy(ppr->pr_ip4, newip4, 2497 ppr->pr_ip4s * sizeof(*newip4)); 2498 free(pr->pr_ip4, M_PRISON); 2499 pr->pr_ip4 = newip4; 2500 pr->pr_ip4s = ppr->pr_ip4s; 2501 pr->pr_flags |= PR_IP4; 2502 } 2503 return (used); 2504 } 2505 pr->pr_ip4s = ppr->pr_ip4s; 2506 if (pr->pr_ip4s > 0) 2507 bcopy(ppr->pr_ip4, pr->pr_ip4, 2508 pr->pr_ip4s * sizeof(*newip4)); 2509 else if (pr->pr_ip4 != NULL) { 2510 free(pr->pr_ip4, M_PRISON); 2511 pr->pr_ip4 = NULL; 2512 } 2513 pr->pr_flags = 2514 (pr->pr_flags & ~PR_IP4) | (ppr->pr_flags & PR_IP4); 2515 } else if (pr->pr_ip4s > 0 && (ppr->pr_flags & PR_IP4)) { 2516 /* Remove addresses that aren't in the parent. */ 2517 for (ij = 0; ij < ppr->pr_ip4s; ij++) 2518 if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2519 break; 2520 if (ij < ppr->pr_ip4s) 2521 ii = 1; 2522 else { 2523 bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2524 --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2525 ii = 0; 2526 } 2527 for (ij = 1; ii < pr->pr_ip4s; ) { 2528 if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2529 ii++; 2530 continue; 2531 } 2532 switch (ij >= ppr->pr_ip4s ? -1 : 2533 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2534 case -1: 2535 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2536 (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2537 break; 2538 case 0: 2539 ii++; 2540 ij++; 2541 break; 2542 case 1: 2543 ij++; 2544 break; 2545 } 2546 } 2547 if (pr->pr_ip4s == 0) { 2548 free(pr->pr_ip4, M_PRISON); 2549 pr->pr_ip4 = NULL; 2550 } 2551 } 2552 return (0); 2553 } 2554 2555 /* 2556 * Pass back primary IPv4 address of this jail. 2557 * 2558 * If not restricted return success but do not alter the address. Caller has 2559 * to make sure to initialize it correctly (e.g. INADDR_ANY). 2560 * 2561 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 
2562 * Address returned in NBO. 2563 */ 2564 int 2565 prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2566 { 2567 struct prison *pr; 2568 2569 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2570 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2571 2572 pr = cred->cr_prison; 2573 if (!(pr->pr_flags & PR_IP4)) 2574 return (0); 2575 mtx_lock(&pr->pr_mtx); 2576 if (!(pr->pr_flags & PR_IP4)) { 2577 mtx_unlock(&pr->pr_mtx); 2578 return (0); 2579 } 2580 if (pr->pr_ip4 == NULL) { 2581 mtx_unlock(&pr->pr_mtx); 2582 return (EAFNOSUPPORT); 2583 } 2584 2585 ia->s_addr = pr->pr_ip4[0].s_addr; 2586 mtx_unlock(&pr->pr_mtx); 2587 return (0); 2588 } 2589 2590 /* 2591 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2592 */ 2593 int 2594 prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2595 { 2596 2597 if (pr1 == pr2) 2598 return (1); 2599 2600 /* 2601 * jail_set maintains an exclusive hold on allprison_lock while it 2602 * changes the IP addresses, so only a shared hold is needed. This is 2603 * easier than locking the two prisons which would require finding the 2604 * proper locking order and end up needing allprison_lock anyway. 2605 */ 2606 sx_slock(&allprison_lock); 2607 while (pr1 != &prison0 && !(pr1->pr_flags & PR_IP4_USER)) 2608 pr1 = pr1->pr_parent; 2609 while (pr2 != &prison0 && !(pr2->pr_flags & PR_IP4_USER)) 2610 pr2 = pr2->pr_parent; 2611 sx_sunlock(&allprison_lock); 2612 return (pr1 == pr2); 2613 } 2614 2615 /* 2616 * Make sure our (source) address is set to something meaningful to this 2617 * jail. 2618 * 2619 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2620 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2621 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 
2622 */ 2623 int 2624 prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2625 { 2626 struct prison *pr; 2627 struct in_addr ia0; 2628 int error; 2629 2630 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2631 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2632 2633 pr = cred->cr_prison; 2634 if (!(pr->pr_flags & PR_IP4)) 2635 return (0); 2636 mtx_lock(&pr->pr_mtx); 2637 if (!(pr->pr_flags & PR_IP4)) { 2638 mtx_unlock(&pr->pr_mtx); 2639 return (0); 2640 } 2641 if (pr->pr_ip4 == NULL) { 2642 mtx_unlock(&pr->pr_mtx); 2643 return (EAFNOSUPPORT); 2644 } 2645 2646 ia0.s_addr = ntohl(ia->s_addr); 2647 if (ia0.s_addr == INADDR_LOOPBACK) { 2648 ia->s_addr = pr->pr_ip4[0].s_addr; 2649 mtx_unlock(&pr->pr_mtx); 2650 return (0); 2651 } 2652 2653 if (ia0.s_addr == INADDR_ANY) { 2654 /* 2655 * In case there is only 1 IPv4 address, bind directly. 2656 */ 2657 if (pr->pr_ip4s == 1) 2658 ia->s_addr = pr->pr_ip4[0].s_addr; 2659 mtx_unlock(&pr->pr_mtx); 2660 return (0); 2661 } 2662 2663 error = _prison_check_ip4(pr, ia); 2664 mtx_unlock(&pr->pr_mtx); 2665 return (error); 2666 } 2667 2668 /* 2669 * Rewrite destination address in case we will connect to loopback address. 2670 * 2671 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2672 * Address passed in in NBO and returned in NBO. 
2673 */ 2674 int 2675 prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2676 { 2677 struct prison *pr; 2678 2679 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2680 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2681 2682 pr = cred->cr_prison; 2683 if (!(pr->pr_flags & PR_IP4)) 2684 return (0); 2685 mtx_lock(&pr->pr_mtx); 2686 if (!(pr->pr_flags & PR_IP4)) { 2687 mtx_unlock(&pr->pr_mtx); 2688 return (0); 2689 } 2690 if (pr->pr_ip4 == NULL) { 2691 mtx_unlock(&pr->pr_mtx); 2692 return (EAFNOSUPPORT); 2693 } 2694 2695 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2696 ia->s_addr = pr->pr_ip4[0].s_addr; 2697 mtx_unlock(&pr->pr_mtx); 2698 return (0); 2699 } 2700 2701 /* 2702 * Return success because nothing had to be changed. 2703 */ 2704 mtx_unlock(&pr->pr_mtx); 2705 return (0); 2706 } 2707 2708 /* 2709 * Check if given address belongs to the jail referenced by cred/prison. 2710 * 2711 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2712 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2713 * doesn't allow IPv4. Address passed in in NBO. 2714 */ 2715 static int 2716 _prison_check_ip4(struct prison *pr, struct in_addr *ia) 2717 { 2718 int i, a, z, d; 2719 2720 /* 2721 * Check the primary IP. 2722 */ 2723 if (pr->pr_ip4[0].s_addr == ia->s_addr) 2724 return (0); 2725 2726 /* 2727 * All the other IPs are sorted so we can do a binary search. 
2728 */ 2729 a = 0; 2730 z = pr->pr_ip4s - 2; 2731 while (a <= z) { 2732 i = (a + z) / 2; 2733 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 2734 if (d > 0) 2735 z = i - 1; 2736 else if (d < 0) 2737 a = i + 1; 2738 else 2739 return (0); 2740 } 2741 2742 return (EADDRNOTAVAIL); 2743 } 2744 2745 int 2746 prison_check_ip4(struct ucred *cred, struct in_addr *ia) 2747 { 2748 struct prison *pr; 2749 int error; 2750 2751 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2752 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2753 2754 pr = cred->cr_prison; 2755 if (!(pr->pr_flags & PR_IP4)) 2756 return (0); 2757 mtx_lock(&pr->pr_mtx); 2758 if (!(pr->pr_flags & PR_IP4)) { 2759 mtx_unlock(&pr->pr_mtx); 2760 return (0); 2761 } 2762 if (pr->pr_ip4 == NULL) { 2763 mtx_unlock(&pr->pr_mtx); 2764 return (EAFNOSUPPORT); 2765 } 2766 2767 error = _prison_check_ip4(pr, ia); 2768 mtx_unlock(&pr->pr_mtx); 2769 return (error); 2770 } 2771 #endif 2772 2773 #ifdef INET6 2774 static int 2775 prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 2776 { 2777 int ii, ij, used; 2778 struct prison *ppr; 2779 2780 ppr = pr->pr_parent; 2781 if (!(pr->pr_flags & PR_IP6_USER)) { 2782 /* This has no user settings, so just copy the parent's list. */ 2783 if (pr->pr_ip6s < ppr->pr_ip6s) { 2784 /* 2785 * There's no room for the parent's list. Use the 2786 * new list buffer, which is assumed to be big enough 2787 * (if it was passed). If there's no buffer, try to 2788 * allocate one. 
2789 */ 2790 used = 1; 2791 if (newip6 == NULL) { 2792 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 2793 M_PRISON, M_NOWAIT); 2794 if (newip6 != NULL) 2795 used = 0; 2796 } 2797 if (newip6 != NULL) { 2798 bcopy(ppr->pr_ip6, newip6, 2799 ppr->pr_ip6s * sizeof(*newip6)); 2800 free(pr->pr_ip6, M_PRISON); 2801 pr->pr_ip6 = newip6; 2802 pr->pr_ip6s = ppr->pr_ip6s; 2803 pr->pr_flags |= PR_IP6; 2804 } 2805 return (used); 2806 } 2807 pr->pr_ip6s = ppr->pr_ip6s; 2808 if (pr->pr_ip6s > 0) 2809 bcopy(ppr->pr_ip6, pr->pr_ip6, 2810 pr->pr_ip6s * sizeof(*newip6)); 2811 else if (pr->pr_ip6 != NULL) { 2812 free(pr->pr_ip6, M_PRISON); 2813 pr->pr_ip6 = NULL; 2814 } 2815 pr->pr_flags = 2816 (pr->pr_flags & ~PR_IP6) | (ppr->pr_flags & PR_IP6); 2817 } else if (pr->pr_ip6s > 0 && (ppr->pr_flags & PR_IP6)) { 2818 /* Remove addresses that aren't in the parent. */ 2819 for (ij = 0; ij < ppr->pr_ip6s; ij++) 2820 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 2821 &ppr->pr_ip6[ij])) 2822 break; 2823 if (ij < ppr->pr_ip6s) 2824 ii = 1; 2825 else { 2826 bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 2827 --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2828 ii = 0; 2829 } 2830 for (ij = 1; ii < pr->pr_ip6s; ) { 2831 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 2832 &ppr->pr_ip6[0])) { 2833 ii++; 2834 continue; 2835 } 2836 switch (ij >= ppr->pr_ip4s ? -1 : 2837 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 2838 case -1: 2839 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 2840 (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 2841 break; 2842 case 0: 2843 ii++; 2844 ij++; 2845 break; 2846 case 1: 2847 ij++; 2848 break; 2849 } 2850 } 2851 if (pr->pr_ip6s == 0) { 2852 free(pr->pr_ip6, M_PRISON); 2853 pr->pr_ip6 = NULL; 2854 } 2855 } 2856 return 0; 2857 } 2858 2859 /* 2860 * Pass back primary IPv6 address for this jail. 2861 * 2862 * If not restricted return success but do not alter the address. Caller has 2863 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 
2864 * 2865 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 2866 */ 2867 int 2868 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 2869 { 2870 struct prison *pr; 2871 2872 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2873 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2874 2875 pr = cred->cr_prison; 2876 if (!(pr->pr_flags & PR_IP6)) 2877 return (0); 2878 mtx_lock(&pr->pr_mtx); 2879 if (!(pr->pr_flags & PR_IP6)) { 2880 mtx_unlock(&pr->pr_mtx); 2881 return (0); 2882 } 2883 if (pr->pr_ip6 == NULL) { 2884 mtx_unlock(&pr->pr_mtx); 2885 return (EAFNOSUPPORT); 2886 } 2887 2888 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2889 mtx_unlock(&pr->pr_mtx); 2890 return (0); 2891 } 2892 2893 /* 2894 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 2895 */ 2896 int 2897 prison_equal_ip6(struct prison *pr1, struct prison *pr2) 2898 { 2899 2900 if (pr1 == pr2) 2901 return (1); 2902 2903 sx_slock(&allprison_lock); 2904 while (pr1 != &prison0 && !(pr1->pr_flags & PR_IP6_USER)) 2905 pr1 = pr1->pr_parent; 2906 while (pr2 != &prison0 && !(pr2->pr_flags & PR_IP6_USER)) 2907 pr2 = pr2->pr_parent; 2908 sx_sunlock(&allprison_lock); 2909 return (pr1 == pr2); 2910 } 2911 2912 /* 2913 * Make sure our (source) address is set to something meaningful to this jail. 2914 * 2915 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 2916 * when needed while binding. 2917 * 2918 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 2919 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2920 * doesn't allow IPv6. 
2921 */ 2922 int 2923 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 2924 { 2925 struct prison *pr; 2926 int error; 2927 2928 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2929 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2930 2931 pr = cred->cr_prison; 2932 if (!(pr->pr_flags & PR_IP6)) 2933 return (0); 2934 mtx_lock(&pr->pr_mtx); 2935 if (!(pr->pr_flags & PR_IP6)) { 2936 mtx_unlock(&pr->pr_mtx); 2937 return (0); 2938 } 2939 if (pr->pr_ip6 == NULL) { 2940 mtx_unlock(&pr->pr_mtx); 2941 return (EAFNOSUPPORT); 2942 } 2943 2944 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2945 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2946 mtx_unlock(&pr->pr_mtx); 2947 return (0); 2948 } 2949 2950 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 2951 /* 2952 * In case there is only 1 IPv6 address, and v6only is true, 2953 * then bind directly. 2954 */ 2955 if (v6only != 0 && pr->pr_ip6s == 1) 2956 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2957 mtx_unlock(&pr->pr_mtx); 2958 return (0); 2959 } 2960 2961 error = _prison_check_ip6(pr, ia6); 2962 mtx_unlock(&pr->pr_mtx); 2963 return (error); 2964 } 2965 2966 /* 2967 * Rewrite destination address in case we will connect to loopback address. 2968 * 2969 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 
2970 */ 2971 int 2972 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 2973 { 2974 struct prison *pr; 2975 2976 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2977 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2978 2979 pr = cred->cr_prison; 2980 if (!(pr->pr_flags & PR_IP6)) 2981 return (0); 2982 mtx_lock(&pr->pr_mtx); 2983 if (!(pr->pr_flags & PR_IP6)) { 2984 mtx_unlock(&pr->pr_mtx); 2985 return (0); 2986 } 2987 if (pr->pr_ip6 == NULL) { 2988 mtx_unlock(&pr->pr_mtx); 2989 return (EAFNOSUPPORT); 2990 } 2991 2992 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2993 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2994 mtx_unlock(&pr->pr_mtx); 2995 return (0); 2996 } 2997 2998 /* 2999 * Return success because nothing had to be changed. 3000 */ 3001 mtx_unlock(&pr->pr_mtx); 3002 return (0); 3003 } 3004 3005 /* 3006 * Check if given address belongs to the jail referenced by cred/prison. 3007 * 3008 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3009 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3010 * doesn't allow IPv6. 3011 */ 3012 static int 3013 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3014 { 3015 int i, a, z, d; 3016 3017 /* 3018 * Check the primary IP. 3019 */ 3020 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3021 return (0); 3022 3023 /* 3024 * All the other IPs are sorted so we can do a binary search. 
3025 */ 3026 a = 0; 3027 z = pr->pr_ip6s - 2; 3028 while (a <= z) { 3029 i = (a + z) / 2; 3030 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3031 if (d > 0) 3032 z = i - 1; 3033 else if (d < 0) 3034 a = i + 1; 3035 else 3036 return (0); 3037 } 3038 3039 return (EADDRNOTAVAIL); 3040 } 3041 3042 int 3043 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3044 { 3045 struct prison *pr; 3046 int error; 3047 3048 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3049 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3050 3051 pr = cred->cr_prison; 3052 if (!(pr->pr_flags & PR_IP6)) 3053 return (0); 3054 mtx_lock(&pr->pr_mtx); 3055 if (!(pr->pr_flags & PR_IP6)) { 3056 mtx_unlock(&pr->pr_mtx); 3057 return (0); 3058 } 3059 if (pr->pr_ip6 == NULL) { 3060 mtx_unlock(&pr->pr_mtx); 3061 return (EAFNOSUPPORT); 3062 } 3063 3064 error = _prison_check_ip6(pr, ia6); 3065 mtx_unlock(&pr->pr_mtx); 3066 return (error); 3067 } 3068 #endif 3069 3070 /* 3071 * Check if a jail supports the given address family. 3072 * 3073 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3074 * if not. 
3075 */ 3076 int 3077 prison_check_af(struct ucred *cred, int af) 3078 { 3079 struct prison *pr; 3080 int error; 3081 3082 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3083 3084 pr = cred->cr_prison; 3085 error = 0; 3086 switch (af) 3087 { 3088 #ifdef INET 3089 case AF_INET: 3090 if (pr->pr_flags & PR_IP4) 3091 { 3092 mtx_lock(&pr->pr_mtx); 3093 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3094 error = EAFNOSUPPORT; 3095 mtx_unlock(&pr->pr_mtx); 3096 } 3097 break; 3098 #endif 3099 #ifdef INET6 3100 case AF_INET6: 3101 if (pr->pr_flags & PR_IP6) 3102 { 3103 mtx_lock(&pr->pr_mtx); 3104 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3105 error = EAFNOSUPPORT; 3106 mtx_unlock(&pr->pr_mtx); 3107 } 3108 break; 3109 #endif 3110 case AF_LOCAL: 3111 case AF_ROUTE: 3112 break; 3113 default: 3114 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3115 error = EAFNOSUPPORT; 3116 } 3117 return (error); 3118 } 3119 3120 /* 3121 * Check if given address belongs to the jail referenced by cred (wrapper to 3122 * prison_check_ip[46]). 3123 * 3124 * Returns 0 if jail doesn't restrict the address family or if address belongs 3125 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3126 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 
3127 */ 3128 int 3129 prison_if(struct ucred *cred, struct sockaddr *sa) 3130 { 3131 #ifdef INET 3132 struct sockaddr_in *sai; 3133 #endif 3134 #ifdef INET6 3135 struct sockaddr_in6 *sai6; 3136 #endif 3137 int error; 3138 3139 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3140 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3141 3142 error = 0; 3143 switch (sa->sa_family) 3144 { 3145 #ifdef INET 3146 case AF_INET: 3147 sai = (struct sockaddr_in *)sa; 3148 error = prison_check_ip4(cred, &sai->sin_addr); 3149 break; 3150 #endif 3151 #ifdef INET6 3152 case AF_INET6: 3153 sai6 = (struct sockaddr_in6 *)sa; 3154 error = prison_check_ip6(cred, &sai6->sin6_addr); 3155 break; 3156 #endif 3157 default: 3158 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3159 error = EAFNOSUPPORT; 3160 } 3161 return (error); 3162 } 3163 3164 /* 3165 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3166 */ 3167 int 3168 prison_check(struct ucred *cred1, struct ucred *cred2) 3169 { 3170 3171 #ifdef VIMAGE 3172 if (cred2->cr_vimage->v_procg != cred1->cr_vimage->v_procg) 3173 return (ESRCH); 3174 #endif 3175 return ((cred1->cr_prison == cred2->cr_prison || 3176 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3177 } 3178 3179 /* 3180 * Return 1 if p2 is a child of p1, otherwise 0. 3181 */ 3182 int 3183 prison_ischild(struct prison *pr1, struct prison *pr2) 3184 { 3185 3186 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3187 if (pr1 == pr2) 3188 return (1); 3189 return (0); 3190 } 3191 3192 /* 3193 * Return 1 if the passed credential is in a jail, otherwise 0. 3194 */ 3195 int 3196 jailed(struct ucred *cred) 3197 { 3198 3199 return (cred->cr_prison != &prison0); 3200 } 3201 3202 /* 3203 * Return the correct hostname for the passed credential. 3204 */ 3205 void 3206 getcredhostname(struct ucred *cred, char *buf, size_t size) 3207 { 3208 struct prison *pr; 3209 3210 pr = (cred != NULL) ? 
cred->cr_prison : &prison0; 3211 mtx_lock(&pr->pr_mtx); 3212 strlcpy(buf, pr->pr_host, size); 3213 mtx_unlock(&pr->pr_mtx); 3214 } 3215 3216 /* 3217 * Determine whether the subject represented by cred can "see" 3218 * status of a mount point. 3219 * Returns: 0 for permitted, ENOENT otherwise. 3220 * XXX: This function should be called cr_canseemount() and should be 3221 * placed in kern_prot.c. 3222 */ 3223 int 3224 prison_canseemount(struct ucred *cred, struct mount *mp) 3225 { 3226 struct prison *pr; 3227 struct statfs *sp; 3228 size_t len; 3229 3230 pr = cred->cr_prison; 3231 if (pr->pr_enforce_statfs == 0) 3232 return (0); 3233 if (pr->pr_root->v_mount == mp) 3234 return (0); 3235 if (pr->pr_enforce_statfs == 2) 3236 return (ENOENT); 3237 /* 3238 * If jail's chroot directory is set to "/" we should be able to see 3239 * all mount-points from inside a jail. 3240 * This is ugly check, but this is the only situation when jail's 3241 * directory ends with '/'. 3242 */ 3243 if (strcmp(pr->pr_path, "/") == 0) 3244 return (0); 3245 len = strlen(pr->pr_path); 3246 sp = &mp->mnt_stat; 3247 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3248 return (ENOENT); 3249 /* 3250 * Be sure that we don't have situation where jail's root directory 3251 * is "/some/path" and mount point is "/some/pathpath". 
3252 */ 3253 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3254 return (ENOENT); 3255 return (0); 3256 } 3257 3258 void 3259 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3260 { 3261 char jpath[MAXPATHLEN]; 3262 struct prison *pr; 3263 size_t len; 3264 3265 pr = cred->cr_prison; 3266 if (pr->pr_enforce_statfs == 0) 3267 return; 3268 if (prison_canseemount(cred, mp) != 0) { 3269 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3270 strlcpy(sp->f_mntonname, "[restricted]", 3271 sizeof(sp->f_mntonname)); 3272 return; 3273 } 3274 if (pr->pr_root->v_mount == mp) { 3275 /* 3276 * Clear current buffer data, so we are sure nothing from 3277 * the valid path left there. 3278 */ 3279 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3280 *sp->f_mntonname = '/'; 3281 return; 3282 } 3283 /* 3284 * If jail's chroot directory is set to "/" we should be able to see 3285 * all mount-points from inside a jail. 3286 */ 3287 if (strcmp(pr->pr_path, "/") == 0) 3288 return; 3289 len = strlen(pr->pr_path); 3290 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3291 /* 3292 * Clear current buffer data, so we are sure nothing from 3293 * the valid path left there. 3294 */ 3295 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3296 if (*jpath == '\0') { 3297 /* Should never happen. */ 3298 *sp->f_mntonname = '/'; 3299 } else { 3300 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3301 } 3302 } 3303 3304 /* 3305 * Check with permission for a specific privilege is granted within jail. We 3306 * have a specific list of accepted privileges; the rest are denied. 3307 */ 3308 int 3309 prison_priv_check(struct ucred *cred, int priv) 3310 { 3311 3312 if (!jailed(cred)) 3313 return (0); 3314 3315 switch (priv) { 3316 3317 /* 3318 * Allow ktrace privileges for root in jail. 3319 */ 3320 case PRIV_KTRACE: 3321 3322 #if 0 3323 /* 3324 * Allow jailed processes to configure audit identity and 3325 * submit audit records (login, etc). 
In the future we may 3326 * want to further refine the relationship between audit and 3327 * jail. 3328 */ 3329 case PRIV_AUDIT_GETAUDIT: 3330 case PRIV_AUDIT_SETAUDIT: 3331 case PRIV_AUDIT_SUBMIT: 3332 #endif 3333 3334 /* 3335 * Allow jailed processes to manipulate process UNIX 3336 * credentials in any way they see fit. 3337 */ 3338 case PRIV_CRED_SETUID: 3339 case PRIV_CRED_SETEUID: 3340 case PRIV_CRED_SETGID: 3341 case PRIV_CRED_SETEGID: 3342 case PRIV_CRED_SETGROUPS: 3343 case PRIV_CRED_SETREUID: 3344 case PRIV_CRED_SETREGID: 3345 case PRIV_CRED_SETRESUID: 3346 case PRIV_CRED_SETRESGID: 3347 3348 /* 3349 * Jail implements visibility constraints already, so allow 3350 * jailed root to override uid/gid-based constraints. 3351 */ 3352 case PRIV_SEEOTHERGIDS: 3353 case PRIV_SEEOTHERUIDS: 3354 3355 /* 3356 * Jail implements inter-process debugging limits already, so 3357 * allow jailed root various debugging privileges. 3358 */ 3359 case PRIV_DEBUG_DIFFCRED: 3360 case PRIV_DEBUG_SUGID: 3361 case PRIV_DEBUG_UNPRIV: 3362 3363 /* 3364 * Allow jail to set various resource limits and login 3365 * properties, and for now, exceed process resource limits. 3366 */ 3367 case PRIV_PROC_LIMIT: 3368 case PRIV_PROC_SETLOGIN: 3369 case PRIV_PROC_SETRLIMIT: 3370 3371 /* 3372 * System V and POSIX IPC privileges are granted in jail. 3373 */ 3374 case PRIV_IPC_READ: 3375 case PRIV_IPC_WRITE: 3376 case PRIV_IPC_ADMIN: 3377 case PRIV_IPC_MSGSIZE: 3378 case PRIV_MQ_ADMIN: 3379 3380 /* 3381 * Jail operations within a jail work on child jails. 3382 */ 3383 case PRIV_JAIL_ATTACH: 3384 case PRIV_JAIL_SET: 3385 case PRIV_JAIL_REMOVE: 3386 3387 /* 3388 * Jail implements its own inter-process limits, so allow 3389 * root processes in jail to change scheduling on other 3390 * processes in the same jail. Likewise for signalling. 
3391 */ 3392 case PRIV_SCHED_DIFFCRED: 3393 case PRIV_SCHED_CPUSET: 3394 case PRIV_SIGNAL_DIFFCRED: 3395 case PRIV_SIGNAL_SUGID: 3396 3397 /* 3398 * Allow jailed processes to write to sysctls marked as jail 3399 * writable. 3400 */ 3401 case PRIV_SYSCTL_WRITEJAIL: 3402 3403 /* 3404 * Allow root in jail to manage a variety of quota 3405 * properties. These should likely be conditional on a 3406 * configuration option. 3407 */ 3408 case PRIV_VFS_GETQUOTA: 3409 case PRIV_VFS_SETQUOTA: 3410 3411 /* 3412 * Since Jail relies on chroot() to implement file system 3413 * protections, grant many VFS privileges to root in jail. 3414 * Be careful to exclude mount-related and NFS-related 3415 * privileges. 3416 */ 3417 case PRIV_VFS_READ: 3418 case PRIV_VFS_WRITE: 3419 case PRIV_VFS_ADMIN: 3420 case PRIV_VFS_EXEC: 3421 case PRIV_VFS_LOOKUP: 3422 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3423 case PRIV_VFS_CHFLAGS_DEV: 3424 case PRIV_VFS_CHOWN: 3425 case PRIV_VFS_CHROOT: 3426 case PRIV_VFS_RETAINSUGID: 3427 case PRIV_VFS_FCHROOT: 3428 case PRIV_VFS_LINK: 3429 case PRIV_VFS_SETGID: 3430 case PRIV_VFS_STAT: 3431 case PRIV_VFS_STICKYFILE: 3432 return (0); 3433 3434 /* 3435 * Depending on the global setting, allow privilege of 3436 * setting system flags. 3437 */ 3438 case PRIV_VFS_SYSFLAGS: 3439 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3440 return (0); 3441 else 3442 return (EPERM); 3443 3444 /* 3445 * Depending on the global setting, allow privilege of 3446 * mounting/unmounting file systems. 3447 */ 3448 case PRIV_VFS_MOUNT: 3449 case PRIV_VFS_UNMOUNT: 3450 case PRIV_VFS_MOUNT_NONUSER: 3451 case PRIV_VFS_MOUNT_OWNER: 3452 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT) 3453 return (0); 3454 else 3455 return (EPERM); 3456 3457 /* 3458 * Allow jailed root to bind reserved ports and reuse in-use 3459 * ports. 
3460 */ 3461 case PRIV_NETINET_RESERVEDPORT: 3462 case PRIV_NETINET_REUSEPORT: 3463 return (0); 3464 3465 /* 3466 * Allow jailed root to set certian IPv4/6 (option) headers. 3467 */ 3468 case PRIV_NETINET_SETHDROPTS: 3469 return (0); 3470 3471 /* 3472 * Conditionally allow creating raw sockets in jail. 3473 */ 3474 case PRIV_NETINET_RAW: 3475 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3476 return (0); 3477 else 3478 return (EPERM); 3479 3480 /* 3481 * Since jail implements its own visibility limits on netstat 3482 * sysctls, allow getcred. This allows identd to work in 3483 * jail. 3484 */ 3485 case PRIV_NETINET_GETCRED: 3486 return (0); 3487 3488 default: 3489 /* 3490 * In all remaining cases, deny the privilege request. This 3491 * includes almost all network privileges, many system 3492 * configuration privileges. 3493 */ 3494 return (EPERM); 3495 } 3496 } 3497 3498 /* 3499 * Return the part of pr2's name that is relative to pr1, or the whole name 3500 * if it does not directly follow. 3501 */ 3502 3503 char * 3504 prison_name(struct prison *pr1, struct prison *pr2) 3505 { 3506 char *name; 3507 3508 /* Jails see themselves as "0" (if they see themselves at all). */ 3509 if (pr1 == pr2) 3510 return "0"; 3511 name = pr2->pr_name; 3512 if (prison_ischild(pr1, pr2)) { 3513 /* 3514 * pr1 isn't locked (and allprison_lock may not be either) 3515 * so its length can't be counted on. But the number of dots 3516 * can be counted on - and counted. 3517 */ 3518 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 3519 name = strchr(name, '.') + 1; 3520 } 3521 return (name); 3522 } 3523 3524 /* 3525 * Return the part of pr2's path that is relative to pr1, or the whole path 3526 * if it does not directly follow. 
3527 */ 3528 static char * 3529 prison_path(struct prison *pr1, struct prison *pr2) 3530 { 3531 char *path1, *path2; 3532 int len1; 3533 3534 path1 = pr1->pr_path; 3535 path2 = pr2->pr_path; 3536 if (!strcmp(path1, "/")) 3537 return (path2); 3538 len1 = strlen(path1); 3539 if (strncmp(path1, path2, len1)) 3540 return (path2); 3541 if (path2[len1] == '\0') 3542 return "/"; 3543 if (path2[len1] == '/') 3544 return (path2 + len1); 3545 return (path2); 3546 } 3547 3548 3549 /* 3550 * Jail-related sysctls. 3551 */ 3552 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 3553 "Jails"); 3554 3555 static int 3556 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 3557 { 3558 struct xprison *xp; 3559 struct prison *pr, *cpr; 3560 #ifdef INET 3561 struct in_addr *ip4 = NULL; 3562 int ip4s = 0; 3563 #endif 3564 #ifdef INET6 3565 struct in_addr *ip6 = NULL; 3566 int ip6s = 0; 3567 #endif 3568 int descend, error; 3569 3570 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 3571 pr = req->td->td_ucred->cr_prison; 3572 error = 0; 3573 sx_slock(&allprison_lock); 3574 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 3575 #if defined(INET) || defined(INET6) 3576 again: 3577 #endif 3578 mtx_lock(&cpr->pr_mtx); 3579 #ifdef INET 3580 if (cpr->pr_ip4s > 0) { 3581 if (ip4s < cpr->pr_ip4s) { 3582 ip4s = cpr->pr_ip4s; 3583 mtx_unlock(&cpr->pr_mtx); 3584 ip4 = realloc(ip4, ip4s * 3585 sizeof(struct in_addr), M_TEMP, M_WAITOK); 3586 goto again; 3587 } 3588 bcopy(cpr->pr_ip4, ip4, 3589 cpr->pr_ip4s * sizeof(struct in_addr)); 3590 } 3591 #endif 3592 #ifdef INET6 3593 if (cpr->pr_ip6s > 0) { 3594 if (ip6s < cpr->pr_ip6s) { 3595 ip6s = cpr->pr_ip6s; 3596 mtx_unlock(&cpr->pr_mtx); 3597 ip6 = realloc(ip6, ip6s * 3598 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 3599 goto again; 3600 } 3601 bcopy(cpr->pr_ip6, ip6, 3602 cpr->pr_ip6s * sizeof(struct in6_addr)); 3603 } 3604 #endif 3605 if (cpr->pr_ref == 0) { 3606 mtx_unlock(&cpr->pr_mtx); 3607 continue; 3608 } 3609 bzero(xp, sizeof(*xp)); 3610 xp->pr_version = 
XPRISON_VERSION; 3611 xp->pr_id = cpr->pr_id; 3612 xp->pr_state = cpr->pr_uref > 0 3613 ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; 3614 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 3615 strlcpy(xp->pr_host, cpr->pr_host, sizeof(xp->pr_host)); 3616 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 3617 #ifdef INET 3618 xp->pr_ip4s = cpr->pr_ip4s; 3619 #endif 3620 #ifdef INET6 3621 xp->pr_ip6s = cpr->pr_ip6s; 3622 #endif 3623 mtx_unlock(&cpr->pr_mtx); 3624 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 3625 if (error) 3626 break; 3627 #ifdef INET 3628 if (xp->pr_ip4s > 0) { 3629 error = SYSCTL_OUT(req, ip4, 3630 xp->pr_ip4s * sizeof(struct in_addr)); 3631 if (error) 3632 break; 3633 } 3634 #endif 3635 #ifdef INET6 3636 if (xp->pr_ip6s > 0) { 3637 error = SYSCTL_OUT(req, ip6, 3638 xp->pr_ip6s * sizeof(struct in6_addr)); 3639 if (error) 3640 break; 3641 } 3642 #endif 3643 } 3644 sx_sunlock(&allprison_lock); 3645 free(xp, M_TEMP); 3646 #ifdef INET 3647 free(ip4, M_TEMP); 3648 #endif 3649 #ifdef INET6 3650 free(ip6, M_TEMP); 3651 #endif 3652 return (error); 3653 } 3654 3655 SYSCTL_OID(_security_jail, OID_AUTO, list, 3656 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3657 sysctl_jail_list, "S", "List of active jails"); 3658 3659 static int 3660 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 3661 { 3662 int error, injail; 3663 3664 injail = jailed(req->td->td_ucred); 3665 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 3666 3667 return (error); 3668 } 3669 3670 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 3671 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3672 sysctl_jail_jailed, "I", "Process in jail?"); 3673 3674 #if defined(INET) || defined(INET6) 3675 SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 3676 &jail_max_af_ips, 0, 3677 "Number of IP addresses a jail may have at most per address family"); 3678 #endif 3679 3680 /* 3681 * Default parameters for jail(2) compatability. 
For historical reasons, 3682 * the sysctl names have varying similarity to the parameter names. Prisons 3683 * just see their own parameters, and can't change them. 3684 */ 3685 static int 3686 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 3687 { 3688 struct prison *pr; 3689 int allow, error, i; 3690 3691 pr = req->td->td_ucred->cr_prison; 3692 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 3693 3694 /* Get the current flag value, and convert it to a boolean. */ 3695 i = (allow & arg2) ? 1 : 0; 3696 if (arg1 != NULL) 3697 i = !i; 3698 error = sysctl_handle_int(oidp, &i, 0, req); 3699 if (error || !req->newptr) 3700 return (error); 3701 i = i ? arg2 : 0; 3702 if (arg1 != NULL) 3703 i ^= arg2; 3704 /* 3705 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 3706 * for writing. 3707 */ 3708 mtx_lock(&prison0.pr_mtx); 3709 jail_default_allow = (jail_default_allow & ~arg2) | i; 3710 mtx_unlock(&prison0.pr_mtx); 3711 return (0); 3712 } 3713 3714 SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 3715 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3716 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 3717 "Processes in jail can set their hostnames"); 3718 SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 3719 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3720 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 3721 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 3722 SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 3723 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3724 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 3725 "Processes in jail can use System V IPC primitives"); 3726 SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 3727 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3728 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 3729 "Prison root can create raw sockets"); 3730 SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 3731 CTLTYPE_INT | 
CTLFLAG_RW | CTLFLAG_MPSAFE, 3732 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 3733 "Processes in jail can alter system file flags"); 3734 SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 3735 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3736 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 3737 "Processes in jail can mount/unmount jail-friendly file systems"); 3738 3739 static int 3740 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) 3741 { 3742 struct prison *pr; 3743 int level, error; 3744 3745 pr = req->td->td_ucred->cr_prison; 3746 level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); 3747 error = sysctl_handle_int(oidp, &level, 0, req); 3748 if (error || !req->newptr) 3749 return (error); 3750 *(int *)arg1 = level; 3751 return (0); 3752 } 3753 3754 SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, 3755 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3756 &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), 3757 sysctl_jail_default_level, "I", 3758 "Processes in jail cannot see all mounted file systems"); 3759 3760 /* 3761 * Nodes to describe jail parameters. Maximum length of string parameters 3762 * is returned in the string itself, and the other parameters exist merely 3763 * to make themselves and their types known. 
3764 */ 3765 SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 3766 "Jail parameters"); 3767 3768 int 3769 sysctl_jail_param(SYSCTL_HANDLER_ARGS) 3770 { 3771 int i; 3772 long l; 3773 size_t s; 3774 char numbuf[12]; 3775 3776 switch (oidp->oid_kind & CTLTYPE) 3777 { 3778 case CTLTYPE_LONG: 3779 case CTLTYPE_ULONG: 3780 l = 0; 3781 #ifdef SCTL_MASK32 3782 if (!(req->flags & SCTL_MASK32)) 3783 #endif 3784 return (SYSCTL_OUT(req, &l, sizeof(l))); 3785 case CTLTYPE_INT: 3786 case CTLTYPE_UINT: 3787 i = 0; 3788 return (SYSCTL_OUT(req, &i, sizeof(i))); 3789 case CTLTYPE_STRING: 3790 snprintf(numbuf, sizeof(numbuf), "%d", arg2); 3791 return 3792 (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 3793 case CTLTYPE_STRUCT: 3794 s = (size_t)arg2; 3795 return (SYSCTL_OUT(req, &s, sizeof(s))); 3796 } 3797 return (0); 3798 } 3799 3800 SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); 3801 SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); 3802 SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 3803 SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); 3804 SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 3805 "I", "Jail secure level"); 3806 SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, 3807 "I", "Jail cannot see all mounted file systems"); 3808 SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 3809 "B", "Jail persistence"); 3810 SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 3811 "B", "Jail is in the process of shutting down"); 3812 3813 SYSCTL_JAIL_PARAM_NODE(host, "Jail host info"); 3814 SYSCTL_JAIL_PARAM(, nohost, CTLTYPE_INT | CTLFLAG_RW, 3815 "BN", "Jail w/ no host info"); 3816 SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 3817 "Jail hostname"); 3818 SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, 3819 "Jail NIS domainname"); 3820 SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, 
CTLFLAG_RW, HOSTUUIDLEN, 3821 "Jail host UUID"); 3822 SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, 3823 "LU", "Jail host ID"); 3824 3825 SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 3826 SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 3827 3828 #ifdef INET 3829 SYSCTL_JAIL_PARAM_NODE(ip4, "Jail IPv4 address virtualization"); 3830 SYSCTL_JAIL_PARAM(, noip4, CTLTYPE_INT | CTLFLAG_RW, 3831 "BN", "Jail w/ no IP address virtualization"); 3832 SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 3833 "S,in_addr,a", "Jail IPv4 addresses"); 3834 #endif 3835 #ifdef INET6 3836 SYSCTL_JAIL_PARAM_NODE(ip6, "Jail IPv6 address virtualization"); 3837 SYSCTL_JAIL_PARAM(, noip6, CTLTYPE_INT | CTLFLAG_RW, 3838 "BN", "Jail w/ no IP address virtualization"); 3839 SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 3840 "S,in6_addr,a", "Jail IPv6 addresses"); 3841 #endif 3842 3843 SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); 3844 SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, 3845 "B", "Jail may set hostname"); 3846 SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, 3847 "B", "Jail may use SYSV IPC"); 3848 SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, 3849 "B", "Jail may create raw sockets"); 3850 SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, 3851 "B", "Jail may alter system file flags"); 3852 SYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW, 3853 "B", "Jail may mount/unmount jail-friendly file systems"); 3854 SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, 3855 "B", "Jail may set file quotas"); 3856 SYSCTL_JAIL_PARAM(_allow, jails, CTLTYPE_INT | CTLFLAG_RW, 3857 "B", "Jail may create child jails"); 3858 SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, 3859 "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); 3860 3861 3862 #ifdef DDB 3863 3864 static void 3865 
db_show_prison(struct prison *pr) 3866 { 3867 int fi; 3868 #if defined(INET) || defined(INET6) 3869 int ii; 3870 #endif 3871 #ifdef INET6 3872 char ip6buf[INET6_ADDRSTRLEN]; 3873 #endif 3874 3875 db_printf("prison %p:\n", pr); 3876 db_printf(" jid = %d\n", pr->pr_id); 3877 db_printf(" name = %s\n", pr->pr_name); 3878 db_printf(" parent = %p\n", pr->pr_parent); 3879 db_printf(" ref = %d\n", pr->pr_ref); 3880 db_printf(" uref = %d\n", pr->pr_uref); 3881 db_printf(" path = %s\n", pr->pr_path); 3882 db_printf(" cpuset = %d\n", pr->pr_cpuset 3883 ? pr->pr_cpuset->cs_id : -1); 3884 db_printf(" root = %p\n", pr->pr_root); 3885 db_printf(" securelevel = %d\n", pr->pr_securelevel); 3886 db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); 3887 db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); 3888 db_printf(" flags = %x", pr->pr_flags); 3889 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 3890 fi++) 3891 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) 3892 db_printf(" %s", pr_flag_names[fi]); 3893 db_printf(" allow = %x", pr->pr_allow); 3894 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 3895 fi++) 3896 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) 3897 db_printf(" %s", pr_allow_names[fi]); 3898 db_printf("\n"); 3899 db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); 3900 db_printf(" host.hostname = %s\n", pr->pr_host); 3901 db_printf(" host.domainname = %s\n", pr->pr_domain); 3902 db_printf(" host.hostuuid = %s\n", pr->pr_uuid); 3903 db_printf(" host.hostid = %lu\n", pr->pr_hostid); 3904 #ifdef INET 3905 db_printf(" ip4s = %d\n", pr->pr_ip4s); 3906 for (ii = 0; ii < pr->pr_ip4s; ii++) 3907 db_printf(" %s %s\n", 3908 ii == 0 ? "ip4 =" : " ", 3909 inet_ntoa(pr->pr_ip4[ii])); 3910 #endif 3911 #ifdef INET6 3912 db_printf(" ip6s = %d\n", pr->pr_ip6s); 3913 for (ii = 0; ii < pr->pr_ip6s; ii++) 3914 db_printf(" %s %s\n", 3915 ii == 0 ? 
"ip6 =" : " ", 3916 ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 3917 #endif 3918 } 3919 3920 DB_SHOW_COMMAND(prison, db_show_prison_command) 3921 { 3922 struct prison *pr; 3923 3924 if (!have_addr) { 3925 /* 3926 * Show all prisons in the list, and prison0 which is not 3927 * listed. 3928 */ 3929 db_show_prison(&prison0); 3930 if (!db_pager_quit) { 3931 TAILQ_FOREACH(pr, &allprison, pr_list) { 3932 db_show_prison(pr); 3933 if (db_pager_quit) 3934 break; 3935 } 3936 } 3937 return; 3938 } 3939 3940 if (addr == 0) 3941 pr = &prison0; 3942 else { 3943 /* Look for a prison with the ID and with references. */ 3944 TAILQ_FOREACH(pr, &allprison, pr_list) 3945 if (pr->pr_id == addr && pr->pr_ref > 0) 3946 break; 3947 if (pr == NULL) 3948 /* Look again, without requiring a reference. */ 3949 TAILQ_FOREACH(pr, &allprison, pr_list) 3950 if (pr->pr_id == addr) 3951 break; 3952 if (pr == NULL) 3953 /* Assume address points to a valid prison. */ 3954 pr = (struct prison *)addr; 3955 } 3956 db_show_prison(pr); 3957 } 3958 3959 #endif /* DDB */ 3960