/*-
 * Copyright (c) 1999 Poul-Henning Kamp.
 * Copyright (c) 2008 Bjoern A. Zeeb.
 * Copyright (c) 2009 James Gritton.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ddb.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/sysproto.h>
#include <sys/malloc.h>
#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/taskqueue.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <netinet/in.h>
#ifdef DDB
#include <ddb/ddb.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#endif /* INET6 */
#endif /* DDB */

#include <security/mac/mac_framework.h>

/* Allocation type used for prison structures and their address lists. */
MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");

/* prison0 describes what is "real" about the system. */
struct prison prison0 = {
	.pr_id = 0,
	.pr_name = "0",
	.pr_ref = 1,
	.pr_uref = 1,
	.pr_path = "/",
	.pr_securelevel = -1,
	.pr_uuid = "00000000-0000-0000-0000-000000000000",
	.pr_children = LIST_HEAD_INITIALIZER(&prison0.pr_children),
	.pr_flags = PR_HOST,
	.pr_allow = PR_ALLOW_ALL,
};
MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF);

/* allprison and lastprid are protected by allprison_lock. */
struct sx allprison_lock;
SX_SYSINIT(allprison_lock, &allprison_lock, "allprison");
struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison);
/* Most recently auto-assigned jail ID; the next-free-jid search starts here. */
int lastprid = 0;

/* Forward declarations for helpers defined later in this file. */
static int do_jail_attach(struct thread *td, struct prison *pr);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static char *prison_path(struct prison *pr1, struct prison *pr2);
static void prison_remove_one(struct prison *pr);
#ifdef INET
static int _prison_check_ip4(struct prison *pr, struct in_addr *ia);
static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4);
#endif
#ifdef INET6
static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6);
static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6);
#endif

/*
 * Flags for prison_deref
 *
 * NOTE(review): prison_deref() itself is not visible from here, so the
 * meanings below are inferred from the flag names and from the call sites
 * in kern_jail_set() -- confirm against the implementation:
 *   PD_DEREF / PD_DEUREF		presumably drop one pr_ref / pr_uref
 *					(passed to undo pr_ref++ / pr_uref++).
 *   PD_LOCKED				presumably: caller holds pr->pr_mtx.
 *   PD_LIST_SLOCKED / PD_LIST_XLOCKED	presumably: caller holds
 *					allprison_lock shared / exclusive.
 */
#define PD_DEREF 0x01
#define PD_DEUREF 0x02
#define PD_LOCKED 0x04
#define PD_LIST_SLOCKED 0x08
#define PD_LIST_XLOCKED 0x10

/*
 * Parameter names corresponding to PR_* flag values
 *
 * The array index is the bit position: kern_jail_set() tests entry fi
 * against (1 << fi).  Slots left NULL by the #ifdefs (e.g. index 2 on a
 * kernel with INET6 but without INET) are skipped by that loop.
 */
static char *pr_flag_names[] = {
	[0] = "persist",
	"host",
#ifdef INET
	"ip4",
#endif
#ifdef INET6
	[3] = "ip6",
#endif
};

/* Negated spellings of pr_flag_names; must stay index-for-index parallel. */
static char *pr_flag_nonames[] = {
	[0] = "nopersist",
	"nohost",
#ifdef INET
	"noip4",
#endif
#ifdef INET6
	[3] = "noip6",
#endif
};

/*
 * Names of the "allow.*" permission parameters.  As with the flag tables,
 * entry fi corresponds to bit (1 << fi) of the allow mask (see the loops in
 * kern_jail() and kern_jail_set()).
 */
static char *pr_allow_names[] = {
	"allow.set_hostname",
	"allow.sysvipc",
	"allow.raw_sockets",
	"allow.chflags",
	"allow.mount",
	"allow.quotas",
	"allow.jails",
	"allow.socket_af",
};

/* Negated spellings of pr_allow_names; must stay index-for-index parallel. */
static char *pr_allow_nonames[] = {
	"allow.noset_hostname",
	"allow.nosysvipc",
	"allow.noraw_sockets",
	"allow.nochflags",
	"allow.nomount",
	"allow.noquotas",
	"allow.nojails",
	"allow.nosocket_af",
};

/* Permission mask given to newly created jails by default. */
#define JAIL_DEFAULT_ALLOW PR_ALLOW_SET_HOSTNAME
static unsigned
jail_default_allow = JAIL_DEFAULT_ALLOW; 166 static int jail_default_enforce_statfs = 2; 167 #if defined(INET) || defined(INET6) 168 static int jail_max_af_ips = 255; 169 #endif 170 171 #ifdef INET 172 static int 173 qcmp_v4(const void *ip1, const void *ip2) 174 { 175 in_addr_t iaa, iab; 176 177 /* 178 * We need to compare in HBO here to get the list sorted as expected 179 * by the result of the code. Sorting NBO addresses gives you 180 * interesting results. If you do not understand, do not try. 181 */ 182 iaa = ntohl(((const struct in_addr *)ip1)->s_addr); 183 iab = ntohl(((const struct in_addr *)ip2)->s_addr); 184 185 /* 186 * Do not simply return the difference of the two numbers, the int is 187 * not wide enough. 188 */ 189 if (iaa > iab) 190 return (1); 191 else if (iaa < iab) 192 return (-1); 193 else 194 return (0); 195 } 196 #endif 197 198 #ifdef INET6 199 static int 200 qcmp_v6(const void *ip1, const void *ip2) 201 { 202 const struct in6_addr *ia6a, *ia6b; 203 int i, rc; 204 205 ia6a = (const struct in6_addr *)ip1; 206 ia6b = (const struct in6_addr *)ip2; 207 208 rc = 0; 209 for (i = 0; rc == 0 && i < sizeof(struct in6_addr); i++) { 210 if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) 211 rc = 1; 212 else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) 213 rc = -1; 214 } 215 return (rc); 216 } 217 #endif 218 219 /* 220 * struct jail_args { 221 * struct jail *jail; 222 * }; 223 */ 224 int 225 jail(struct thread *td, struct jail_args *uap) 226 { 227 uint32_t version; 228 int error; 229 struct jail j; 230 231 error = copyin(uap->jail, &version, sizeof(uint32_t)); 232 if (error) 233 return (error); 234 235 switch (version) { 236 case 0: 237 { 238 struct jail_v0 j0; 239 240 /* FreeBSD single IPv4 jails. 
*/ 241 bzero(&j, sizeof(struct jail)); 242 error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); 243 if (error) 244 return (error); 245 j.version = j0.version; 246 j.path = j0.path; 247 j.hostname = j0.hostname; 248 j.ip4s = j0.ip_number; 249 break; 250 } 251 252 case 1: 253 /* 254 * Version 1 was used by multi-IPv4 jail implementations 255 * that never made it into the official kernel. 256 */ 257 return (EINVAL); 258 259 case 2: /* JAIL_API_VERSION */ 260 /* FreeBSD multi-IPv4/IPv6,noIP jails. */ 261 error = copyin(uap->jail, &j, sizeof(struct jail)); 262 if (error) 263 return (error); 264 break; 265 266 default: 267 /* Sci-Fi jails are not supported, sorry. */ 268 return (EINVAL); 269 } 270 return (kern_jail(td, &j)); 271 } 272 273 int 274 kern_jail(struct thread *td, struct jail *j) 275 { 276 struct iovec optiov[24]; 277 struct uio opt; 278 char *u_path, *u_hostname, *u_name; 279 #ifdef INET 280 int ip4s; 281 struct in_addr *u_ip4; 282 #endif 283 #ifdef INET6 284 struct in6_addr *u_ip6; 285 #endif 286 size_t tmplen; 287 int error, enforce_statfs, fi; 288 289 bzero(&optiov, sizeof(optiov)); 290 opt.uio_iov = optiov; 291 opt.uio_iovcnt = 0; 292 opt.uio_offset = -1; 293 opt.uio_resid = -1; 294 opt.uio_segflg = UIO_SYSSPACE; 295 opt.uio_rw = UIO_READ; 296 opt.uio_td = td; 297 298 /* Set permissions for top-level jails from sysctls. */ 299 if (!jailed(td->td_ucred)) { 300 for (fi = 0; fi < sizeof(pr_allow_names) / 301 sizeof(pr_allow_names[0]); fi++) { 302 optiov[opt.uio_iovcnt].iov_base = 303 (jail_default_allow & (1 << fi)) 304 ? 
pr_allow_names[fi] : pr_allow_nonames[fi]; 305 optiov[opt.uio_iovcnt].iov_len = 306 strlen(optiov[opt.uio_iovcnt].iov_base) + 1; 307 opt.uio_iovcnt += 2; 308 } 309 optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; 310 optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); 311 opt.uio_iovcnt++; 312 enforce_statfs = jail_default_enforce_statfs; 313 optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; 314 optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); 315 opt.uio_iovcnt++; 316 } 317 318 tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; 319 #ifdef INET 320 ip4s = (j->version == 0) ? 1 : j->ip4s; 321 if (ip4s > jail_max_af_ips) 322 return (EINVAL); 323 tmplen += ip4s * sizeof(struct in_addr); 324 #else 325 if (j->ip4s > 0) 326 return (EINVAL); 327 #endif 328 #ifdef INET6 329 if (j->ip6s > jail_max_af_ips) 330 return (EINVAL); 331 tmplen += j->ip6s * sizeof(struct in6_addr); 332 #else 333 if (j->ip6s > 0) 334 return (EINVAL); 335 #endif 336 u_path = malloc(tmplen, M_TEMP, M_WAITOK); 337 u_hostname = u_path + MAXPATHLEN; 338 u_name = u_hostname + MAXHOSTNAMELEN; 339 #ifdef INET 340 u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); 341 #endif 342 #ifdef INET6 343 #ifdef INET 344 u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); 345 #else 346 u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); 347 #endif 348 #endif 349 optiov[opt.uio_iovcnt].iov_base = "path"; 350 optiov[opt.uio_iovcnt].iov_len = sizeof("path"); 351 opt.uio_iovcnt++; 352 optiov[opt.uio_iovcnt].iov_base = u_path; 353 error = copyinstr(j->path, u_path, MAXPATHLEN, 354 &optiov[opt.uio_iovcnt].iov_len); 355 if (error) { 356 free(u_path, M_TEMP); 357 return (error); 358 } 359 opt.uio_iovcnt++; 360 optiov[opt.uio_iovcnt].iov_base = "host.hostname"; 361 optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); 362 opt.uio_iovcnt++; 363 optiov[opt.uio_iovcnt].iov_base = u_hostname; 364 error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, 365 &optiov[opt.uio_iovcnt].iov_len); 366 if (error) { 
367 free(u_path, M_TEMP); 368 return (error); 369 } 370 opt.uio_iovcnt++; 371 if (j->jailname != NULL) { 372 optiov[opt.uio_iovcnt].iov_base = "name"; 373 optiov[opt.uio_iovcnt].iov_len = sizeof("name"); 374 opt.uio_iovcnt++; 375 optiov[opt.uio_iovcnt].iov_base = u_name; 376 error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, 377 &optiov[opt.uio_iovcnt].iov_len); 378 if (error) { 379 free(u_path, M_TEMP); 380 return (error); 381 } 382 opt.uio_iovcnt++; 383 } 384 #ifdef INET 385 optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; 386 optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); 387 opt.uio_iovcnt++; 388 optiov[opt.uio_iovcnt].iov_base = u_ip4; 389 optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); 390 if (j->version == 0) 391 u_ip4->s_addr = j->ip4s; 392 else { 393 error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); 394 if (error) { 395 free(u_path, M_TEMP); 396 return (error); 397 } 398 } 399 opt.uio_iovcnt++; 400 #endif 401 #ifdef INET6 402 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; 403 optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr"); 404 opt.uio_iovcnt++; 405 optiov[opt.uio_iovcnt].iov_base = u_ip6; 406 optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); 407 error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); 408 if (error) { 409 free(u_path, M_TEMP); 410 return (error); 411 } 412 opt.uio_iovcnt++; 413 #endif 414 KASSERT(opt.uio_iovcnt <= sizeof(optiov) / sizeof(optiov[0]), 415 ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); 416 error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); 417 free(u_path, M_TEMP); 418 return (error); 419 } 420 421 422 /* 423 * struct jail_set_args { 424 * struct iovec *iovp; 425 * unsigned int iovcnt; 426 * int flags; 427 * }; 428 */ 429 int 430 jail_set(struct thread *td, struct jail_set_args *uap) 431 { 432 struct uio *auio; 433 int error; 434 435 /* Check that we have an even number of iovecs. 
*/ 436 if (uap->iovcnt & 1) 437 return (EINVAL); 438 439 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 440 if (error) 441 return (error); 442 error = kern_jail_set(td, auio, uap->flags); 443 free(auio, M_IOV); 444 return (error); 445 } 446 447 int 448 kern_jail_set(struct thread *td, struct uio *optuio, int flags) 449 { 450 struct nameidata nd; 451 #ifdef INET 452 struct in_addr *ip4; 453 #endif 454 #ifdef INET6 455 struct in6_addr *ip6; 456 #endif 457 struct vfsopt *opt; 458 struct vfsoptlist *opts; 459 struct prison *pr, *deadpr, *mypr, *ppr, *tpr; 460 struct vnode *root; 461 char *domain, *errmsg, *host, *name, *p, *path, *uuid; 462 #if defined(INET) || defined(INET6) 463 void *op; 464 #endif 465 unsigned long hid; 466 size_t namelen, onamelen; 467 int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos; 468 int gotenforce, gothid, gotslevel, fi, jid, len; 469 int slevel, vfslocked; 470 #if defined(INET) || defined(INET6) 471 int ii, ij; 472 #endif 473 #ifdef INET 474 int ip4s, ip4a, redo_ip4; 475 #endif 476 #ifdef INET6 477 int ip6s, ip6a, redo_ip6; 478 #endif 479 unsigned pr_flags, ch_flags; 480 unsigned pr_allow, ch_allow, tallow; 481 char numbuf[12]; 482 483 error = priv_check(td, PRIV_JAIL_SET); 484 if (!error && (flags & JAIL_ATTACH)) 485 error = priv_check(td, PRIV_JAIL_ATTACH); 486 if (error) 487 return (error); 488 mypr = ppr = td->td_ucred->cr_prison; 489 if ((flags & JAIL_CREATE) && !(mypr->pr_allow & PR_ALLOW_JAILS)) 490 return (EPERM); 491 if (flags & ~JAIL_SET_MASK) 492 return (EINVAL); 493 494 /* 495 * Check all the parameters before committing to anything. Not all 496 * errors can be caught early, but we may as well try. Also, this 497 * takes care of some expensive stuff (path lookup) before getting 498 * the allprison lock. 499 * 500 * XXX Jails are not filesystems, and jail parameters are not mount 501 * options. But it makes more sense to re-use the vfsopt code 502 * than duplicate it under a different name. 
503 */ 504 error = vfs_buildopts(optuio, &opts); 505 if (error) 506 return (error); 507 #ifdef INET 508 ip4a = 0; 509 ip4 = NULL; 510 #endif 511 #ifdef INET6 512 ip6a = 0; 513 ip6 = NULL; 514 #endif 515 516 #if defined(INET) || defined(INET6) 517 again: 518 #endif 519 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 520 if (error == ENOENT) 521 jid = 0; 522 else if (error != 0) 523 goto done_free; 524 525 error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); 526 if (error == ENOENT) 527 gotslevel = 0; 528 else if (error != 0) 529 goto done_free; 530 else 531 gotslevel = 1; 532 533 error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); 534 gotenforce = (error == 0); 535 if (gotenforce) { 536 if (enforce < 0 || enforce > 2) 537 return (EINVAL); 538 } else if (error != ENOENT) 539 goto done_free; 540 541 pr_flags = ch_flags = 0; 542 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 543 fi++) { 544 if (pr_flag_names[fi] == NULL) 545 continue; 546 vfs_flagopt(opts, pr_flag_names[fi], &pr_flags, 1 << fi); 547 vfs_flagopt(opts, pr_flag_nonames[fi], &ch_flags, 1 << fi); 548 } 549 ch_flags |= pr_flags; 550 if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE 551 && !(pr_flags & PR_PERSIST)) { 552 error = EINVAL; 553 vfs_opterror(opts, "new jail must persist or attach"); 554 goto done_errmsg; 555 } 556 557 pr_allow = ch_allow = 0; 558 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 559 fi++) { 560 vfs_flagopt(opts, pr_allow_names[fi], &pr_allow, 1 << fi); 561 vfs_flagopt(opts, pr_allow_nonames[fi], &ch_allow, 1 << fi); 562 } 563 ch_allow |= pr_allow; 564 565 error = vfs_getopt(opts, "name", (void **)&name, &len); 566 if (error == ENOENT) 567 name = NULL; 568 else if (error != 0) 569 goto done_free; 570 else { 571 if (len == 0 || name[len - 1] != '\0') { 572 error = EINVAL; 573 goto done_free; 574 } 575 if (len > MAXHOSTNAMELEN) { 576 error = ENAMETOOLONG; 577 goto done_free; 578 } 579 
} 580 581 error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); 582 if (error == ENOENT) 583 host = NULL; 584 else if (error != 0) 585 goto done_free; 586 else { 587 ch_flags |= PR_HOST; 588 pr_flags |= PR_HOST; 589 if (len == 0 || host[len - 1] != '\0') { 590 error = EINVAL; 591 goto done_free; 592 } 593 if (len > MAXHOSTNAMELEN) { 594 error = ENAMETOOLONG; 595 goto done_free; 596 } 597 } 598 599 error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); 600 if (error == ENOENT) 601 domain = NULL; 602 else if (error != 0) 603 goto done_free; 604 else { 605 ch_flags |= PR_HOST; 606 pr_flags |= PR_HOST; 607 if (len == 0 || domain[len - 1] != '\0') { 608 error = EINVAL; 609 goto done_free; 610 } 611 if (len > MAXHOSTNAMELEN) { 612 error = ENAMETOOLONG; 613 goto done_free; 614 } 615 } 616 617 error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); 618 if (error == ENOENT) 619 uuid = NULL; 620 else if (error != 0) 621 goto done_free; 622 else { 623 ch_flags |= PR_HOST; 624 pr_flags |= PR_HOST; 625 if (len == 0 || uuid[len - 1] != '\0') { 626 error = EINVAL; 627 goto done_free; 628 } 629 if (len > HOSTUUIDLEN) { 630 error = ENAMETOOLONG; 631 goto done_free; 632 } 633 } 634 635 #ifdef COMPAT_IA32 636 if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 637 uint32_t hid32; 638 639 error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); 640 hid = hid32; 641 } else 642 #endif 643 error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); 644 if (error == ENOENT) 645 gothid = 0; 646 else if (error != 0) 647 goto done_free; 648 else { 649 gothid = 1; 650 ch_flags |= PR_HOST; 651 pr_flags |= PR_HOST; 652 } 653 654 /* This might be the second time around for this option. 
*/ 655 #ifdef INET 656 error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); 657 if (error == ENOENT) 658 ip4s = -1; 659 else if (error != 0) 660 goto done_free; 661 else if (ip4s & (sizeof(*ip4) - 1)) { 662 error = EINVAL; 663 goto done_free; 664 } else { 665 ch_flags |= PR_IP4_USER; 666 pr_flags |= PR_IP4_USER; 667 if (ip4s > 0) { 668 ip4s /= sizeof(*ip4); 669 if (ip4s > jail_max_af_ips) { 670 error = EINVAL; 671 vfs_opterror(opts, "too many IPv4 addresses"); 672 goto done_errmsg; 673 } 674 if (ip4a < ip4s) { 675 ip4a = ip4s; 676 free(ip4, M_PRISON); 677 ip4 = NULL; 678 } 679 if (ip4 == NULL) 680 ip4 = malloc(ip4a * sizeof(*ip4), M_PRISON, 681 M_WAITOK); 682 bcopy(op, ip4, ip4s * sizeof(*ip4)); 683 /* 684 * IP addresses are all sorted but ip[0] to preserve 685 * the primary IP address as given from userland. 686 * This special IP is used for unbound outgoing 687 * connections as well for "loopback" traffic. 688 */ 689 if (ip4s > 1) 690 qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), qcmp_v4); 691 /* 692 * Check for duplicate addresses and do some simple 693 * zero and broadcast checks. If users give other bogus 694 * addresses it is their problem. 695 * 696 * We do not have to care about byte order for these 697 * checks so we will do them in NBO. 
698 */ 699 for (ii = 0; ii < ip4s; ii++) { 700 if (ip4[ii].s_addr == INADDR_ANY || 701 ip4[ii].s_addr == INADDR_BROADCAST) { 702 error = EINVAL; 703 goto done_free; 704 } 705 if ((ii+1) < ip4s && 706 (ip4[0].s_addr == ip4[ii+1].s_addr || 707 ip4[ii].s_addr == ip4[ii+1].s_addr)) { 708 error = EINVAL; 709 goto done_free; 710 } 711 } 712 } 713 } 714 #endif 715 716 #ifdef INET6 717 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); 718 if (error == ENOENT) 719 ip6s = -1; 720 else if (error != 0) 721 goto done_free; 722 else if (ip6s & (sizeof(*ip6) - 1)) { 723 error = EINVAL; 724 goto done_free; 725 } else { 726 ch_flags |= PR_IP6_USER; 727 pr_flags |= PR_IP6_USER; 728 if (ip6s > 0) { 729 ip6s /= sizeof(*ip6); 730 if (ip6s > jail_max_af_ips) { 731 error = EINVAL; 732 vfs_opterror(opts, "too many IPv6 addresses"); 733 goto done_errmsg; 734 } 735 if (ip6a < ip6s) { 736 ip6a = ip6s; 737 free(ip6, M_PRISON); 738 ip6 = NULL; 739 } 740 if (ip6 == NULL) 741 ip6 = malloc(ip6a * sizeof(*ip6), M_PRISON, 742 M_WAITOK); 743 bcopy(op, ip6, ip6s * sizeof(*ip6)); 744 if (ip6s > 1) 745 qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), qcmp_v6); 746 for (ii = 0; ii < ip6s; ii++) { 747 if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { 748 error = EINVAL; 749 goto done_free; 750 } 751 if ((ii+1) < ip6s && 752 (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || 753 IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) 754 { 755 error = EINVAL; 756 goto done_free; 757 } 758 } 759 } 760 } 761 #endif 762 763 root = NULL; 764 error = vfs_getopt(opts, "path", (void **)&path, &len); 765 if (error == ENOENT) 766 path = NULL; 767 else if (error != 0) 768 goto done_free; 769 else { 770 if (flags & JAIL_UPDATE) { 771 error = EINVAL; 772 vfs_opterror(opts, 773 "path cannot be changed after creation"); 774 goto done_errmsg; 775 } 776 if (len == 0 || path[len - 1] != '\0') { 777 error = EINVAL; 778 goto done_free; 779 } 780 if (len < 2 || (len == 2 && path[0] == '/')) 781 path = NULL; 782 else { 783 /* Leave room for a real-root full 
pathname. */ 784 if (len + (path[0] == '/' && strcmp(mypr->pr_path, "/") 785 ? strlen(mypr->pr_path) : 0) > MAXPATHLEN) { 786 error = ENAMETOOLONG; 787 goto done_free; 788 } 789 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_SYSSPACE, 790 path, td); 791 error = namei(&nd); 792 if (error) 793 goto done_free; 794 vfslocked = NDHASGIANT(&nd); 795 root = nd.ni_vp; 796 NDFREE(&nd, NDF_ONLY_PNBUF); 797 if (root->v_type != VDIR) { 798 error = ENOTDIR; 799 vrele(root); 800 VFS_UNLOCK_GIANT(vfslocked); 801 goto done_free; 802 } 803 VFS_UNLOCK_GIANT(vfslocked); 804 } 805 } 806 807 /* 808 * Grab the allprison lock before letting modules check their 809 * parameters. Once we have it, do not let go so we'll have a 810 * consistent view of the OSD list. 811 */ 812 sx_xlock(&allprison_lock); 813 error = osd_jail_call(NULL, PR_METHOD_CHECK, opts); 814 if (error) 815 goto done_unlock_list; 816 817 /* By now, all parameters should have been noted. */ 818 TAILQ_FOREACH(opt, opts, link) { 819 if (!opt->seen && strcmp(opt->name, "errmsg")) { 820 error = EINVAL; 821 vfs_opterror(opts, "unknown parameter: %s", opt->name); 822 goto done_unlock_list; 823 } 824 } 825 826 /* 827 * See if we are creating a new record or updating an existing one. 828 * This abuses the file error codes ENOENT and EEXIST. 829 */ 830 cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); 831 if (!cuflags) { 832 error = EINVAL; 833 vfs_opterror(opts, "no valid operation (create or update)"); 834 goto done_unlock_list; 835 } 836 pr = NULL; 837 if (jid != 0) { 838 /* 839 * See if a requested jid already exists. There is an 840 * information leak here if the jid exists but is not within 841 * the caller's jail hierarchy. Jail creators will get EEXIST 842 * even though they cannot see the jail, and CREATE | UPDATE 843 * will return ENOENT which is not normally a valid error. 
844 */ 845 if (jid < 0) { 846 error = EINVAL; 847 vfs_opterror(opts, "negative jid"); 848 goto done_unlock_list; 849 } 850 pr = prison_find(jid); 851 if (pr != NULL) { 852 ppr = pr->pr_parent; 853 /* Create: jid must not exist. */ 854 if (cuflags == JAIL_CREATE) { 855 mtx_unlock(&pr->pr_mtx); 856 error = EEXIST; 857 vfs_opterror(opts, "jail %d already exists", 858 jid); 859 goto done_unlock_list; 860 } 861 if (!prison_ischild(mypr, pr)) { 862 mtx_unlock(&pr->pr_mtx); 863 pr = NULL; 864 } else if (pr->pr_uref == 0) { 865 if (!(flags & JAIL_DYING)) { 866 mtx_unlock(&pr->pr_mtx); 867 error = ENOENT; 868 vfs_opterror(opts, "jail %d is dying", 869 jid); 870 goto done_unlock_list; 871 } else if ((flags & JAIL_ATTACH) || 872 (pr_flags & PR_PERSIST)) { 873 /* 874 * A dying jail might be resurrected 875 * (via attach or persist), but first 876 * it must determine if another jail 877 * has claimed its name. Accomplish 878 * this by implicitly re-setting the 879 * name. 880 */ 881 if (name == NULL) 882 name = prison_name(mypr, pr); 883 } 884 } 885 } 886 if (pr == NULL) { 887 /* Update: jid must exist. */ 888 if (cuflags == JAIL_UPDATE) { 889 error = ENOENT; 890 vfs_opterror(opts, "jail %d not found", jid); 891 goto done_unlock_list; 892 } 893 } 894 } 895 /* 896 * If the caller provided a name, look for a jail by that name. 897 * This has different semantics for creates and updates keyed by jid 898 * (where the name must not already exist in a different jail), 899 * and updates keyed by the name itself (where the name must exist 900 * because that is the jail being updated). 901 */ 902 if (name != NULL) { 903 p = strrchr(name, '.'); 904 if (p != NULL) { 905 /* 906 * This is a hierarchical name. Split it into the 907 * parent and child names, and make sure the parent 908 * exists or matches an already found jail. 
909 */ 910 *p = '\0'; 911 if (pr != NULL) { 912 if (strncmp(name, ppr->pr_name, p - name) || 913 ppr->pr_name[p - name] != '\0') { 914 mtx_unlock(&pr->pr_mtx); 915 error = EINVAL; 916 vfs_opterror(opts, 917 "cannot change jail's parent"); 918 goto done_unlock_list; 919 } 920 } else { 921 ppr = prison_find_name(mypr, name); 922 if (ppr == NULL) { 923 error = ENOENT; 924 vfs_opterror(opts, 925 "jail \"%s\" not found", name); 926 goto done_unlock_list; 927 } 928 mtx_unlock(&ppr->pr_mtx); 929 } 930 name = p + 1; 931 } 932 if (name[0] != '\0') { 933 namelen = 934 (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; 935 name_again: 936 deadpr = NULL; 937 FOREACH_PRISON_CHILD(ppr, tpr) { 938 if (tpr != pr && tpr->pr_ref > 0 && 939 !strcmp(tpr->pr_name + namelen, name)) { 940 if (pr == NULL && 941 cuflags != JAIL_CREATE) { 942 mtx_lock(&tpr->pr_mtx); 943 if (tpr->pr_ref > 0) { 944 /* 945 * Use this jail 946 * for updates. 947 */ 948 if (tpr->pr_uref > 0) { 949 pr = tpr; 950 break; 951 } 952 deadpr = tpr; 953 } 954 mtx_unlock(&tpr->pr_mtx); 955 } else if (tpr->pr_uref > 0) { 956 /* 957 * Create, or update(jid): 958 * name must not exist in an 959 * active sibling jail. 960 */ 961 error = EEXIST; 962 if (pr != NULL) 963 mtx_unlock(&pr->pr_mtx); 964 vfs_opterror(opts, 965 "jail \"%s\" already exists", 966 name); 967 goto done_unlock_list; 968 } 969 } 970 } 971 /* If no active jail is found, use a dying one. */ 972 if (deadpr != NULL && pr == NULL) { 973 if (flags & JAIL_DYING) { 974 mtx_lock(&deadpr->pr_mtx); 975 if (deadpr->pr_ref == 0) { 976 mtx_unlock(&deadpr->pr_mtx); 977 goto name_again; 978 } 979 pr = deadpr; 980 } else if (cuflags == JAIL_UPDATE) { 981 error = ENOENT; 982 vfs_opterror(opts, 983 "jail \"%s\" is dying", name); 984 goto done_unlock_list; 985 } 986 } 987 /* Update: name must exist if no jid. 
*/ 988 else if (cuflags == JAIL_UPDATE && pr == NULL) { 989 error = ENOENT; 990 vfs_opterror(opts, "jail \"%s\" not found", 991 name); 992 goto done_unlock_list; 993 } 994 } 995 } 996 /* Update: must provide a jid or name. */ 997 else if (cuflags == JAIL_UPDATE && pr == NULL) { 998 error = ENOENT; 999 vfs_opterror(opts, "update specified no jail"); 1000 goto done_unlock_list; 1001 } 1002 1003 /* If there's no prison to update, create a new one and link it in. */ 1004 if (pr == NULL) { 1005 created = 1; 1006 mtx_lock(&ppr->pr_mtx); 1007 if (ppr->pr_ref == 0 || (ppr->pr_flags & PR_REMOVE)) { 1008 mtx_unlock(&ppr->pr_mtx); 1009 error = ENOENT; 1010 vfs_opterror(opts, "parent jail went away!"); 1011 goto done_unlock_list; 1012 } 1013 ppr->pr_ref++; 1014 ppr->pr_uref++; 1015 mtx_unlock(&ppr->pr_mtx); 1016 pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); 1017 if (jid == 0) { 1018 /* Find the next free jid. */ 1019 jid = lastprid + 1; 1020 findnext: 1021 if (jid == JAIL_MAX) 1022 jid = 1; 1023 TAILQ_FOREACH(tpr, &allprison, pr_list) { 1024 if (tpr->pr_id < jid) 1025 continue; 1026 if (tpr->pr_id > jid || tpr->pr_ref == 0) { 1027 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1028 break; 1029 } 1030 if (jid == lastprid) { 1031 error = EAGAIN; 1032 vfs_opterror(opts, 1033 "no available jail IDs"); 1034 free(pr, M_PRISON); 1035 prison_deref(ppr, PD_DEREF | 1036 PD_DEUREF | PD_LIST_XLOCKED); 1037 goto done_releroot; 1038 } 1039 jid++; 1040 goto findnext; 1041 } 1042 lastprid = jid; 1043 } else { 1044 /* 1045 * The jail already has a jid (that did not yet exist), 1046 * so just find where to insert it. 
1047 */ 1048 TAILQ_FOREACH(tpr, &allprison, pr_list) 1049 if (tpr->pr_id >= jid) { 1050 TAILQ_INSERT_BEFORE(tpr, pr, pr_list); 1051 break; 1052 } 1053 } 1054 if (tpr == NULL) 1055 TAILQ_INSERT_TAIL(&allprison, pr, pr_list); 1056 LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); 1057 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 1058 tpr->pr_prisoncount++; 1059 1060 pr->pr_parent = ppr; 1061 pr->pr_id = jid; 1062 1063 /* Set some default values, and inherit some from the parent. */ 1064 if (name == NULL) 1065 name = ""; 1066 if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1067 if (host == NULL) 1068 host = ppr->pr_host; 1069 if (domain == NULL) 1070 domain = ppr->pr_domain; 1071 if (uuid == NULL) 1072 uuid = ppr->pr_uuid; 1073 if (!gothid) 1074 hid = ppr->pr_hostid; 1075 } 1076 if (path == NULL) { 1077 path = "/"; 1078 root = mypr->pr_root; 1079 vref(root); 1080 } 1081 #ifdef INET 1082 pr->pr_flags |= ppr->pr_flags & PR_IP4; 1083 pr->pr_ip4s = ppr->pr_ip4s; 1084 if (ppr->pr_ip4 != NULL) { 1085 pr->pr_ip4 = malloc(pr->pr_ip4s * 1086 sizeof(struct in_addr), M_PRISON, M_WAITOK); 1087 bcopy(ppr->pr_ip4, pr->pr_ip4, 1088 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1089 } 1090 #endif 1091 #ifdef INET6 1092 pr->pr_flags |= ppr->pr_flags & PR_IP6; 1093 pr->pr_ip6s = ppr->pr_ip6s; 1094 if (ppr->pr_ip6 != NULL) { 1095 pr->pr_ip6 = malloc(pr->pr_ip6s * 1096 sizeof(struct in6_addr), M_PRISON, M_WAITOK); 1097 bcopy(ppr->pr_ip6, pr->pr_ip6, 1098 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1099 } 1100 #endif 1101 pr->pr_securelevel = ppr->pr_securelevel; 1102 pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; 1103 pr->pr_enforce_statfs = ppr->pr_enforce_statfs; 1104 1105 LIST_INIT(&pr->pr_children); 1106 mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); 1107 1108 /* 1109 * Allocate a dedicated cpuset for each jail. 1110 * Unlike other initial settings, this may return an erorr. 
1111 */ 1112 error = cpuset_create_root(ppr, &pr->pr_cpuset); 1113 if (error) { 1114 prison_deref(pr, PD_LIST_XLOCKED); 1115 goto done_releroot; 1116 } 1117 1118 mtx_lock(&pr->pr_mtx); 1119 /* 1120 * New prisons do not yet have a reference, because we do not 1121 * want other to see the incomplete prison once the 1122 * allprison_lock is downgraded. 1123 */ 1124 } else { 1125 created = 0; 1126 /* 1127 * Grab a reference for existing prisons, to ensure they 1128 * continue to exist for the duration of the call. 1129 */ 1130 pr->pr_ref++; 1131 } 1132 1133 /* Do final error checking before setting anything. */ 1134 if (gotslevel) { 1135 if (slevel < ppr->pr_securelevel) { 1136 error = EPERM; 1137 goto done_deref_locked; 1138 } 1139 } 1140 if (gotenforce) { 1141 if (enforce < ppr->pr_enforce_statfs) { 1142 error = EPERM; 1143 goto done_deref_locked; 1144 } 1145 } 1146 #ifdef INET 1147 if (ch_flags & PR_IP4_USER) { 1148 if (ppr->pr_flags & PR_IP4) { 1149 if (!(pr_flags & PR_IP4_USER)) { 1150 /* 1151 * Silently ignore attempts to make the IP 1152 * addresses unrestricted when the parent is 1153 * restricted; in other words, interpret 1154 * "unrestricted" as "as unrestricted as 1155 * possible". 1156 */ 1157 ip4s = ppr->pr_ip4s; 1158 if (ip4s == 0) { 1159 free(ip4, M_PRISON); 1160 ip4 = NULL; 1161 } else if (ip4s <= ip4a) { 1162 /* Inherit the parent's address(es). */ 1163 bcopy(ppr->pr_ip4, ip4, 1164 ip4s * sizeof(*ip4)); 1165 } else { 1166 /* 1167 * There's no room for the parent's 1168 * address list. Allocate some more. 1169 */ 1170 ip4a = ip4s; 1171 free(ip4, M_PRISON); 1172 ip4 = malloc(ip4a * sizeof(*ip4), 1173 M_PRISON, M_NOWAIT); 1174 if (ip4 != NULL) 1175 bcopy(ppr->pr_ip4, ip4, 1176 ip4s * sizeof(*ip4)); 1177 else { 1178 /* Allocation failed without 1179 * sleeping. Unlocking the 1180 * prison now will invalidate 1181 * some checks and prematurely 1182 * show an unfinished new jail. 1183 * So let go of everything and 1184 * start over. 
1185 */ 1186 prison_deref(pr, created 1187 ? PD_LOCKED | 1188 PD_LIST_XLOCKED 1189 : PD_DEREF | PD_LOCKED | 1190 PD_LIST_XLOCKED); 1191 if (root != NULL) { 1192 vfslocked = 1193 VFS_LOCK_GIANT( 1194 root->v_mount); 1195 vrele(root); 1196 VFS_UNLOCK_GIANT( 1197 vfslocked); 1198 } 1199 ip4 = malloc(ip4a * 1200 sizeof(*ip4), M_PRISON, 1201 M_WAITOK); 1202 goto again; 1203 } 1204 } 1205 } else if (ip4s > 0) { 1206 /* 1207 * Make sure the new set of IP addresses is a 1208 * subset of the parent's list. Don't worry 1209 * about the parent being unlocked, as any 1210 * setting is done with allprison_lock held. 1211 */ 1212 for (ij = 0; ij < ppr->pr_ip4s; ij++) 1213 if (ip4[0].s_addr == 1214 ppr->pr_ip4[ij].s_addr) 1215 break; 1216 if (ij == ppr->pr_ip4s) { 1217 error = EPERM; 1218 goto done_deref_locked; 1219 } 1220 if (ip4s > 1) { 1221 for (ii = ij = 1; ii < ip4s; ii++) { 1222 if (ip4[ii].s_addr == 1223 ppr->pr_ip4[0].s_addr) 1224 continue; 1225 for (; ij < ppr->pr_ip4s; ij++) 1226 if (ip4[ii].s_addr == 1227 ppr->pr_ip4[ij].s_addr) 1228 break; 1229 if (ij == ppr->pr_ip4s) 1230 break; 1231 } 1232 if (ij == ppr->pr_ip4s) { 1233 error = EPERM; 1234 goto done_deref_locked; 1235 } 1236 } 1237 } 1238 } 1239 if (ip4s > 0) { 1240 /* 1241 * Check for conflicting IP addresses. We permit them 1242 * if there is no more than one IP on each jail. If 1243 * there is a duplicate on a jail with more than one 1244 * IP stop checking and return error. 
1245 */ 1246 FOREACH_PRISON_DESCENDANT(&prison0, tpr, descend) { 1247 if (tpr == pr || tpr->pr_uref == 0) { 1248 descend = 0; 1249 continue; 1250 } 1251 if (!(tpr->pr_flags & PR_IP4_USER)) 1252 continue; 1253 descend = 0; 1254 if (tpr->pr_ip4 == NULL || 1255 (ip4s == 1 && tpr->pr_ip4s == 1)) 1256 continue; 1257 for (ii = 0; ii < ip4s; ii++) { 1258 if (_prison_check_ip4(tpr, 1259 &ip4[ii]) == 0) { 1260 error = EADDRINUSE; 1261 vfs_opterror(opts, 1262 "IPv4 addresses clash"); 1263 goto done_deref_locked; 1264 } 1265 } 1266 } 1267 } 1268 } 1269 #endif 1270 #ifdef INET6 1271 if (ch_flags & PR_IP6_USER) { 1272 if (ppr->pr_flags & PR_IP6) { 1273 if (!(pr_flags & PR_IP6_USER)) { 1274 /* 1275 * Silently ignore attempts to make the IP 1276 * addresses unrestricted when the parent is 1277 * restricted. 1278 */ 1279 ip6s = ppr->pr_ip6s; 1280 if (ip6s == 0) { 1281 free(ip6, M_PRISON); 1282 ip6 = NULL; 1283 } else if (ip6s <= ip6a) { 1284 /* Inherit the parent's address(es). */ 1285 bcopy(ppr->pr_ip6, ip6, 1286 ip6s * sizeof(*ip6)); 1287 } else { 1288 /* 1289 * There's no room for the parent's 1290 * address list. 1291 */ 1292 ip6a = ip6s; 1293 free(ip6, M_PRISON); 1294 ip6 = malloc(ip6a * sizeof(*ip6), 1295 M_PRISON, M_NOWAIT); 1296 if (ip6 != NULL) 1297 bcopy(ppr->pr_ip6, ip6, 1298 ip6s * sizeof(*ip6)); 1299 else { 1300 prison_deref(pr, created 1301 ? PD_LOCKED | 1302 PD_LIST_XLOCKED 1303 : PD_DEREF | PD_LOCKED | 1304 PD_LIST_XLOCKED); 1305 if (root != NULL) { 1306 vfslocked = 1307 VFS_LOCK_GIANT( 1308 root->v_mount); 1309 vrele(root); 1310 VFS_UNLOCK_GIANT( 1311 vfslocked); 1312 } 1313 ip6 = malloc(ip6a * 1314 sizeof(*ip6), M_PRISON, 1315 M_WAITOK); 1316 goto again; 1317 } 1318 } 1319 } else if (ip6s > 0) { 1320 /* 1321 * Make sure the new set of IP addresses is a 1322 * subset of the parent's list. 
1323 */ 1324 for (ij = 0; ij < ppr->pr_ip6s; ij++) 1325 if (IN6_ARE_ADDR_EQUAL(&ip6[0], 1326 &ppr->pr_ip6[ij])) 1327 break; 1328 if (ij == ppr->pr_ip6s) { 1329 error = EPERM; 1330 goto done_deref_locked; 1331 } 1332 if (ip6s > 1) { 1333 for (ii = ij = 1; ii < ip6s; ii++) { 1334 if (IN6_ARE_ADDR_EQUAL(&ip6[ii], 1335 &ppr->pr_ip6[0])) 1336 continue; 1337 for (; ij < ppr->pr_ip6s; ij++) 1338 if (IN6_ARE_ADDR_EQUAL( 1339 &ip6[ii], 1340 &ppr->pr_ip6[ij])) 1341 break; 1342 if (ij == ppr->pr_ip6s) 1343 break; 1344 } 1345 if (ij == ppr->pr_ip6s) { 1346 error = EPERM; 1347 goto done_deref_locked; 1348 } 1349 } 1350 } 1351 } 1352 if (ip6s > 0) { 1353 /* Check for conflicting IP addresses. */ 1354 FOREACH_PRISON_DESCENDANT(&prison0, tpr, descend) { 1355 if (tpr == pr || tpr->pr_uref == 0) { 1356 descend = 0; 1357 continue; 1358 } 1359 if (!(tpr->pr_flags & PR_IP6_USER)) 1360 continue; 1361 descend = 0; 1362 if (tpr->pr_ip6 == NULL || 1363 (ip6s == 1 && tpr->pr_ip6s == 1)) 1364 continue; 1365 for (ii = 0; ii < ip6s; ii++) { 1366 if (_prison_check_ip6(tpr, 1367 &ip6[ii]) == 0) { 1368 error = EADDRINUSE; 1369 vfs_opterror(opts, 1370 "IPv6 addresses clash"); 1371 goto done_deref_locked; 1372 } 1373 } 1374 } 1375 } 1376 } 1377 #endif 1378 onamelen = namelen = 0; 1379 if (name != NULL) { 1380 /* Give a default name of the jid. */ 1381 if (name[0] == '\0') 1382 snprintf(name = numbuf, sizeof(numbuf), "%d", jid); 1383 else if (strtoul(name, &p, 10) != jid && *p == '\0') { 1384 error = EINVAL; 1385 vfs_opterror(opts, "name cannot be numeric"); 1386 goto done_deref_locked; 1387 } 1388 /* 1389 * Make sure the name isn't too long for the prison or its 1390 * children. 
1391 */ 1392 onamelen = strlen(pr->pr_name); 1393 namelen = strlen(name); 1394 if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) { 1395 error = ENAMETOOLONG; 1396 goto done_deref_locked; 1397 } 1398 FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { 1399 if (strlen(tpr->pr_name) + (namelen - onamelen) >= 1400 sizeof(pr->pr_name)) { 1401 error = ENAMETOOLONG; 1402 goto done_deref_locked; 1403 } 1404 } 1405 } 1406 if (pr_allow & ~ppr->pr_allow) { 1407 error = EPERM; 1408 goto done_deref_locked; 1409 } 1410 1411 /* Set the parameters of the prison. */ 1412 #ifdef INET 1413 redo_ip4 = 0; 1414 if (ch_flags & PR_IP4_USER) { 1415 if (pr_flags & PR_IP4_USER) { 1416 /* Some restriction set. */ 1417 pr->pr_flags |= PR_IP4; 1418 if (ip4s >= 0) { 1419 free(pr->pr_ip4, M_PRISON); 1420 pr->pr_ip4s = ip4s; 1421 pr->pr_ip4 = ip4; 1422 ip4 = NULL; 1423 } 1424 } else if (ppr->pr_flags & PR_IP4) { 1425 /* This restriction cleared, but keep inherited. */ 1426 free(pr->pr_ip4, M_PRISON); 1427 pr->pr_ip4s = ip4s; 1428 pr->pr_ip4 = ip4; 1429 ip4 = NULL; 1430 } else { 1431 /* Restriction cleared, now unrestricted. */ 1432 pr->pr_flags &= ~PR_IP4; 1433 free(pr->pr_ip4, M_PRISON); 1434 pr->pr_ip4s = 0; 1435 } 1436 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1437 if (prison_restrict_ip4(tpr, NULL)) { 1438 redo_ip4 = 1; 1439 descend = 0; 1440 } 1441 } 1442 } 1443 #endif 1444 #ifdef INET6 1445 redo_ip6 = 0; 1446 if (ch_flags & PR_IP6_USER) { 1447 if (pr_flags & PR_IP6_USER) { 1448 /* Some restriction set. */ 1449 pr->pr_flags |= PR_IP6; 1450 if (ip6s >= 0) { 1451 free(pr->pr_ip6, M_PRISON); 1452 pr->pr_ip6s = ip6s; 1453 pr->pr_ip6 = ip6; 1454 ip6 = NULL; 1455 } 1456 } else if (ppr->pr_flags & PR_IP6) { 1457 /* This restriction cleared, but keep inherited. */ 1458 free(pr->pr_ip6, M_PRISON); 1459 pr->pr_ip6s = ip6s; 1460 pr->pr_ip6 = ip6; 1461 ip6 = NULL; 1462 } else { 1463 /* Restriction cleared, now unrestricted. 
*/ 1464 pr->pr_flags &= ~PR_IP6; 1465 free(pr->pr_ip6, M_PRISON); 1466 pr->pr_ip6s = 0; 1467 } 1468 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1469 if (prison_restrict_ip6(tpr, NULL)) { 1470 redo_ip6 = 1; 1471 descend = 0; 1472 } 1473 } 1474 } 1475 #endif 1476 if (gotslevel) { 1477 pr->pr_securelevel = slevel; 1478 /* Set all child jails to be at least this level. */ 1479 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1480 if (tpr->pr_securelevel < slevel) 1481 tpr->pr_securelevel = slevel; 1482 } 1483 if (gotenforce) { 1484 pr->pr_enforce_statfs = enforce; 1485 /* Pass this restriction on to the children. */ 1486 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1487 if (tpr->pr_enforce_statfs < enforce) 1488 tpr->pr_enforce_statfs = enforce; 1489 } 1490 if (name != NULL) { 1491 if (ppr == &prison0) 1492 strlcpy(pr->pr_name, name, sizeof(pr->pr_name)); 1493 else 1494 snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", 1495 ppr->pr_name, name); 1496 /* Change this component of child names. */ 1497 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1498 bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, 1499 strlen(tpr->pr_name + onamelen) + 1); 1500 bcopy(pr->pr_name, tpr->pr_name, namelen); 1501 } 1502 } 1503 if (path != NULL) { 1504 /* Try to keep a real-rooted full pathname. */ 1505 if (path[0] == '/' && strcmp(mypr->pr_path, "/")) 1506 snprintf(pr->pr_path, sizeof(pr->pr_path), "%s%s", 1507 mypr->pr_path, path); 1508 else 1509 strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); 1510 pr->pr_root = root; 1511 } 1512 if (PR_HOST & ch_flags & ~pr_flags) { 1513 if (pr->pr_flags & PR_HOST) { 1514 /* 1515 * Copy the parent's host info. As with pr_ip4 above, 1516 * the lack of a lock on the parent is not a problem; 1517 * it is always set with allprison_lock at least 1518 * shared, and is held exclusively here. 
1519 */ 1520 strlcpy(pr->pr_host, pr->pr_parent->pr_host, 1521 sizeof(pr->pr_host)); 1522 strlcpy(pr->pr_domain, pr->pr_parent->pr_domain, 1523 sizeof(pr->pr_domain)); 1524 strlcpy(pr->pr_uuid, pr->pr_parent->pr_uuid, 1525 sizeof(pr->pr_uuid)); 1526 pr->pr_hostid = pr->pr_parent->pr_hostid; 1527 } 1528 } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { 1529 /* Set this prison, and any descendants without PR_HOST. */ 1530 if (host != NULL) 1531 strlcpy(pr->pr_host, host, sizeof(pr->pr_host)); 1532 if (domain != NULL) 1533 strlcpy(pr->pr_domain, domain, sizeof(pr->pr_domain)); 1534 if (uuid != NULL) 1535 strlcpy(pr->pr_uuid, uuid, sizeof(pr->pr_uuid)); 1536 if (gothid) 1537 pr->pr_hostid = hid; 1538 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1539 if (tpr->pr_flags & PR_HOST) 1540 descend = 0; 1541 else { 1542 if (host != NULL) 1543 strlcpy(tpr->pr_host, pr->pr_host, 1544 sizeof(tpr->pr_host)); 1545 if (domain != NULL) 1546 strlcpy(tpr->pr_domain, pr->pr_domain, 1547 sizeof(tpr->pr_domain)); 1548 if (uuid != NULL) 1549 strlcpy(tpr->pr_uuid, pr->pr_uuid, 1550 sizeof(tpr->pr_uuid)); 1551 if (gothid) 1552 tpr->pr_hostid = hid; 1553 } 1554 } 1555 } 1556 if ((tallow = ch_allow & ~pr_allow)) { 1557 /* Clear allow bits in all children. */ 1558 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) 1559 tpr->pr_allow &= ~tallow; 1560 } 1561 pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; 1562 /* 1563 * Persistent prisons get an extra reference, and prisons losing their 1564 * persist flag lose that reference. Only do this for existing prisons 1565 * for now, so new ones will remain unseen until after the module 1566 * handlers have completed. 
1567 */ 1568 if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) { 1569 if (pr_flags & PR_PERSIST) { 1570 pr->pr_ref++; 1571 pr->pr_uref++; 1572 } else { 1573 pr->pr_ref--; 1574 pr->pr_uref--; 1575 } 1576 } 1577 pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; 1578 mtx_unlock(&pr->pr_mtx); 1579 1580 /* Locks may have prevented a complete restriction of child IP 1581 * addresses. If so, allocate some more memory and try again. 1582 */ 1583 #ifdef INET 1584 while (redo_ip4) { 1585 ip4s = pr->pr_ip4s; 1586 ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); 1587 mtx_lock(&pr->pr_mtx); 1588 redo_ip4 = 0; 1589 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1590 if (prison_restrict_ip4(tpr, ip4)) { 1591 if (ip4 != NULL) 1592 ip4 = NULL; 1593 else 1594 redo_ip4 = 1; 1595 } 1596 } 1597 mtx_unlock(&pr->pr_mtx); 1598 } 1599 #endif 1600 #ifdef INET6 1601 while (redo_ip6) { 1602 ip6s = pr->pr_ip6s; 1603 ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); 1604 mtx_lock(&pr->pr_mtx); 1605 redo_ip6 = 0; 1606 FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { 1607 if (prison_restrict_ip6(tpr, ip6)) { 1608 if (ip6 != NULL) 1609 ip6 = NULL; 1610 else 1611 redo_ip6 = 1; 1612 } 1613 } 1614 mtx_unlock(&pr->pr_mtx); 1615 } 1616 #endif 1617 1618 /* Let the modules do their work. */ 1619 sx_downgrade(&allprison_lock); 1620 if (created) { 1621 error = osd_jail_call(pr, PR_METHOD_CREATE, opts); 1622 if (error) { 1623 prison_deref(pr, PD_LIST_SLOCKED); 1624 goto done_errmsg; 1625 } 1626 } 1627 error = osd_jail_call(pr, PR_METHOD_SET, opts); 1628 if (error) { 1629 prison_deref(pr, created 1630 ? PD_LIST_SLOCKED 1631 : PD_DEREF | PD_LIST_SLOCKED); 1632 goto done_errmsg; 1633 } 1634 1635 /* Attach this process to the prison if requested. 
*/ 1636 if (flags & JAIL_ATTACH) { 1637 mtx_lock(&pr->pr_mtx); 1638 error = do_jail_attach(td, pr); 1639 if (error) { 1640 vfs_opterror(opts, "attach failed"); 1641 if (!created) 1642 prison_deref(pr, PD_DEREF); 1643 goto done_errmsg; 1644 } 1645 } 1646 1647 /* 1648 * Now that it is all there, drop the temporary reference from existing 1649 * prisons. Or add a reference to newly created persistent prisons 1650 * (which was not done earlier so that the prison would not be publicly 1651 * visible). 1652 */ 1653 if (!created) { 1654 prison_deref(pr, (flags & JAIL_ATTACH) 1655 ? PD_DEREF 1656 : PD_DEREF | PD_LIST_SLOCKED); 1657 } else { 1658 if (pr_flags & PR_PERSIST) { 1659 mtx_lock(&pr->pr_mtx); 1660 pr->pr_ref++; 1661 pr->pr_uref++; 1662 mtx_unlock(&pr->pr_mtx); 1663 } 1664 if (!(flags & JAIL_ATTACH)) 1665 sx_sunlock(&allprison_lock); 1666 } 1667 td->td_retval[0] = pr->pr_id; 1668 goto done_errmsg; 1669 1670 done_deref_locked: 1671 prison_deref(pr, created 1672 ? PD_LOCKED | PD_LIST_XLOCKED 1673 : PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 1674 goto done_releroot; 1675 done_unlock_list: 1676 sx_xunlock(&allprison_lock); 1677 done_releroot: 1678 if (root != NULL) { 1679 vfslocked = VFS_LOCK_GIANT(root->v_mount); 1680 vrele(root); 1681 VFS_UNLOCK_GIANT(vfslocked); 1682 } 1683 done_errmsg: 1684 if (error) { 1685 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1686 if (errmsg_len > 0) { 1687 errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; 1688 if (errmsg_pos > 0) { 1689 if (optuio->uio_segflg == UIO_SYSSPACE) 1690 bcopy(errmsg, 1691 optuio->uio_iov[errmsg_pos].iov_base, 1692 errmsg_len); 1693 else 1694 copyout(errmsg, 1695 optuio->uio_iov[errmsg_pos].iov_base, 1696 errmsg_len); 1697 } 1698 } 1699 } 1700 done_free: 1701 #ifdef INET 1702 free(ip4, M_PRISON); 1703 #endif 1704 #ifdef INET6 1705 free(ip6, M_PRISON); 1706 #endif 1707 vfs_freeopts(opts); 1708 return (error); 1709 } 1710 1711 1712 /* 1713 * struct jail_get_args { 1714 * struct iovec *iovp; 1715 * 
unsigned int iovcnt; 1716 * int flags; 1717 * }; 1718 */ 1719 int 1720 jail_get(struct thread *td, struct jail_get_args *uap) 1721 { 1722 struct uio *auio; 1723 int error; 1724 1725 /* Check that we have an even number of iovecs. */ 1726 if (uap->iovcnt & 1) 1727 return (EINVAL); 1728 1729 error = copyinuio(uap->iovp, uap->iovcnt, &auio); 1730 if (error) 1731 return (error); 1732 error = kern_jail_get(td, auio, uap->flags); 1733 if (error == 0) 1734 error = copyout(auio->uio_iov, uap->iovp, 1735 uap->iovcnt * sizeof (struct iovec)); 1736 free(auio, M_IOV); 1737 return (error); 1738 } 1739 1740 int 1741 kern_jail_get(struct thread *td, struct uio *optuio, int flags) 1742 { 1743 struct prison *pr, *mypr; 1744 struct vfsopt *opt; 1745 struct vfsoptlist *opts; 1746 char *errmsg, *name; 1747 int error, errmsg_len, errmsg_pos, fi, i, jid, len, locked, pos; 1748 1749 if (flags & ~JAIL_GET_MASK) 1750 return (EINVAL); 1751 1752 /* Get the parameter list. */ 1753 error = vfs_buildopts(optuio, &opts); 1754 if (error) 1755 return (error); 1756 errmsg_pos = vfs_getopt_pos(opts, "errmsg"); 1757 mypr = td->td_ucred->cr_prison; 1758 1759 /* 1760 * Find the prison specified by one of: lastjid, jid, name. 
1761 */ 1762 sx_slock(&allprison_lock); 1763 error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); 1764 if (error == 0) { 1765 TAILQ_FOREACH(pr, &allprison, pr_list) { 1766 if (pr->pr_id > jid && prison_ischild(mypr, pr)) { 1767 mtx_lock(&pr->pr_mtx); 1768 if (pr->pr_ref > 0 && 1769 (pr->pr_uref > 0 || (flags & JAIL_DYING))) 1770 break; 1771 mtx_unlock(&pr->pr_mtx); 1772 } 1773 } 1774 if (pr != NULL) 1775 goto found_prison; 1776 error = ENOENT; 1777 vfs_opterror(opts, "no jail after %d", jid); 1778 goto done_unlock_list; 1779 } else if (error != ENOENT) 1780 goto done_unlock_list; 1781 1782 error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); 1783 if (error == 0) { 1784 if (jid != 0) { 1785 pr = prison_find_child(mypr, jid); 1786 if (pr != NULL) { 1787 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1788 mtx_unlock(&pr->pr_mtx); 1789 error = ENOENT; 1790 vfs_opterror(opts, "jail %d is dying", 1791 jid); 1792 goto done_unlock_list; 1793 } 1794 goto found_prison; 1795 } 1796 error = ENOENT; 1797 vfs_opterror(opts, "jail %d not found", jid); 1798 goto done_unlock_list; 1799 } 1800 } else if (error != ENOENT) 1801 goto done_unlock_list; 1802 1803 error = vfs_getopt(opts, "name", (void **)&name, &len); 1804 if (error == 0) { 1805 if (len == 0 || name[len - 1] != '\0') { 1806 error = EINVAL; 1807 goto done_unlock_list; 1808 } 1809 pr = prison_find_name(mypr, name); 1810 if (pr != NULL) { 1811 if (pr->pr_uref == 0 && !(flags & JAIL_DYING)) { 1812 mtx_unlock(&pr->pr_mtx); 1813 error = ENOENT; 1814 vfs_opterror(opts, "jail \"%s\" is dying", 1815 name); 1816 goto done_unlock_list; 1817 } 1818 goto found_prison; 1819 } 1820 error = ENOENT; 1821 vfs_opterror(opts, "jail \"%s\" not found", name); 1822 goto done_unlock_list; 1823 } else if (error != ENOENT) 1824 goto done_unlock_list; 1825 1826 vfs_opterror(opts, "no jail specified"); 1827 error = ENOENT; 1828 goto done_unlock_list; 1829 1830 found_prison: 1831 /* Get the parameters of the prison. 
*/ 1832 pr->pr_ref++; 1833 locked = PD_LOCKED; 1834 td->td_retval[0] = pr->pr_id; 1835 error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); 1836 if (error != 0 && error != ENOENT) 1837 goto done_deref; 1838 i = (pr->pr_parent == mypr) ? 0 : pr->pr_parent->pr_id; 1839 error = vfs_setopt(opts, "parent", &i, sizeof(i)); 1840 if (error != 0 && error != ENOENT) 1841 goto done_deref; 1842 error = vfs_setopts(opts, "name", prison_name(mypr, pr)); 1843 if (error != 0 && error != ENOENT) 1844 goto done_deref; 1845 error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, 1846 sizeof(pr->pr_cpuset->cs_id)); 1847 if (error != 0 && error != ENOENT) 1848 goto done_deref; 1849 error = vfs_setopts(opts, "path", prison_path(mypr, pr)); 1850 if (error != 0 && error != ENOENT) 1851 goto done_deref; 1852 #ifdef INET 1853 error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, 1854 pr->pr_ip4s * sizeof(*pr->pr_ip4)); 1855 if (error != 0 && error != ENOENT) 1856 goto done_deref; 1857 #endif 1858 #ifdef INET6 1859 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, 1860 pr->pr_ip6s * sizeof(*pr->pr_ip6)); 1861 if (error != 0 && error != ENOENT) 1862 goto done_deref; 1863 #endif 1864 error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, 1865 sizeof(pr->pr_securelevel)); 1866 if (error != 0 && error != ENOENT) 1867 goto done_deref; 1868 error = vfs_setopts(opts, "host.hostname", pr->pr_host); 1869 if (error != 0 && error != ENOENT) 1870 goto done_deref; 1871 error = vfs_setopts(opts, "host.domainname", pr->pr_domain); 1872 if (error != 0 && error != ENOENT) 1873 goto done_deref; 1874 error = vfs_setopts(opts, "host.hostuuid", pr->pr_uuid); 1875 if (error != 0 && error != ENOENT) 1876 goto done_deref; 1877 #ifdef COMPAT_IA32 1878 if (td->td_proc->p_sysent->sv_flags & SV_IA32) { 1879 uint32_t hid32 = pr->pr_hostid; 1880 1881 error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); 1882 } else 1883 #endif 1884 error = vfs_setopt(opts, "host.hostid", 
&pr->pr_hostid, 1885 sizeof(pr->pr_hostid)); 1886 if (error != 0 && error != ENOENT) 1887 goto done_deref; 1888 error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, 1889 sizeof(pr->pr_enforce_statfs)); 1890 if (error != 0 && error != ENOENT) 1891 goto done_deref; 1892 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 1893 fi++) { 1894 if (pr_flag_names[fi] == NULL) 1895 continue; 1896 i = (pr->pr_flags & (1 << fi)) ? 1 : 0; 1897 error = vfs_setopt(opts, pr_flag_names[fi], &i, sizeof(i)); 1898 if (error != 0 && error != ENOENT) 1899 goto done_deref; 1900 i = !i; 1901 error = vfs_setopt(opts, pr_flag_nonames[fi], &i, sizeof(i)); 1902 if (error != 0 && error != ENOENT) 1903 goto done_deref; 1904 } 1905 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 1906 fi++) { 1907 if (pr_allow_names[fi] == NULL) 1908 continue; 1909 i = (pr->pr_allow & (1 << fi)) ? 1 : 0; 1910 error = vfs_setopt(opts, pr_allow_names[fi], &i, sizeof(i)); 1911 if (error != 0 && error != ENOENT) 1912 goto done_deref; 1913 i = !i; 1914 error = vfs_setopt(opts, pr_allow_nonames[fi], &i, sizeof(i)); 1915 if (error != 0 && error != ENOENT) 1916 goto done_deref; 1917 } 1918 i = (pr->pr_uref == 0); 1919 error = vfs_setopt(opts, "dying", &i, sizeof(i)); 1920 if (error != 0 && error != ENOENT) 1921 goto done_deref; 1922 i = !i; 1923 error = vfs_setopt(opts, "nodying", &i, sizeof(i)); 1924 if (error != 0 && error != ENOENT) 1925 goto done_deref; 1926 1927 /* Get the module parameters. */ 1928 mtx_unlock(&pr->pr_mtx); 1929 locked = 0; 1930 error = osd_jail_call(pr, PR_METHOD_GET, opts); 1931 if (error) 1932 goto done_deref; 1933 prison_deref(pr, PD_DEREF | PD_LIST_SLOCKED); 1934 1935 /* By now, all parameters should have been noted. 
*/ 1936 TAILQ_FOREACH(opt, opts, link) { 1937 if (!opt->seen && strcmp(opt->name, "errmsg")) { 1938 error = EINVAL; 1939 vfs_opterror(opts, "unknown parameter: %s", opt->name); 1940 goto done_errmsg; 1941 } 1942 } 1943 1944 /* Write the fetched parameters back to userspace. */ 1945 error = 0; 1946 TAILQ_FOREACH(opt, opts, link) { 1947 if (opt->pos >= 0 && opt->pos != errmsg_pos) { 1948 pos = 2 * opt->pos + 1; 1949 optuio->uio_iov[pos].iov_len = opt->len; 1950 if (opt->value != NULL) { 1951 if (optuio->uio_segflg == UIO_SYSSPACE) { 1952 bcopy(opt->value, 1953 optuio->uio_iov[pos].iov_base, 1954 opt->len); 1955 } else { 1956 error = copyout(opt->value, 1957 optuio->uio_iov[pos].iov_base, 1958 opt->len); 1959 if (error) 1960 break; 1961 } 1962 } 1963 } 1964 } 1965 goto done_errmsg; 1966 1967 done_deref: 1968 prison_deref(pr, locked | PD_DEREF | PD_LIST_SLOCKED); 1969 goto done_errmsg; 1970 1971 done_unlock_list: 1972 sx_sunlock(&allprison_lock); 1973 done_errmsg: 1974 if (error && errmsg_pos >= 0) { 1975 vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); 1976 errmsg_pos = 2 * errmsg_pos + 1; 1977 if (errmsg_len > 0) { 1978 if (optuio->uio_segflg == UIO_SYSSPACE) 1979 bcopy(errmsg, 1980 optuio->uio_iov[errmsg_pos].iov_base, 1981 errmsg_len); 1982 else 1983 copyout(errmsg, 1984 optuio->uio_iov[errmsg_pos].iov_base, 1985 errmsg_len); 1986 } 1987 } 1988 vfs_freeopts(opts); 1989 return (error); 1990 } 1991 1992 1993 /* 1994 * struct jail_remove_args { 1995 * int jid; 1996 * }; 1997 */ 1998 int 1999 jail_remove(struct thread *td, struct jail_remove_args *uap) 2000 { 2001 struct prison *pr, *cpr, *lpr, *tpr; 2002 int descend, error; 2003 2004 error = priv_check(td, PRIV_JAIL_REMOVE); 2005 if (error) 2006 return (error); 2007 2008 sx_xlock(&allprison_lock); 2009 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2010 if (pr == NULL) { 2011 sx_xunlock(&allprison_lock); 2012 return (EINVAL); 2013 } 2014 2015 /* Remove all descendants of this prison, then remove 
this prison. */ 2016 pr->pr_ref++; 2017 pr->pr_flags |= PR_REMOVE; 2018 if (!LIST_EMPTY(&pr->pr_children)) { 2019 mtx_unlock(&pr->pr_mtx); 2020 lpr = NULL; 2021 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 2022 mtx_lock(&cpr->pr_mtx); 2023 if (cpr->pr_ref > 0) { 2024 tpr = cpr; 2025 cpr->pr_ref++; 2026 cpr->pr_flags |= PR_REMOVE; 2027 } else { 2028 /* Already removed - do not do it again. */ 2029 tpr = NULL; 2030 } 2031 mtx_unlock(&cpr->pr_mtx); 2032 if (lpr != NULL) { 2033 mtx_lock(&lpr->pr_mtx); 2034 prison_remove_one(lpr); 2035 sx_xlock(&allprison_lock); 2036 } 2037 lpr = tpr; 2038 } 2039 if (lpr != NULL) { 2040 mtx_lock(&lpr->pr_mtx); 2041 prison_remove_one(lpr); 2042 sx_xlock(&allprison_lock); 2043 } 2044 mtx_lock(&pr->pr_mtx); 2045 } 2046 prison_remove_one(pr); 2047 return (0); 2048 } 2049 2050 static void 2051 prison_remove_one(struct prison *pr) 2052 { 2053 struct proc *p; 2054 int deuref; 2055 2056 /* If the prison was persistent, it is not anymore. */ 2057 deuref = 0; 2058 if (pr->pr_flags & PR_PERSIST) { 2059 pr->pr_ref--; 2060 deuref = PD_DEUREF; 2061 pr->pr_flags &= ~PR_PERSIST; 2062 } 2063 2064 /* 2065 * jail_remove added a reference. If that's the only one, remove 2066 * the prison now. 2067 */ 2068 KASSERT(pr->pr_ref > 0, 2069 ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id)); 2070 if (pr->pr_ref == 1) { 2071 prison_deref(pr, 2072 deuref | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); 2073 return; 2074 } 2075 2076 mtx_unlock(&pr->pr_mtx); 2077 sx_xunlock(&allprison_lock); 2078 /* 2079 * Kill all processes unfortunate enough to be attached to this prison. 2080 */ 2081 sx_slock(&allproc_lock); 2082 LIST_FOREACH(p, &allproc, p_list) { 2083 PROC_LOCK(p); 2084 if (p->p_state != PRS_NEW && p->p_ucred && 2085 p->p_ucred->cr_prison == pr) 2086 psignal(p, SIGKILL); 2087 PROC_UNLOCK(p); 2088 } 2089 sx_sunlock(&allproc_lock); 2090 /* Remove the temporary reference added by jail_remove. 
*/ 2091 prison_deref(pr, deuref | PD_DEREF); 2092 } 2093 2094 2095 /* 2096 * struct jail_attach_args { 2097 * int jid; 2098 * }; 2099 */ 2100 int 2101 jail_attach(struct thread *td, struct jail_attach_args *uap) 2102 { 2103 struct prison *pr; 2104 int error; 2105 2106 error = priv_check(td, PRIV_JAIL_ATTACH); 2107 if (error) 2108 return (error); 2109 2110 sx_slock(&allprison_lock); 2111 pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); 2112 if (pr == NULL) { 2113 sx_sunlock(&allprison_lock); 2114 return (EINVAL); 2115 } 2116 2117 /* 2118 * Do not allow a process to attach to a prison that is not 2119 * considered to be "alive". 2120 */ 2121 if (pr->pr_uref == 0) { 2122 mtx_unlock(&pr->pr_mtx); 2123 sx_sunlock(&allprison_lock); 2124 return (EINVAL); 2125 } 2126 2127 return (do_jail_attach(td, pr)); 2128 } 2129 2130 static int 2131 do_jail_attach(struct thread *td, struct prison *pr) 2132 { 2133 struct prison *ppr; 2134 struct proc *p; 2135 struct ucred *newcred, *oldcred; 2136 int vfslocked, error; 2137 2138 /* 2139 * XXX: Note that there is a slight race here if two threads 2140 * in the same privileged process attempt to attach to two 2141 * different jails at the same time. It is important for 2142 * user processes not to do this, or they might end up with 2143 * a process root from one prison, but attached to the jail 2144 * of another. 2145 */ 2146 pr->pr_ref++; 2147 pr->pr_uref++; 2148 mtx_unlock(&pr->pr_mtx); 2149 2150 /* Let modules do whatever they need to prepare for attaching. */ 2151 error = osd_jail_call(pr, PR_METHOD_ATTACH, td); 2152 if (error) { 2153 prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED); 2154 return (error); 2155 } 2156 sx_sunlock(&allprison_lock); 2157 2158 /* 2159 * Reparent the newly attached process to this jail. 
2160 */ 2161 ppr = td->td_ucred->cr_prison; 2162 p = td->td_proc; 2163 error = cpuset_setproc_update_set(p, pr->pr_cpuset); 2164 if (error) 2165 goto e_revert_osd; 2166 2167 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2168 vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); 2169 if ((error = change_dir(pr->pr_root, td)) != 0) 2170 goto e_unlock; 2171 #ifdef MAC 2172 if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) 2173 goto e_unlock; 2174 #endif 2175 VOP_UNLOCK(pr->pr_root, 0); 2176 if ((error = change_root(pr->pr_root, td))) 2177 goto e_unlock_giant; 2178 VFS_UNLOCK_GIANT(vfslocked); 2179 2180 newcred = crget(); 2181 PROC_LOCK(p); 2182 oldcred = p->p_ucred; 2183 setsugid(p); 2184 crcopy(newcred, oldcred); 2185 newcred->cr_prison = pr; 2186 p->p_ucred = newcred; 2187 PROC_UNLOCK(p); 2188 crfree(oldcred); 2189 prison_deref(ppr, PD_DEREF | PD_DEUREF); 2190 return (0); 2191 e_unlock: 2192 VOP_UNLOCK(pr->pr_root, 0); 2193 e_unlock_giant: 2194 VFS_UNLOCK_GIANT(vfslocked); 2195 e_revert_osd: 2196 /* Tell modules this thread is still in its old jail after all. */ 2197 (void)osd_jail_call(ppr, PR_METHOD_ATTACH, td); 2198 prison_deref(pr, PD_DEREF | PD_DEUREF); 2199 return (error); 2200 } 2201 2202 2203 /* 2204 * Returns a locked prison instance, or NULL on failure. 2205 */ 2206 struct prison * 2207 prison_find(int prid) 2208 { 2209 struct prison *pr; 2210 2211 sx_assert(&allprison_lock, SX_LOCKED); 2212 TAILQ_FOREACH(pr, &allprison, pr_list) { 2213 if (pr->pr_id == prid) { 2214 mtx_lock(&pr->pr_mtx); 2215 if (pr->pr_ref > 0) 2216 return (pr); 2217 mtx_unlock(&pr->pr_mtx); 2218 } 2219 } 2220 return (NULL); 2221 } 2222 2223 /* 2224 * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. 
2225 */ 2226 struct prison * 2227 prison_find_child(struct prison *mypr, int prid) 2228 { 2229 struct prison *pr; 2230 int descend; 2231 2232 sx_assert(&allprison_lock, SX_LOCKED); 2233 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2234 if (pr->pr_id == prid) { 2235 mtx_lock(&pr->pr_mtx); 2236 if (pr->pr_ref > 0) 2237 return (pr); 2238 mtx_unlock(&pr->pr_mtx); 2239 } 2240 } 2241 return (NULL); 2242 } 2243 2244 /* 2245 * Look for the name relative to mypr. Returns a locked prison or NULL. 2246 */ 2247 struct prison * 2248 prison_find_name(struct prison *mypr, const char *name) 2249 { 2250 struct prison *pr, *deadpr; 2251 size_t mylen; 2252 int descend; 2253 2254 sx_assert(&allprison_lock, SX_LOCKED); 2255 mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1; 2256 again: 2257 deadpr = NULL; 2258 FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { 2259 if (!strcmp(pr->pr_name + mylen, name)) { 2260 mtx_lock(&pr->pr_mtx); 2261 if (pr->pr_ref > 0) { 2262 if (pr->pr_uref > 0) 2263 return (pr); 2264 deadpr = pr; 2265 } 2266 mtx_unlock(&pr->pr_mtx); 2267 } 2268 } 2269 /* There was no valid prison - perhaps there was a dying one. */ 2270 if (deadpr != NULL) { 2271 mtx_lock(&deadpr->pr_mtx); 2272 if (deadpr->pr_ref == 0) { 2273 mtx_unlock(&deadpr->pr_mtx); 2274 goto again; 2275 } 2276 } 2277 return (deadpr); 2278 } 2279 2280 /* 2281 * See if a prison has the specific flag set. 2282 */ 2283 int 2284 prison_flag(struct ucred *cred, unsigned flag) 2285 { 2286 2287 /* This is an atomic read, so no locking is necessary. */ 2288 return (cred->cr_prison->pr_flags & flag); 2289 } 2290 2291 int 2292 prison_allow(struct ucred *cred, unsigned flag) 2293 { 2294 2295 /* This is an atomic read, so no locking is necessary. */ 2296 return (cred->cr_prison->pr_allow & flag); 2297 } 2298 2299 /* 2300 * Remove a prison reference. If that was the last reference, remove the 2301 * prison itself - but not in this context in case there are locks held. 
2302 */ 2303 void 2304 prison_free_locked(struct prison *pr) 2305 { 2306 2307 mtx_assert(&pr->pr_mtx, MA_OWNED); 2308 pr->pr_ref--; 2309 if (pr->pr_ref == 0) { 2310 mtx_unlock(&pr->pr_mtx); 2311 TASK_INIT(&pr->pr_task, 0, prison_complete, pr); 2312 taskqueue_enqueue(taskqueue_thread, &pr->pr_task); 2313 return; 2314 } 2315 mtx_unlock(&pr->pr_mtx); 2316 } 2317 2318 void 2319 prison_free(struct prison *pr) 2320 { 2321 2322 mtx_lock(&pr->pr_mtx); 2323 prison_free_locked(pr); 2324 } 2325 2326 static void 2327 prison_complete(void *context, int pending) 2328 { 2329 2330 prison_deref((struct prison *)context, 0); 2331 } 2332 2333 /* 2334 * Remove a prison reference (usually). This internal version assumes no 2335 * mutexes are held, except perhaps the prison itself. If there are no more 2336 * references, release and delist the prison. On completion, the prison lock 2337 * and the allprison lock are both unlocked. 2338 */ 2339 static void 2340 prison_deref(struct prison *pr, int flags) 2341 { 2342 struct prison *ppr, *tpr; 2343 int vfslocked; 2344 2345 if (!(flags & PD_LOCKED)) 2346 mtx_lock(&pr->pr_mtx); 2347 /* Decrement the user references in a separate loop. */ 2348 if (flags & PD_DEUREF) { 2349 for (tpr = pr;; tpr = tpr->pr_parent) { 2350 if (tpr != pr) 2351 mtx_lock(&tpr->pr_mtx); 2352 if (--tpr->pr_uref > 0) 2353 break; 2354 KASSERT(tpr != &prison0, ("prison0 pr_uref=0")); 2355 mtx_unlock(&tpr->pr_mtx); 2356 } 2357 /* Done if there were only user references to remove. */ 2358 if (!(flags & PD_DEREF)) { 2359 mtx_unlock(&tpr->pr_mtx); 2360 if (flags & PD_LIST_SLOCKED) 2361 sx_sunlock(&allprison_lock); 2362 else if (flags & PD_LIST_XLOCKED) 2363 sx_xunlock(&allprison_lock); 2364 return; 2365 } 2366 if (tpr != pr) { 2367 mtx_unlock(&tpr->pr_mtx); 2368 mtx_lock(&pr->pr_mtx); 2369 } 2370 } 2371 2372 for (;;) { 2373 if (flags & PD_DEREF) 2374 pr->pr_ref--; 2375 /* If the prison still has references, nothing else to do. 
*/ 2376 if (pr->pr_ref > 0) { 2377 mtx_unlock(&pr->pr_mtx); 2378 if (flags & PD_LIST_SLOCKED) 2379 sx_sunlock(&allprison_lock); 2380 else if (flags & PD_LIST_XLOCKED) 2381 sx_xunlock(&allprison_lock); 2382 return; 2383 } 2384 2385 mtx_unlock(&pr->pr_mtx); 2386 if (flags & PD_LIST_SLOCKED) { 2387 if (!sx_try_upgrade(&allprison_lock)) { 2388 sx_sunlock(&allprison_lock); 2389 sx_xlock(&allprison_lock); 2390 } 2391 } else if (!(flags & PD_LIST_XLOCKED)) 2392 sx_xlock(&allprison_lock); 2393 2394 TAILQ_REMOVE(&allprison, pr, pr_list); 2395 LIST_REMOVE(pr, pr_sibling); 2396 ppr = pr->pr_parent; 2397 for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) 2398 tpr->pr_prisoncount--; 2399 sx_downgrade(&allprison_lock); 2400 2401 if (pr->pr_root != NULL) { 2402 vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); 2403 vrele(pr->pr_root); 2404 VFS_UNLOCK_GIANT(vfslocked); 2405 } 2406 mtx_destroy(&pr->pr_mtx); 2407 #ifdef INET 2408 free(pr->pr_ip4, M_PRISON); 2409 #endif 2410 #ifdef INET6 2411 free(pr->pr_ip6, M_PRISON); 2412 #endif 2413 if (pr->pr_cpuset != NULL) 2414 cpuset_rel(pr->pr_cpuset); 2415 osd_jail_exit(pr); 2416 free(pr, M_PRISON); 2417 2418 /* Removing a prison frees a reference on its parent. 
*/ 2419 pr = ppr; 2420 mtx_lock(&pr->pr_mtx); 2421 flags = PD_DEREF | PD_LIST_SLOCKED; 2422 } 2423 } 2424 2425 void 2426 prison_hold_locked(struct prison *pr) 2427 { 2428 2429 mtx_assert(&pr->pr_mtx, MA_OWNED); 2430 KASSERT(pr->pr_ref > 0, 2431 ("Trying to hold dead prison (jid=%d).", pr->pr_id)); 2432 pr->pr_ref++; 2433 } 2434 2435 void 2436 prison_hold(struct prison *pr) 2437 { 2438 2439 mtx_lock(&pr->pr_mtx); 2440 prison_hold_locked(pr); 2441 mtx_unlock(&pr->pr_mtx); 2442 } 2443 2444 void 2445 prison_proc_hold(struct prison *pr) 2446 { 2447 2448 mtx_lock(&pr->pr_mtx); 2449 KASSERT(pr->pr_uref > 0, 2450 ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); 2451 pr->pr_uref++; 2452 mtx_unlock(&pr->pr_mtx); 2453 } 2454 2455 void 2456 prison_proc_free(struct prison *pr) 2457 { 2458 2459 mtx_lock(&pr->pr_mtx); 2460 KASSERT(pr->pr_uref > 0, 2461 ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); 2462 prison_deref(pr, PD_DEUREF | PD_LOCKED); 2463 } 2464 2465 2466 #ifdef INET 2467 /* 2468 * Restrict a prison's IP address list with its parent's, possibly replacing 2469 * it. Return true if the replacement buffer was used (or would have been). 2470 */ 2471 static int 2472 prison_restrict_ip4(struct prison *pr, struct in_addr *newip4) 2473 { 2474 int ii, ij, used; 2475 struct prison *ppr; 2476 2477 ppr = pr->pr_parent; 2478 if (!(pr->pr_flags & PR_IP4_USER)) { 2479 /* This has no user settings, so just copy the parent's list. */ 2480 if (pr->pr_ip4s < ppr->pr_ip4s) { 2481 /* 2482 * There's no room for the parent's list. Use the 2483 * new list buffer, which is assumed to be big enough 2484 * (if it was passed). If there's no buffer, try to 2485 * allocate one. 
2486 */ 2487 used = 1; 2488 if (newip4 == NULL) { 2489 newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4), 2490 M_PRISON, M_NOWAIT); 2491 if (newip4 != NULL) 2492 used = 0; 2493 } 2494 if (newip4 != NULL) { 2495 bcopy(ppr->pr_ip4, newip4, 2496 ppr->pr_ip4s * sizeof(*newip4)); 2497 free(pr->pr_ip4, M_PRISON); 2498 pr->pr_ip4 = newip4; 2499 pr->pr_ip4s = ppr->pr_ip4s; 2500 pr->pr_flags |= PR_IP4; 2501 } 2502 return (used); 2503 } 2504 pr->pr_ip4s = ppr->pr_ip4s; 2505 if (pr->pr_ip4s > 0) 2506 bcopy(ppr->pr_ip4, pr->pr_ip4, 2507 pr->pr_ip4s * sizeof(*newip4)); 2508 else if (pr->pr_ip4 != NULL) { 2509 free(pr->pr_ip4, M_PRISON); 2510 pr->pr_ip4 = NULL; 2511 } 2512 pr->pr_flags = 2513 (pr->pr_flags & ~PR_IP4) | (ppr->pr_flags & PR_IP4); 2514 } else if (pr->pr_ip4s > 0 && (ppr->pr_flags & PR_IP4)) { 2515 /* Remove addresses that aren't in the parent. */ 2516 for (ij = 0; ij < ppr->pr_ip4s; ij++) 2517 if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) 2518 break; 2519 if (ij < ppr->pr_ip4s) 2520 ii = 1; 2521 else { 2522 bcopy(pr->pr_ip4 + 1, pr->pr_ip4, 2523 --pr->pr_ip4s * sizeof(*pr->pr_ip4)); 2524 ii = 0; 2525 } 2526 for (ij = 1; ii < pr->pr_ip4s; ) { 2527 if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) { 2528 ii++; 2529 continue; 2530 } 2531 switch (ij >= ppr->pr_ip4s ? -1 : 2532 qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) { 2533 case -1: 2534 bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii, 2535 (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4)); 2536 break; 2537 case 0: 2538 ii++; 2539 ij++; 2540 break; 2541 case 1: 2542 ij++; 2543 break; 2544 } 2545 } 2546 if (pr->pr_ip4s == 0) { 2547 free(pr->pr_ip4, M_PRISON); 2548 pr->pr_ip4 = NULL; 2549 } 2550 } 2551 return (0); 2552 } 2553 2554 /* 2555 * Pass back primary IPv4 address of this jail. 2556 * 2557 * If not restricted return success but do not alter the address. Caller has 2558 * to make sure to initialize it correctly (e.g. INADDR_ANY). 2559 * 2560 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 
2561 * Address returned in NBO. 2562 */ 2563 int 2564 prison_get_ip4(struct ucred *cred, struct in_addr *ia) 2565 { 2566 struct prison *pr; 2567 2568 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2569 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2570 2571 pr = cred->cr_prison; 2572 if (!(pr->pr_flags & PR_IP4)) 2573 return (0); 2574 mtx_lock(&pr->pr_mtx); 2575 if (!(pr->pr_flags & PR_IP4)) { 2576 mtx_unlock(&pr->pr_mtx); 2577 return (0); 2578 } 2579 if (pr->pr_ip4 == NULL) { 2580 mtx_unlock(&pr->pr_mtx); 2581 return (EAFNOSUPPORT); 2582 } 2583 2584 ia->s_addr = pr->pr_ip4[0].s_addr; 2585 mtx_unlock(&pr->pr_mtx); 2586 return (0); 2587 } 2588 2589 /* 2590 * Return true if pr1 and pr2 have the same IPv4 address restrictions. 2591 */ 2592 int 2593 prison_equal_ip4(struct prison *pr1, struct prison *pr2) 2594 { 2595 2596 if (pr1 == pr2) 2597 return (1); 2598 2599 /* 2600 * jail_set maintains an exclusive hold on allprison_lock while it 2601 * changes the IP addresses, so only a shared hold is needed. This is 2602 * easier than locking the two prisons which would require finding the 2603 * proper locking order and end up needing allprison_lock anyway. 2604 */ 2605 sx_slock(&allprison_lock); 2606 while (pr1 != &prison0 && !(pr1->pr_flags & PR_IP4_USER)) 2607 pr1 = pr1->pr_parent; 2608 while (pr2 != &prison0 && !(pr2->pr_flags & PR_IP4_USER)) 2609 pr2 = pr2->pr_parent; 2610 sx_sunlock(&allprison_lock); 2611 return (pr1 == pr2); 2612 } 2613 2614 /* 2615 * Make sure our (source) address is set to something meaningful to this 2616 * jail. 2617 * 2618 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2619 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2620 * doesn't allow IPv4. Address passed in in NBO and returned in NBO. 
2621 */ 2622 int 2623 prison_local_ip4(struct ucred *cred, struct in_addr *ia) 2624 { 2625 struct prison *pr; 2626 struct in_addr ia0; 2627 int error; 2628 2629 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2630 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2631 2632 pr = cred->cr_prison; 2633 if (!(pr->pr_flags & PR_IP4)) 2634 return (0); 2635 mtx_lock(&pr->pr_mtx); 2636 if (!(pr->pr_flags & PR_IP4)) { 2637 mtx_unlock(&pr->pr_mtx); 2638 return (0); 2639 } 2640 if (pr->pr_ip4 == NULL) { 2641 mtx_unlock(&pr->pr_mtx); 2642 return (EAFNOSUPPORT); 2643 } 2644 2645 ia0.s_addr = ntohl(ia->s_addr); 2646 if (ia0.s_addr == INADDR_LOOPBACK) { 2647 ia->s_addr = pr->pr_ip4[0].s_addr; 2648 mtx_unlock(&pr->pr_mtx); 2649 return (0); 2650 } 2651 2652 if (ia0.s_addr == INADDR_ANY) { 2653 /* 2654 * In case there is only 1 IPv4 address, bind directly. 2655 */ 2656 if (pr->pr_ip4s == 1) 2657 ia->s_addr = pr->pr_ip4[0].s_addr; 2658 mtx_unlock(&pr->pr_mtx); 2659 return (0); 2660 } 2661 2662 error = _prison_check_ip4(pr, ia); 2663 mtx_unlock(&pr->pr_mtx); 2664 return (error); 2665 } 2666 2667 /* 2668 * Rewrite destination address in case we will connect to loopback address. 2669 * 2670 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv4. 2671 * Address passed in in NBO and returned in NBO. 
2672 */ 2673 int 2674 prison_remote_ip4(struct ucred *cred, struct in_addr *ia) 2675 { 2676 struct prison *pr; 2677 2678 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2679 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2680 2681 pr = cred->cr_prison; 2682 if (!(pr->pr_flags & PR_IP4)) 2683 return (0); 2684 mtx_lock(&pr->pr_mtx); 2685 if (!(pr->pr_flags & PR_IP4)) { 2686 mtx_unlock(&pr->pr_mtx); 2687 return (0); 2688 } 2689 if (pr->pr_ip4 == NULL) { 2690 mtx_unlock(&pr->pr_mtx); 2691 return (EAFNOSUPPORT); 2692 } 2693 2694 if (ntohl(ia->s_addr) == INADDR_LOOPBACK) { 2695 ia->s_addr = pr->pr_ip4[0].s_addr; 2696 mtx_unlock(&pr->pr_mtx); 2697 return (0); 2698 } 2699 2700 /* 2701 * Return success because nothing had to be changed. 2702 */ 2703 mtx_unlock(&pr->pr_mtx); 2704 return (0); 2705 } 2706 2707 /* 2708 * Check if given address belongs to the jail referenced by cred/prison. 2709 * 2710 * Returns 0 if jail doesn't restrict IPv4 or if address belongs to jail, 2711 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2712 * doesn't allow IPv4. Address passed in in NBO. 2713 */ 2714 static int 2715 _prison_check_ip4(struct prison *pr, struct in_addr *ia) 2716 { 2717 int i, a, z, d; 2718 2719 /* 2720 * Check the primary IP. 2721 */ 2722 if (pr->pr_ip4[0].s_addr == ia->s_addr) 2723 return (0); 2724 2725 /* 2726 * All the other IPs are sorted so we can do a binary search. 
2727 */ 2728 a = 0; 2729 z = pr->pr_ip4s - 2; 2730 while (a <= z) { 2731 i = (a + z) / 2; 2732 d = qcmp_v4(&pr->pr_ip4[i+1], ia); 2733 if (d > 0) 2734 z = i - 1; 2735 else if (d < 0) 2736 a = i + 1; 2737 else 2738 return (0); 2739 } 2740 2741 return (EADDRNOTAVAIL); 2742 } 2743 2744 int 2745 prison_check_ip4(struct ucred *cred, struct in_addr *ia) 2746 { 2747 struct prison *pr; 2748 int error; 2749 2750 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2751 KASSERT(ia != NULL, ("%s: ia is NULL", __func__)); 2752 2753 pr = cred->cr_prison; 2754 if (!(pr->pr_flags & PR_IP4)) 2755 return (0); 2756 mtx_lock(&pr->pr_mtx); 2757 if (!(pr->pr_flags & PR_IP4)) { 2758 mtx_unlock(&pr->pr_mtx); 2759 return (0); 2760 } 2761 if (pr->pr_ip4 == NULL) { 2762 mtx_unlock(&pr->pr_mtx); 2763 return (EAFNOSUPPORT); 2764 } 2765 2766 error = _prison_check_ip4(pr, ia); 2767 mtx_unlock(&pr->pr_mtx); 2768 return (error); 2769 } 2770 #endif 2771 2772 #ifdef INET6 2773 static int 2774 prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6) 2775 { 2776 int ii, ij, used; 2777 struct prison *ppr; 2778 2779 ppr = pr->pr_parent; 2780 if (!(pr->pr_flags & PR_IP6_USER)) { 2781 /* This has no user settings, so just copy the parent's list. */ 2782 if (pr->pr_ip6s < ppr->pr_ip6s) { 2783 /* 2784 * There's no room for the parent's list. Use the 2785 * new list buffer, which is assumed to be big enough 2786 * (if it was passed). If there's no buffer, try to 2787 * allocate one. 
2788 */ 2789 used = 1; 2790 if (newip6 == NULL) { 2791 newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6), 2792 M_PRISON, M_NOWAIT); 2793 if (newip6 != NULL) 2794 used = 0; 2795 } 2796 if (newip6 != NULL) { 2797 bcopy(ppr->pr_ip6, newip6, 2798 ppr->pr_ip6s * sizeof(*newip6)); 2799 free(pr->pr_ip6, M_PRISON); 2800 pr->pr_ip6 = newip6; 2801 pr->pr_ip6s = ppr->pr_ip6s; 2802 pr->pr_flags |= PR_IP6; 2803 } 2804 return (used); 2805 } 2806 pr->pr_ip6s = ppr->pr_ip6s; 2807 if (pr->pr_ip6s > 0) 2808 bcopy(ppr->pr_ip6, pr->pr_ip6, 2809 pr->pr_ip6s * sizeof(*newip6)); 2810 else if (pr->pr_ip6 != NULL) { 2811 free(pr->pr_ip6, M_PRISON); 2812 pr->pr_ip6 = NULL; 2813 } 2814 pr->pr_flags = 2815 (pr->pr_flags & ~PR_IP6) | (ppr->pr_flags & PR_IP6); 2816 } else if (pr->pr_ip6s > 0 && (ppr->pr_flags & PR_IP6)) { 2817 /* Remove addresses that aren't in the parent. */ 2818 for (ij = 0; ij < ppr->pr_ip6s; ij++) 2819 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], 2820 &ppr->pr_ip6[ij])) 2821 break; 2822 if (ij < ppr->pr_ip6s) 2823 ii = 1; 2824 else { 2825 bcopy(pr->pr_ip6 + 1, pr->pr_ip6, 2826 --pr->pr_ip6s * sizeof(*pr->pr_ip6)); 2827 ii = 0; 2828 } 2829 for (ij = 1; ii < pr->pr_ip6s; ) { 2830 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii], 2831 &ppr->pr_ip6[0])) { 2832 ii++; 2833 continue; 2834 } 2835 switch (ij >= ppr->pr_ip4s ? -1 : 2836 qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) { 2837 case -1: 2838 bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii, 2839 (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6)); 2840 break; 2841 case 0: 2842 ii++; 2843 ij++; 2844 break; 2845 case 1: 2846 ij++; 2847 break; 2848 } 2849 } 2850 if (pr->pr_ip6s == 0) { 2851 free(pr->pr_ip6, M_PRISON); 2852 pr->pr_ip6 = NULL; 2853 } 2854 } 2855 return 0; 2856 } 2857 2858 /* 2859 * Pass back primary IPv6 address for this jail. 2860 * 2861 * If not restricted return success but do not alter the address. Caller has 2862 * to make sure to initialize it correctly (e.g. IN6ADDR_ANY_INIT). 
2863 * 2864 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 2865 */ 2866 int 2867 prison_get_ip6(struct ucred *cred, struct in6_addr *ia6) 2868 { 2869 struct prison *pr; 2870 2871 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2872 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2873 2874 pr = cred->cr_prison; 2875 if (!(pr->pr_flags & PR_IP6)) 2876 return (0); 2877 mtx_lock(&pr->pr_mtx); 2878 if (!(pr->pr_flags & PR_IP6)) { 2879 mtx_unlock(&pr->pr_mtx); 2880 return (0); 2881 } 2882 if (pr->pr_ip6 == NULL) { 2883 mtx_unlock(&pr->pr_mtx); 2884 return (EAFNOSUPPORT); 2885 } 2886 2887 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2888 mtx_unlock(&pr->pr_mtx); 2889 return (0); 2890 } 2891 2892 /* 2893 * Return true if pr1 and pr2 have the same IPv6 address restrictions. 2894 */ 2895 int 2896 prison_equal_ip6(struct prison *pr1, struct prison *pr2) 2897 { 2898 2899 if (pr1 == pr2) 2900 return (1); 2901 2902 sx_slock(&allprison_lock); 2903 while (pr1 != &prison0 && !(pr1->pr_flags & PR_IP6_USER)) 2904 pr1 = pr1->pr_parent; 2905 while (pr2 != &prison0 && !(pr2->pr_flags & PR_IP6_USER)) 2906 pr2 = pr2->pr_parent; 2907 sx_sunlock(&allprison_lock); 2908 return (pr1 == pr2); 2909 } 2910 2911 /* 2912 * Make sure our (source) address is set to something meaningful to this jail. 2913 * 2914 * v6only should be set based on (inp->inp_flags & IN6P_IPV6_V6ONLY != 0) 2915 * when needed while binding. 2916 * 2917 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 2918 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 2919 * doesn't allow IPv6. 
2920 */ 2921 int 2922 prison_local_ip6(struct ucred *cred, struct in6_addr *ia6, int v6only) 2923 { 2924 struct prison *pr; 2925 int error; 2926 2927 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2928 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2929 2930 pr = cred->cr_prison; 2931 if (!(pr->pr_flags & PR_IP6)) 2932 return (0); 2933 mtx_lock(&pr->pr_mtx); 2934 if (!(pr->pr_flags & PR_IP6)) { 2935 mtx_unlock(&pr->pr_mtx); 2936 return (0); 2937 } 2938 if (pr->pr_ip6 == NULL) { 2939 mtx_unlock(&pr->pr_mtx); 2940 return (EAFNOSUPPORT); 2941 } 2942 2943 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2944 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2945 mtx_unlock(&pr->pr_mtx); 2946 return (0); 2947 } 2948 2949 if (IN6_IS_ADDR_UNSPECIFIED(ia6)) { 2950 /* 2951 * In case there is only 1 IPv6 address, and v6only is true, 2952 * then bind directly. 2953 */ 2954 if (v6only != 0 && pr->pr_ip6s == 1) 2955 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2956 mtx_unlock(&pr->pr_mtx); 2957 return (0); 2958 } 2959 2960 error = _prison_check_ip6(pr, ia6); 2961 mtx_unlock(&pr->pr_mtx); 2962 return (error); 2963 } 2964 2965 /* 2966 * Rewrite destination address in case we will connect to loopback address. 2967 * 2968 * Returns 0 on success, EAFNOSUPPORT if the jail doesn't allow IPv6. 
2969 */ 2970 int 2971 prison_remote_ip6(struct ucred *cred, struct in6_addr *ia6) 2972 { 2973 struct prison *pr; 2974 2975 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 2976 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 2977 2978 pr = cred->cr_prison; 2979 if (!(pr->pr_flags & PR_IP6)) 2980 return (0); 2981 mtx_lock(&pr->pr_mtx); 2982 if (!(pr->pr_flags & PR_IP6)) { 2983 mtx_unlock(&pr->pr_mtx); 2984 return (0); 2985 } 2986 if (pr->pr_ip6 == NULL) { 2987 mtx_unlock(&pr->pr_mtx); 2988 return (EAFNOSUPPORT); 2989 } 2990 2991 if (IN6_IS_ADDR_LOOPBACK(ia6)) { 2992 bcopy(&pr->pr_ip6[0], ia6, sizeof(struct in6_addr)); 2993 mtx_unlock(&pr->pr_mtx); 2994 return (0); 2995 } 2996 2997 /* 2998 * Return success because nothing had to be changed. 2999 */ 3000 mtx_unlock(&pr->pr_mtx); 3001 return (0); 3002 } 3003 3004 /* 3005 * Check if given address belongs to the jail referenced by cred/prison. 3006 * 3007 * Returns 0 if jail doesn't restrict IPv6 or if address belongs to jail, 3008 * EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if the jail 3009 * doesn't allow IPv6. 3010 */ 3011 static int 3012 _prison_check_ip6(struct prison *pr, struct in6_addr *ia6) 3013 { 3014 int i, a, z, d; 3015 3016 /* 3017 * Check the primary IP. 3018 */ 3019 if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6)) 3020 return (0); 3021 3022 /* 3023 * All the other IPs are sorted so we can do a binary search. 
3024 */ 3025 a = 0; 3026 z = pr->pr_ip6s - 2; 3027 while (a <= z) { 3028 i = (a + z) / 2; 3029 d = qcmp_v6(&pr->pr_ip6[i+1], ia6); 3030 if (d > 0) 3031 z = i - 1; 3032 else if (d < 0) 3033 a = i + 1; 3034 else 3035 return (0); 3036 } 3037 3038 return (EADDRNOTAVAIL); 3039 } 3040 3041 int 3042 prison_check_ip6(struct ucred *cred, struct in6_addr *ia6) 3043 { 3044 struct prison *pr; 3045 int error; 3046 3047 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3048 KASSERT(ia6 != NULL, ("%s: ia6 is NULL", __func__)); 3049 3050 pr = cred->cr_prison; 3051 if (!(pr->pr_flags & PR_IP6)) 3052 return (0); 3053 mtx_lock(&pr->pr_mtx); 3054 if (!(pr->pr_flags & PR_IP6)) { 3055 mtx_unlock(&pr->pr_mtx); 3056 return (0); 3057 } 3058 if (pr->pr_ip6 == NULL) { 3059 mtx_unlock(&pr->pr_mtx); 3060 return (EAFNOSUPPORT); 3061 } 3062 3063 error = _prison_check_ip6(pr, ia6); 3064 mtx_unlock(&pr->pr_mtx); 3065 return (error); 3066 } 3067 #endif 3068 3069 /* 3070 * Check if a jail supports the given address family. 3071 * 3072 * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT 3073 * if not. 
3074 */ 3075 int 3076 prison_check_af(struct ucred *cred, int af) 3077 { 3078 struct prison *pr; 3079 int error; 3080 3081 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3082 3083 pr = cred->cr_prison; 3084 error = 0; 3085 switch (af) 3086 { 3087 #ifdef INET 3088 case AF_INET: 3089 if (pr->pr_flags & PR_IP4) 3090 { 3091 mtx_lock(&pr->pr_mtx); 3092 if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) 3093 error = EAFNOSUPPORT; 3094 mtx_unlock(&pr->pr_mtx); 3095 } 3096 break; 3097 #endif 3098 #ifdef INET6 3099 case AF_INET6: 3100 if (pr->pr_flags & PR_IP6) 3101 { 3102 mtx_lock(&pr->pr_mtx); 3103 if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) 3104 error = EAFNOSUPPORT; 3105 mtx_unlock(&pr->pr_mtx); 3106 } 3107 break; 3108 #endif 3109 case AF_LOCAL: 3110 case AF_ROUTE: 3111 break; 3112 default: 3113 if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) 3114 error = EAFNOSUPPORT; 3115 } 3116 return (error); 3117 } 3118 3119 /* 3120 * Check if given address belongs to the jail referenced by cred (wrapper to 3121 * prison_check_ip[46]). 3122 * 3123 * Returns 0 if jail doesn't restrict the address family or if address belongs 3124 * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if 3125 * the jail doesn't allow the address family. IPv4 Address passed in in NBO. 
3126 */ 3127 int 3128 prison_if(struct ucred *cred, struct sockaddr *sa) 3129 { 3130 #ifdef INET 3131 struct sockaddr_in *sai; 3132 #endif 3133 #ifdef INET6 3134 struct sockaddr_in6 *sai6; 3135 #endif 3136 int error; 3137 3138 KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); 3139 KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); 3140 3141 error = 0; 3142 switch (sa->sa_family) 3143 { 3144 #ifdef INET 3145 case AF_INET: 3146 sai = (struct sockaddr_in *)sa; 3147 error = prison_check_ip4(cred, &sai->sin_addr); 3148 break; 3149 #endif 3150 #ifdef INET6 3151 case AF_INET6: 3152 sai6 = (struct sockaddr_in6 *)sa; 3153 error = prison_check_ip6(cred, &sai6->sin6_addr); 3154 break; 3155 #endif 3156 default: 3157 if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) 3158 error = EAFNOSUPPORT; 3159 } 3160 return (error); 3161 } 3162 3163 /* 3164 * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. 3165 */ 3166 int 3167 prison_check(struct ucred *cred1, struct ucred *cred2) 3168 { 3169 3170 #ifdef VIMAGE 3171 if (cred2->cr_vimage->v_procg != cred1->cr_vimage->v_procg) 3172 return (ESRCH); 3173 #endif 3174 return ((cred1->cr_prison == cred2->cr_prison || 3175 prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); 3176 } 3177 3178 /* 3179 * Return 1 if p2 is a child of p1, otherwise 0. 3180 */ 3181 int 3182 prison_ischild(struct prison *pr1, struct prison *pr2) 3183 { 3184 3185 for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) 3186 if (pr1 == pr2) 3187 return (1); 3188 return (0); 3189 } 3190 3191 /* 3192 * Return 1 if the passed credential is in a jail, otherwise 0. 3193 */ 3194 int 3195 jailed(struct ucred *cred) 3196 { 3197 3198 return (cred->cr_prison != &prison0); 3199 } 3200 3201 /* 3202 * Return the correct hostname for the passed credential. 3203 */ 3204 void 3205 getcredhostname(struct ucred *cred, char *buf, size_t size) 3206 { 3207 struct prison *pr; 3208 3209 pr = (cred != NULL) ? 
cred->cr_prison : &prison0; 3210 mtx_lock(&pr->pr_mtx); 3211 strlcpy(buf, pr->pr_host, size); 3212 mtx_unlock(&pr->pr_mtx); 3213 } 3214 3215 /* 3216 * Determine whether the subject represented by cred can "see" 3217 * status of a mount point. 3218 * Returns: 0 for permitted, ENOENT otherwise. 3219 * XXX: This function should be called cr_canseemount() and should be 3220 * placed in kern_prot.c. 3221 */ 3222 int 3223 prison_canseemount(struct ucred *cred, struct mount *mp) 3224 { 3225 struct prison *pr; 3226 struct statfs *sp; 3227 size_t len; 3228 3229 pr = cred->cr_prison; 3230 if (pr->pr_enforce_statfs == 0) 3231 return (0); 3232 if (pr->pr_root->v_mount == mp) 3233 return (0); 3234 if (pr->pr_enforce_statfs == 2) 3235 return (ENOENT); 3236 /* 3237 * If jail's chroot directory is set to "/" we should be able to see 3238 * all mount-points from inside a jail. 3239 * This is ugly check, but this is the only situation when jail's 3240 * directory ends with '/'. 3241 */ 3242 if (strcmp(pr->pr_path, "/") == 0) 3243 return (0); 3244 len = strlen(pr->pr_path); 3245 sp = &mp->mnt_stat; 3246 if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) 3247 return (ENOENT); 3248 /* 3249 * Be sure that we don't have situation where jail's root directory 3250 * is "/some/path" and mount point is "/some/pathpath". 
3251 */ 3252 if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') 3253 return (ENOENT); 3254 return (0); 3255 } 3256 3257 void 3258 prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) 3259 { 3260 char jpath[MAXPATHLEN]; 3261 struct prison *pr; 3262 size_t len; 3263 3264 pr = cred->cr_prison; 3265 if (pr->pr_enforce_statfs == 0) 3266 return; 3267 if (prison_canseemount(cred, mp) != 0) { 3268 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3269 strlcpy(sp->f_mntonname, "[restricted]", 3270 sizeof(sp->f_mntonname)); 3271 return; 3272 } 3273 if (pr->pr_root->v_mount == mp) { 3274 /* 3275 * Clear current buffer data, so we are sure nothing from 3276 * the valid path left there. 3277 */ 3278 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3279 *sp->f_mntonname = '/'; 3280 return; 3281 } 3282 /* 3283 * If jail's chroot directory is set to "/" we should be able to see 3284 * all mount-points from inside a jail. 3285 */ 3286 if (strcmp(pr->pr_path, "/") == 0) 3287 return; 3288 len = strlen(pr->pr_path); 3289 strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); 3290 /* 3291 * Clear current buffer data, so we are sure nothing from 3292 * the valid path left there. 3293 */ 3294 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3295 if (*jpath == '\0') { 3296 /* Should never happen. */ 3297 *sp->f_mntonname = '/'; 3298 } else { 3299 strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); 3300 } 3301 } 3302 3303 /* 3304 * Check with permission for a specific privilege is granted within jail. We 3305 * have a specific list of accepted privileges; the rest are denied. 3306 */ 3307 int 3308 prison_priv_check(struct ucred *cred, int priv) 3309 { 3310 3311 if (!jailed(cred)) 3312 return (0); 3313 3314 switch (priv) { 3315 3316 /* 3317 * Allow ktrace privileges for root in jail. 3318 */ 3319 case PRIV_KTRACE: 3320 3321 #if 0 3322 /* 3323 * Allow jailed processes to configure audit identity and 3324 * submit audit records (login, etc). 
In the future we may 3325 * want to further refine the relationship between audit and 3326 * jail. 3327 */ 3328 case PRIV_AUDIT_GETAUDIT: 3329 case PRIV_AUDIT_SETAUDIT: 3330 case PRIV_AUDIT_SUBMIT: 3331 #endif 3332 3333 /* 3334 * Allow jailed processes to manipulate process UNIX 3335 * credentials in any way they see fit. 3336 */ 3337 case PRIV_CRED_SETUID: 3338 case PRIV_CRED_SETEUID: 3339 case PRIV_CRED_SETGID: 3340 case PRIV_CRED_SETEGID: 3341 case PRIV_CRED_SETGROUPS: 3342 case PRIV_CRED_SETREUID: 3343 case PRIV_CRED_SETREGID: 3344 case PRIV_CRED_SETRESUID: 3345 case PRIV_CRED_SETRESGID: 3346 3347 /* 3348 * Jail implements visibility constraints already, so allow 3349 * jailed root to override uid/gid-based constraints. 3350 */ 3351 case PRIV_SEEOTHERGIDS: 3352 case PRIV_SEEOTHERUIDS: 3353 3354 /* 3355 * Jail implements inter-process debugging limits already, so 3356 * allow jailed root various debugging privileges. 3357 */ 3358 case PRIV_DEBUG_DIFFCRED: 3359 case PRIV_DEBUG_SUGID: 3360 case PRIV_DEBUG_UNPRIV: 3361 3362 /* 3363 * Allow jail to set various resource limits and login 3364 * properties, and for now, exceed process resource limits. 3365 */ 3366 case PRIV_PROC_LIMIT: 3367 case PRIV_PROC_SETLOGIN: 3368 case PRIV_PROC_SETRLIMIT: 3369 3370 /* 3371 * System V and POSIX IPC privileges are granted in jail. 3372 */ 3373 case PRIV_IPC_READ: 3374 case PRIV_IPC_WRITE: 3375 case PRIV_IPC_ADMIN: 3376 case PRIV_IPC_MSGSIZE: 3377 case PRIV_MQ_ADMIN: 3378 3379 /* 3380 * Jail operations within a jail work on child jails. 3381 */ 3382 case PRIV_JAIL_ATTACH: 3383 case PRIV_JAIL_SET: 3384 case PRIV_JAIL_REMOVE: 3385 3386 /* 3387 * Jail implements its own inter-process limits, so allow 3388 * root processes in jail to change scheduling on other 3389 * processes in the same jail. Likewise for signalling. 
3390 */ 3391 case PRIV_SCHED_DIFFCRED: 3392 case PRIV_SCHED_CPUSET: 3393 case PRIV_SIGNAL_DIFFCRED: 3394 case PRIV_SIGNAL_SUGID: 3395 3396 /* 3397 * Allow jailed processes to write to sysctls marked as jail 3398 * writable. 3399 */ 3400 case PRIV_SYSCTL_WRITEJAIL: 3401 3402 /* 3403 * Allow root in jail to manage a variety of quota 3404 * properties. These should likely be conditional on a 3405 * configuration option. 3406 */ 3407 case PRIV_VFS_GETQUOTA: 3408 case PRIV_VFS_SETQUOTA: 3409 3410 /* 3411 * Since Jail relies on chroot() to implement file system 3412 * protections, grant many VFS privileges to root in jail. 3413 * Be careful to exclude mount-related and NFS-related 3414 * privileges. 3415 */ 3416 case PRIV_VFS_READ: 3417 case PRIV_VFS_WRITE: 3418 case PRIV_VFS_ADMIN: 3419 case PRIV_VFS_EXEC: 3420 case PRIV_VFS_LOOKUP: 3421 case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ 3422 case PRIV_VFS_CHFLAGS_DEV: 3423 case PRIV_VFS_CHOWN: 3424 case PRIV_VFS_CHROOT: 3425 case PRIV_VFS_RETAINSUGID: 3426 case PRIV_VFS_FCHROOT: 3427 case PRIV_VFS_LINK: 3428 case PRIV_VFS_SETGID: 3429 case PRIV_VFS_STAT: 3430 case PRIV_VFS_STICKYFILE: 3431 return (0); 3432 3433 /* 3434 * Depending on the global setting, allow privilege of 3435 * setting system flags. 3436 */ 3437 case PRIV_VFS_SYSFLAGS: 3438 if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) 3439 return (0); 3440 else 3441 return (EPERM); 3442 3443 /* 3444 * Depending on the global setting, allow privilege of 3445 * mounting/unmounting file systems. 3446 */ 3447 case PRIV_VFS_MOUNT: 3448 case PRIV_VFS_UNMOUNT: 3449 case PRIV_VFS_MOUNT_NONUSER: 3450 case PRIV_VFS_MOUNT_OWNER: 3451 if (cred->cr_prison->pr_allow & PR_ALLOW_MOUNT) 3452 return (0); 3453 else 3454 return (EPERM); 3455 3456 /* 3457 * Allow jailed root to bind reserved ports and reuse in-use 3458 * ports. 
3459 */ 3460 case PRIV_NETINET_RESERVEDPORT: 3461 case PRIV_NETINET_REUSEPORT: 3462 return (0); 3463 3464 /* 3465 * Allow jailed root to set certian IPv4/6 (option) headers. 3466 */ 3467 case PRIV_NETINET_SETHDROPTS: 3468 return (0); 3469 3470 /* 3471 * Conditionally allow creating raw sockets in jail. 3472 */ 3473 case PRIV_NETINET_RAW: 3474 if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) 3475 return (0); 3476 else 3477 return (EPERM); 3478 3479 /* 3480 * Since jail implements its own visibility limits on netstat 3481 * sysctls, allow getcred. This allows identd to work in 3482 * jail. 3483 */ 3484 case PRIV_NETINET_GETCRED: 3485 return (0); 3486 3487 default: 3488 /* 3489 * In all remaining cases, deny the privilege request. This 3490 * includes almost all network privileges, many system 3491 * configuration privileges. 3492 */ 3493 return (EPERM); 3494 } 3495 } 3496 3497 /* 3498 * Return the part of pr2's name that is relative to pr1, or the whole name 3499 * if it does not directly follow. 3500 */ 3501 3502 char * 3503 prison_name(struct prison *pr1, struct prison *pr2) 3504 { 3505 char *name; 3506 3507 /* Jails see themselves as "0" (if they see themselves at all). */ 3508 if (pr1 == pr2) 3509 return "0"; 3510 name = pr2->pr_name; 3511 if (prison_ischild(pr1, pr2)) { 3512 /* 3513 * pr1 isn't locked (and allprison_lock may not be either) 3514 * so its length can't be counted on. But the number of dots 3515 * can be counted on - and counted. 3516 */ 3517 for (; pr1 != &prison0; pr1 = pr1->pr_parent) 3518 name = strchr(name, '.') + 1; 3519 } 3520 return (name); 3521 } 3522 3523 /* 3524 * Return the part of pr2's path that is relative to pr1, or the whole path 3525 * if it does not directly follow. 
3526 */ 3527 static char * 3528 prison_path(struct prison *pr1, struct prison *pr2) 3529 { 3530 char *path1, *path2; 3531 int len1; 3532 3533 path1 = pr1->pr_path; 3534 path2 = pr2->pr_path; 3535 if (!strcmp(path1, "/")) 3536 return (path2); 3537 len1 = strlen(path1); 3538 if (strncmp(path1, path2, len1)) 3539 return (path2); 3540 if (path2[len1] == '\0') 3541 return "/"; 3542 if (path2[len1] == '/') 3543 return (path2 + len1); 3544 return (path2); 3545 } 3546 3547 3548 /* 3549 * Jail-related sysctls. 3550 */ 3551 SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, 3552 "Jails"); 3553 3554 static int 3555 sysctl_jail_list(SYSCTL_HANDLER_ARGS) 3556 { 3557 struct xprison *xp; 3558 struct prison *pr, *cpr; 3559 #ifdef INET 3560 struct in_addr *ip4 = NULL; 3561 int ip4s = 0; 3562 #endif 3563 #ifdef INET6 3564 struct in_addr *ip6 = NULL; 3565 int ip6s = 0; 3566 #endif 3567 int descend, error; 3568 3569 xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); 3570 pr = req->td->td_ucred->cr_prison; 3571 error = 0; 3572 sx_slock(&allprison_lock); 3573 FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { 3574 #if defined(INET) || defined(INET6) 3575 again: 3576 #endif 3577 mtx_lock(&cpr->pr_mtx); 3578 #ifdef INET 3579 if (cpr->pr_ip4s > 0) { 3580 if (ip4s < cpr->pr_ip4s) { 3581 ip4s = cpr->pr_ip4s; 3582 mtx_unlock(&cpr->pr_mtx); 3583 ip4 = realloc(ip4, ip4s * 3584 sizeof(struct in_addr), M_TEMP, M_WAITOK); 3585 goto again; 3586 } 3587 bcopy(cpr->pr_ip4, ip4, 3588 cpr->pr_ip4s * sizeof(struct in_addr)); 3589 } 3590 #endif 3591 #ifdef INET6 3592 if (cpr->pr_ip6s > 0) { 3593 if (ip6s < cpr->pr_ip6s) { 3594 ip6s = cpr->pr_ip6s; 3595 mtx_unlock(&cpr->pr_mtx); 3596 ip6 = realloc(ip6, ip6s * 3597 sizeof(struct in6_addr), M_TEMP, M_WAITOK); 3598 goto again; 3599 } 3600 bcopy(cpr->pr_ip6, ip6, 3601 cpr->pr_ip6s * sizeof(struct in6_addr)); 3602 } 3603 #endif 3604 if (cpr->pr_ref == 0) { 3605 mtx_unlock(&cpr->pr_mtx); 3606 continue; 3607 } 3608 bzero(xp, sizeof(*xp)); 3609 xp->pr_version = 
XPRISON_VERSION; 3610 xp->pr_id = cpr->pr_id; 3611 xp->pr_state = cpr->pr_uref > 0 3612 ? PRISON_STATE_ALIVE : PRISON_STATE_DYING; 3613 strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); 3614 strlcpy(xp->pr_host, cpr->pr_host, sizeof(xp->pr_host)); 3615 strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); 3616 #ifdef INET 3617 xp->pr_ip4s = cpr->pr_ip4s; 3618 #endif 3619 #ifdef INET6 3620 xp->pr_ip6s = cpr->pr_ip6s; 3621 #endif 3622 mtx_unlock(&cpr->pr_mtx); 3623 error = SYSCTL_OUT(req, xp, sizeof(*xp)); 3624 if (error) 3625 break; 3626 #ifdef INET 3627 if (xp->pr_ip4s > 0) { 3628 error = SYSCTL_OUT(req, ip4, 3629 xp->pr_ip4s * sizeof(struct in_addr)); 3630 if (error) 3631 break; 3632 } 3633 #endif 3634 #ifdef INET6 3635 if (xp->pr_ip6s > 0) { 3636 error = SYSCTL_OUT(req, ip6, 3637 xp->pr_ip6s * sizeof(struct in6_addr)); 3638 if (error) 3639 break; 3640 } 3641 #endif 3642 } 3643 sx_sunlock(&allprison_lock); 3644 free(xp, M_TEMP); 3645 #ifdef INET 3646 free(ip4, M_TEMP); 3647 #endif 3648 #ifdef INET6 3649 free(ip6, M_TEMP); 3650 #endif 3651 return (error); 3652 } 3653 3654 SYSCTL_OID(_security_jail, OID_AUTO, list, 3655 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3656 sysctl_jail_list, "S", "List of active jails"); 3657 3658 static int 3659 sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) 3660 { 3661 int error, injail; 3662 3663 injail = jailed(req->td->td_ucred); 3664 error = SYSCTL_OUT(req, &injail, sizeof(injail)); 3665 3666 return (error); 3667 } 3668 3669 SYSCTL_PROC(_security_jail, OID_AUTO, jailed, 3670 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 3671 sysctl_jail_jailed, "I", "Process in jail?"); 3672 3673 #if defined(INET) || defined(INET6) 3674 SYSCTL_INT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, 3675 &jail_max_af_ips, 0, 3676 "Number of IP addresses a jail may have at most per address family"); 3677 #endif 3678 3679 /* 3680 * Default parameters for jail(2) compatability. 
For historical reasons, 3681 * the sysctl names have varying similarity to the parameter names. Prisons 3682 * just see their own parameters, and can't change them. 3683 */ 3684 static int 3685 sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) 3686 { 3687 struct prison *pr; 3688 int allow, error, i; 3689 3690 pr = req->td->td_ucred->cr_prison; 3691 allow = (pr == &prison0) ? jail_default_allow : pr->pr_allow; 3692 3693 /* Get the current flag value, and convert it to a boolean. */ 3694 i = (allow & arg2) ? 1 : 0; 3695 if (arg1 != NULL) 3696 i = !i; 3697 error = sysctl_handle_int(oidp, &i, 0, req); 3698 if (error || !req->newptr) 3699 return (error); 3700 i = i ? arg2 : 0; 3701 if (arg1 != NULL) 3702 i ^= arg2; 3703 /* 3704 * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 3705 * for writing. 3706 */ 3707 mtx_lock(&prison0.pr_mtx); 3708 jail_default_allow = (jail_default_allow & ~arg2) | i; 3709 mtx_unlock(&prison0.pr_mtx); 3710 return (0); 3711 } 3712 3713 SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, 3714 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3715 NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", 3716 "Processes in jail can set their hostnames"); 3717 SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, 3718 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3719 (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", 3720 "Processes in jail are limited to creating UNIX/IP/route sockets only"); 3721 SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, 3722 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3723 NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", 3724 "Processes in jail can use System V IPC primitives"); 3725 SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, 3726 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3727 NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", 3728 "Prison root can create raw sockets"); 3729 SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, 3730 CTLTYPE_INT | 
CTLFLAG_RW | CTLFLAG_MPSAFE, 3731 NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", 3732 "Processes in jail can alter system file flags"); 3733 SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, 3734 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3735 NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", 3736 "Processes in jail can mount/unmount jail-friendly file systems"); 3737 3738 static int 3739 sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) 3740 { 3741 struct prison *pr; 3742 int level, error; 3743 3744 pr = req->td->td_ucred->cr_prison; 3745 level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); 3746 error = sysctl_handle_int(oidp, &level, 0, req); 3747 if (error || !req->newptr) 3748 return (error); 3749 *(int *)arg1 = level; 3750 return (0); 3751 } 3752 3753 SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, 3754 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 3755 &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), 3756 sysctl_jail_default_level, "I", 3757 "Processes in jail cannot see all mounted file systems"); 3758 3759 /* 3760 * Nodes to describe jail parameters. Maximum length of string parameters 3761 * is returned in the string itself, and the other parameters exist merely 3762 * to make themselves and their types known. 
3763 */ 3764 SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW, 0, 3765 "Jail parameters"); 3766 3767 int 3768 sysctl_jail_param(SYSCTL_HANDLER_ARGS) 3769 { 3770 int i; 3771 long l; 3772 size_t s; 3773 char numbuf[12]; 3774 3775 switch (oidp->oid_kind & CTLTYPE) 3776 { 3777 case CTLTYPE_LONG: 3778 case CTLTYPE_ULONG: 3779 l = 0; 3780 #ifdef SCTL_MASK32 3781 if (!(req->flags & SCTL_MASK32)) 3782 #endif 3783 return (SYSCTL_OUT(req, &l, sizeof(l))); 3784 case CTLTYPE_INT: 3785 case CTLTYPE_UINT: 3786 i = 0; 3787 return (SYSCTL_OUT(req, &i, sizeof(i))); 3788 case CTLTYPE_STRING: 3789 snprintf(numbuf, sizeof(numbuf), "%d", arg2); 3790 return 3791 (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); 3792 case CTLTYPE_STRUCT: 3793 s = (size_t)arg2; 3794 return (SYSCTL_OUT(req, &s, sizeof(s))); 3795 } 3796 return (0); 3797 } 3798 3799 SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); 3800 SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); 3801 SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); 3802 SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); 3803 SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, 3804 "I", "Jail secure level"); 3805 SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, 3806 "I", "Jail cannot see all mounted file systems"); 3807 SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, 3808 "B", "Jail persistence"); 3809 SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, 3810 "B", "Jail is in the process of shutting down"); 3811 3812 SYSCTL_JAIL_PARAM_NODE(host, "Jail host info"); 3813 SYSCTL_JAIL_PARAM(, nohost, CTLTYPE_INT | CTLFLAG_RW, 3814 "BN", "Jail w/ no host info"); 3815 SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, 3816 "Jail hostname"); 3817 SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, 3818 "Jail NIS domainname"); 3819 SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, 
CTLFLAG_RW, HOSTUUIDLEN, 3820 "Jail host UUID"); 3821 SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, 3822 "LU", "Jail host ID"); 3823 3824 SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); 3825 SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); 3826 3827 #ifdef INET 3828 SYSCTL_JAIL_PARAM_NODE(ip4, "Jail IPv4 address virtualization"); 3829 SYSCTL_JAIL_PARAM(, noip4, CTLTYPE_INT | CTLFLAG_RW, 3830 "BN", "Jail w/ no IP address virtualization"); 3831 SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), 3832 "S,in_addr,a", "Jail IPv4 addresses"); 3833 #endif 3834 #ifdef INET6 3835 SYSCTL_JAIL_PARAM_NODE(ip6, "Jail IPv6 address virtualization"); 3836 SYSCTL_JAIL_PARAM(, noip6, CTLTYPE_INT | CTLFLAG_RW, 3837 "BN", "Jail w/ no IP address virtualization"); 3838 SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), 3839 "S,in6_addr,a", "Jail IPv6 addresses"); 3840 #endif 3841 3842 SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); 3843 SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, 3844 "B", "Jail may set hostname"); 3845 SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, 3846 "B", "Jail may use SYSV IPC"); 3847 SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, 3848 "B", "Jail may create raw sockets"); 3849 SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, 3850 "B", "Jail may alter system file flags"); 3851 SYSCTL_JAIL_PARAM(_allow, mount, CTLTYPE_INT | CTLFLAG_RW, 3852 "B", "Jail may mount/unmount jail-friendly file systems"); 3853 SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, 3854 "B", "Jail may set file quotas"); 3855 SYSCTL_JAIL_PARAM(_allow, jails, CTLTYPE_INT | CTLFLAG_RW, 3856 "B", "Jail may create child jails"); 3857 SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, 3858 "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); 3859 3860 3861 #ifdef DDB 3862 3863 static void 3864 
db_show_prison(struct prison *pr) 3865 { 3866 int fi; 3867 #if defined(INET) || defined(INET6) 3868 int ii; 3869 #endif 3870 #ifdef INET6 3871 char ip6buf[INET6_ADDRSTRLEN]; 3872 #endif 3873 3874 db_printf("prison %p:\n", pr); 3875 db_printf(" jid = %d\n", pr->pr_id); 3876 db_printf(" name = %s\n", pr->pr_name); 3877 db_printf(" parent = %p\n", pr->pr_parent); 3878 db_printf(" ref = %d\n", pr->pr_ref); 3879 db_printf(" uref = %d\n", pr->pr_uref); 3880 db_printf(" path = %s\n", pr->pr_path); 3881 db_printf(" cpuset = %d\n", pr->pr_cpuset 3882 ? pr->pr_cpuset->cs_id : -1); 3883 db_printf(" root = %p\n", pr->pr_root); 3884 db_printf(" securelevel = %d\n", pr->pr_securelevel); 3885 db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); 3886 db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); 3887 db_printf(" flags = %x", pr->pr_flags); 3888 for (fi = 0; fi < sizeof(pr_flag_names) / sizeof(pr_flag_names[0]); 3889 fi++) 3890 if (pr_flag_names[fi] != NULL && (pr->pr_flags & (1 << fi))) 3891 db_printf(" %s", pr_flag_names[fi]); 3892 db_printf(" allow = %x", pr->pr_allow); 3893 for (fi = 0; fi < sizeof(pr_allow_names) / sizeof(pr_allow_names[0]); 3894 fi++) 3895 if (pr_allow_names[fi] != NULL && (pr->pr_allow & (1 << fi))) 3896 db_printf(" %s", pr_allow_names[fi]); 3897 db_printf("\n"); 3898 db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); 3899 db_printf(" host.hostname = %s\n", pr->pr_host); 3900 db_printf(" host.domainname = %s\n", pr->pr_domain); 3901 db_printf(" host.hostuuid = %s\n", pr->pr_uuid); 3902 db_printf(" host.hostid = %lu\n", pr->pr_hostid); 3903 #ifdef INET 3904 db_printf(" ip4s = %d\n", pr->pr_ip4s); 3905 for (ii = 0; ii < pr->pr_ip4s; ii++) 3906 db_printf(" %s %s\n", 3907 ii == 0 ? "ip4 =" : " ", 3908 inet_ntoa(pr->pr_ip4[ii])); 3909 #endif 3910 #ifdef INET6 3911 db_printf(" ip6s = %d\n", pr->pr_ip6s); 3912 for (ii = 0; ii < pr->pr_ip6s; ii++) 3913 db_printf(" %s %s\n", 3914 ii == 0 ? 
"ip6 =" : " ", 3915 ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); 3916 #endif 3917 } 3918 3919 DB_SHOW_COMMAND(prison, db_show_prison_command) 3920 { 3921 struct prison *pr; 3922 3923 if (!have_addr) { 3924 /* 3925 * Show all prisons in the list, and prison0 which is not 3926 * listed. 3927 */ 3928 db_show_prison(&prison0); 3929 if (!db_pager_quit) { 3930 TAILQ_FOREACH(pr, &allprison, pr_list) { 3931 db_show_prison(pr); 3932 if (db_pager_quit) 3933 break; 3934 } 3935 } 3936 return; 3937 } 3938 3939 if (addr == 0) 3940 pr = &prison0; 3941 else { 3942 /* Look for a prison with the ID and with references. */ 3943 TAILQ_FOREACH(pr, &allprison, pr_list) 3944 if (pr->pr_id == addr && pr->pr_ref > 0) 3945 break; 3946 if (pr == NULL) 3947 /* Look again, without requiring a reference. */ 3948 TAILQ_FOREACH(pr, &allprison, pr_list) 3949 if (pr->pr_id == addr) 3950 break; 3951 if (pr == NULL) 3952 /* Assume address points to a valid prison. */ 3953 pr = (struct prison *)addr; 3954 } 3955 db_show_prison(pr); 3956 } 3957 3958 #endif /* DDB */ 3959