1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_mac.h" 36 37 #include <sys/param.h> 38 #include <sys/domain.h> 39 #include <sys/fcntl.h> 40 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 41 #include <sys/file.h> 42 #include <sys/filedesc.h> 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mbuf.h> 48 #include <sys/mutex.h> 49 #include <sys/namei.h> 50 #include <sys/proc.h> 51 #include <sys/protosw.h> 52 #include <sys/resourcevar.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 #include <sys/signalvar.h> 56 #include <sys/stat.h> 57 #include <sys/sx.h> 58 #include <sys/sysctl.h> 59 #include <sys/systm.h> 60 #include <sys/un.h> 61 #include <sys/unpcb.h> 62 #include <sys/vnode.h> 63 64 #include <vm/uma.h> 65 66 static uma_zone_t unp_zone; 67 static unp_gen_t unp_gencnt; 68 static u_int unp_count; 69 70 static struct unp_head unp_shead, unp_dhead; 71 72 /* 73 * Unix communications domain. 74 * 75 * TODO: 76 * SEQPACKET, RDM 77 * rethink name space problems 78 * need a proper out-of-band 79 * lock pushdown 80 */ 81 static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 82 static ino_t unp_ino; /* prototype for fake inode numbers */ 83 84 static struct mtx unp_mtx; 85 #define UNP_LOCK_INIT() \ 86 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) 87 #define UNP_LOCK() mtx_lock(&unp_mtx) 88 #define UNP_UNLOCK() mtx_unlock(&unp_mtx) 89 #define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) 90 91 static int unp_attach(struct socket *); 92 static void unp_detach(struct unpcb *); 93 static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); 94 static int unp_connect(struct socket *,struct sockaddr *, struct thread *); 95 static int unp_connect2(struct socket *so, struct socket *so2); 96 static void unp_disconnect(struct unpcb *); 97 static void unp_shutdown(struct unpcb *); 98 static void unp_drop(struct unpcb *, int); 99 static void unp_gc(void); 100 static void unp_scan(struct mbuf *, void (*)(struct file *)); 101 static void unp_mark(struct file *); 102 static void unp_discard(struct file *); 103 static void unp_freerights(struct file **, int); 104 static int unp_internalize(struct mbuf **, struct thread *); 105 static int unp_listen(struct unpcb *, struct thread *); 106 107 static int 108 uipc_abort(struct socket *so) 109 { 110 struct unpcb *unp = sotounpcb(so); 111 112 if (unp == NULL) 113 return (EINVAL); 114 UNP_LOCK(); 115 unp_drop(unp, ECONNABORTED); 116 unp_detach(unp); /* NB: unlocks */ 117 SOCK_LOCK(so); 118 sotryfree(so); 119 return (0); 120 } 121 122 static int 123 uipc_accept(struct socket *so, struct sockaddr **nam) 124 { 125 struct unpcb *unp = sotounpcb(so); 126 const struct sockaddr *sa; 127 128 if (unp == NULL) 129 return (EINVAL); 130 131 /* 132 * Pass back name of connected socket, 133 * if it was bound and we are still connected 134 * (our peer may have closed already!). 135 */ 136 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 137 UNP_LOCK(); 138 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) 139 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 140 else 141 sa = &sun_noname; 142 bcopy(sa, *nam, sa->sa_len); 143 UNP_UNLOCK(); 144 return (0); 145 } 146 147 static int 148 uipc_attach(struct socket *so, int proto, struct thread *td) 149 { 150 struct unpcb *unp = sotounpcb(so); 151 152 if (unp != NULL) 153 return (EISCONN); 154 return (unp_attach(so)); 155 } 156 157 static int 158 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 159 { 160 struct unpcb *unp = sotounpcb(so); 161 162 if (unp == NULL) 163 return (EINVAL); 164 165 return (unp_bind(unp, nam, td)); 166 } 167 168 static int 169 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 170 { 171 struct unpcb *unp = sotounpcb(so); 172 int error; 173 174 KASSERT(td == curthread, ("uipc_connect: td != curthread")); 175 176 if (unp == NULL) 177 return (EINVAL); 178 UNP_LOCK(); 179 error = unp_connect(so, nam, td); 180 UNP_UNLOCK(); 181 return (error); 182 } 183 184 int 185 uipc_connect2(struct socket *so1, struct socket *so2) 186 { 187 struct unpcb *unp = sotounpcb(so1); 188 int error; 189 190 if (unp == NULL) 191 return (EINVAL); 192 193 UNP_LOCK(); 194 error = unp_connect2(so1, so2); 195 UNP_UNLOCK(); 196 return (error); 197 } 198 199 /* control is EOPNOTSUPP */ 200 201 static int 202 uipc_detach(struct socket *so) 203 { 204 struct unpcb *unp = sotounpcb(so); 205 206 if (unp == NULL) 207 return (EINVAL); 208 209 UNP_LOCK(); 210 unp_detach(unp); /* NB: unlocks unp */ 211 return (0); 212 } 213 214 static int 215 uipc_disconnect(struct socket *so) 216 { 217 struct unpcb *unp = sotounpcb(so); 218 219 if (unp == NULL) 220 return (EINVAL); 221 UNP_LOCK(); 222 unp_disconnect(unp); 223 UNP_UNLOCK(); 224 return (0); 225 } 226 227 static int 228 uipc_listen(struct socket *so, struct thread *td) 229 { 230 struct unpcb *unp = sotounpcb(so); 231 int error; 232 233 if (unp == NULL || unp->unp_vnode == NULL) 234 return (EINVAL); 235 UNP_LOCK(); 236 error = unp_listen(unp, td); 237 UNP_UNLOCK(); 238 return (error); 239 } 240 241 static int 242 uipc_peeraddr(struct socket *so, struct sockaddr **nam) 243 { 244 struct unpcb *unp = sotounpcb(so); 245 const struct sockaddr *sa; 246 247 if (unp == NULL) 248 return (EINVAL); 249 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 250 UNP_LOCK(); 251 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) 252 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 253 else { 254 /* 255 * XXX: It seems that this test always fails even when 256 * connection is established. So, this else clause is 257 * added as workaround to return PF_LOCAL sockaddr. 258 */ 259 sa = &sun_noname; 260 } 261 bcopy(sa, *nam, sa->sa_len); 262 UNP_UNLOCK(); 263 return (0); 264 } 265 266 static int 267 uipc_rcvd(struct socket *so, int flags) 268 { 269 struct unpcb *unp = sotounpcb(so); 270 struct socket *so2; 271 u_long newhiwat; 272 273 if (unp == NULL) 274 return (EINVAL); 275 UNP_LOCK(); 276 switch (so->so_type) { 277 case SOCK_DGRAM: 278 panic("uipc_rcvd DGRAM?"); 279 /*NOTREACHED*/ 280 281 case SOCK_STREAM: 282 if (unp->unp_conn == NULL) 283 break; 284 so2 = unp->unp_conn->unp_socket; 285 SOCKBUF_LOCK(&so2->so_snd); 286 SOCKBUF_LOCK(&so->so_rcv); 287 /* 288 * Adjust backpressure on sender 289 * and wakeup any waiting to write. 290 */ 291 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 292 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 293 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - 294 so->so_rcv.sb_cc; 295 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 296 newhiwat, RLIM_INFINITY); 297 unp->unp_cc = so->so_rcv.sb_cc; 298 SOCKBUF_UNLOCK(&so->so_rcv); 299 sowwakeup_locked(so2); 300 break; 301 302 default: 303 panic("uipc_rcvd unknown socktype"); 304 } 305 UNP_UNLOCK(); 306 return (0); 307 } 308 309 /* pru_rcvoob is EOPNOTSUPP */ 310 311 static int 312 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 313 struct mbuf *control, struct thread *td) 314 { 315 int error = 0; 316 struct unpcb *unp = sotounpcb(so); 317 struct socket *so2; 318 u_long newhiwat; 319 320 if (unp == NULL) { 321 error = EINVAL; 322 goto release; 323 } 324 if (flags & PRUS_OOB) { 325 error = EOPNOTSUPP; 326 goto release; 327 } 328 329 if (control != NULL && (error = unp_internalize(&control, td))) 330 goto release; 331 332 UNP_LOCK(); 333 switch (so->so_type) { 334 case SOCK_DGRAM: 335 { 336 const struct sockaddr *from; 337 338 if (nam != NULL) { 339 if (unp->unp_conn != NULL) { 340 error = EISCONN; 341 break; 342 } 343 error = unp_connect(so, nam, td); 344 if (error) 345 break; 346 } else { 347 if (unp->unp_conn == NULL) { 348 error = ENOTCONN; 349 break; 350 } 351 } 352 so2 = unp->unp_conn->unp_socket; 353 if (unp->unp_addr != NULL) 354 from = (struct sockaddr *)unp->unp_addr; 355 else 356 from = &sun_noname; 357 SOCKBUF_LOCK(&so2->so_rcv); 358 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { 359 sorwakeup_locked(so2); 360 m = NULL; 361 control = NULL; 362 } else { 363 SOCKBUF_UNLOCK(&so2->so_rcv); 364 error = ENOBUFS; 365 } 366 if (nam != NULL) 367 unp_disconnect(unp); 368 break; 369 } 370 371 case SOCK_STREAM: 372 /* Connect if not connected yet. */ 373 /* 374 * Note: A better implementation would complain 375 * if not equal to the peer's address. 376 */ 377 if ((so->so_state & SS_ISCONNECTED) == 0) { 378 if (nam != NULL) { 379 error = unp_connect(so, nam, td); 380 if (error) 381 break; /* XXX */ 382 } else { 383 error = ENOTCONN; 384 break; 385 } 386 } 387 388 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 389 error = EPIPE; 390 break; 391 } 392 if (unp->unp_conn == NULL) 393 panic("uipc_send connected but no connection?"); 394 so2 = unp->unp_conn->unp_socket; 395 SOCKBUF_LOCK(&so2->so_rcv); 396 /* 397 * Send to paired receive port, and then reduce 398 * send buffer hiwater marks to maintain backpressure. 399 * Wake up readers. 400 */ 401 if (control != NULL) { 402 if (sbappendcontrol_locked(&so2->so_rcv, m, control)) 403 control = NULL; 404 } else { 405 sbappend_locked(&so2->so_rcv, m); 406 } 407 so->so_snd.sb_mbmax -= 408 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 409 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 410 newhiwat = so->so_snd.sb_hiwat - 411 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); 412 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 413 newhiwat, RLIM_INFINITY); 414 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 415 sorwakeup_locked(so2); 416 m = NULL; 417 break; 418 419 default: 420 panic("uipc_send unknown socktype"); 421 } 422 423 /* 424 * SEND_EOF is equivalent to a SEND followed by 425 * a SHUTDOWN. 426 */ 427 if (flags & PRUS_EOF) { 428 socantsendmore(so); 429 unp_shutdown(unp); 430 } 431 UNP_UNLOCK(); 432 433 if (control != NULL && error != 0) 434 unp_dispose(control); 435 436 release: 437 if (control != NULL) 438 m_freem(control); 439 if (m != NULL) 440 m_freem(m); 441 return (error); 442 } 443 444 static int 445 uipc_sense(struct socket *so, struct stat *sb) 446 { 447 struct unpcb *unp = sotounpcb(so); 448 struct socket *so2; 449 450 if (unp == NULL) 451 return (EINVAL); 452 UNP_LOCK(); 453 sb->st_blksize = so->so_snd.sb_hiwat; 454 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 455 so2 = unp->unp_conn->unp_socket; 456 sb->st_blksize += so2->so_rcv.sb_cc; 457 } 458 sb->st_dev = NODEV; 459 if (unp->unp_ino == 0) 460 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; 461 sb->st_ino = unp->unp_ino; 462 UNP_UNLOCK(); 463 return (0); 464 } 465 466 static int 467 uipc_shutdown(struct socket *so) 468 { 469 struct unpcb *unp = sotounpcb(so); 470 471 if (unp == NULL) 472 return (EINVAL); 473 UNP_LOCK(); 474 socantsendmore(so); 475 unp_shutdown(unp); 476 UNP_UNLOCK(); 477 return (0); 478 } 479 480 static int 481 uipc_sockaddr(struct socket *so, struct sockaddr **nam) 482 { 483 struct unpcb *unp = sotounpcb(so); 484 const struct sockaddr *sa; 485 486 if (unp == NULL) 487 return (EINVAL); 488 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 489 UNP_LOCK(); 490 if (unp->unp_addr != NULL) 491 sa = (struct sockaddr *) unp->unp_addr; 492 else 493 sa = &sun_noname; 494 bcopy(sa, *nam, sa->sa_len); 495 UNP_UNLOCK(); 496 return (0); 497 } 498 499 struct pr_usrreqs uipc_usrreqs = { 500 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 501 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 502 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 503 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 504 sosend, soreceive, sopoll, pru_sosetlabel_null 505 }; 506 507 int 508 uipc_ctloutput(so, sopt) 509 struct socket *so; 510 struct sockopt *sopt; 511 { 512 struct unpcb *unp = sotounpcb(so); 513 struct xucred xu; 514 int error; 515 516 switch (sopt->sopt_dir) { 517 case SOPT_GET: 518 switch (sopt->sopt_name) { 519 case LOCAL_PEERCRED: 520 error = 0; 521 UNP_LOCK(); 522 if (unp->unp_flags & UNP_HAVEPC) 523 xu = unp->unp_peercred; 524 else { 525 if (so->so_type == SOCK_STREAM) 526 error = ENOTCONN; 527 else 528 error = EINVAL; 529 } 530 UNP_UNLOCK(); 531 if (error == 0) 532 error = sooptcopyout(sopt, &xu, sizeof(xu)); 533 break; 534 default: 535 error = EOPNOTSUPP; 536 break; 537 } 538 break; 539 case SOPT_SET: 540 default: 541 error = EOPNOTSUPP; 542 break; 543 } 544 return (error); 545 } 546 547 /* 548 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 549 * for stream sockets, although the total for sender and receiver is 550 * actually only PIPSIZ. 551 * Datagram sockets really use the sendspace as the maximum datagram size, 552 * and don't really want to reserve the sendspace. Their recvspace should 553 * be large enough for at least one max-size datagram plus address. 554 */ 555 #ifndef PIPSIZ 556 #define PIPSIZ 8192 557 #endif 558 static u_long unpst_sendspace = PIPSIZ; 559 static u_long unpst_recvspace = PIPSIZ; 560 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 561 static u_long unpdg_recvspace = 4*1024; 562 563 static int unp_rights; /* file descriptors in flight */ 564 565 SYSCTL_DECL(_net_local_stream); 566 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 567 &unpst_sendspace, 0, ""); 568 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 569 &unpst_recvspace, 0, ""); 570 SYSCTL_DECL(_net_local_dgram); 571 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 572 &unpdg_sendspace, 0, ""); 573 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 574 &unpdg_recvspace, 0, ""); 575 SYSCTL_DECL(_net_local); 576 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 577 578 static int 579 unp_attach(so) 580 struct socket *so; 581 { 582 register struct unpcb *unp; 583 int error; 584 585 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 586 switch (so->so_type) { 587 588 case SOCK_STREAM: 589 error = soreserve(so, unpst_sendspace, unpst_recvspace); 590 break; 591 592 case SOCK_DGRAM: 593 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 594 break; 595 596 default: 597 panic("unp_attach"); 598 } 599 if (error) 600 return (error); 601 } 602 unp = uma_zalloc(unp_zone, M_WAITOK); 603 if (unp == NULL) 604 return (ENOBUFS); 605 bzero(unp, sizeof *unp); 606 LIST_INIT(&unp->unp_refs); 607 unp->unp_socket = so; 608 609 UNP_LOCK(); 610 unp->unp_gencnt = ++unp_gencnt; 611 unp_count++; 612 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 613 : &unp_shead, unp, unp_link); 614 UNP_UNLOCK(); 615 616 so->so_pcb = unp; 617 return (0); 618 } 619 620 static void 621 unp_detach(unp) 622 register struct unpcb *unp; 623 { 624 struct vnode *vp; 625 626 UNP_LOCK_ASSERT(); 627 628 LIST_REMOVE(unp, unp_link); 629 unp->unp_gencnt = ++unp_gencnt; 630 --unp_count; 631 if ((vp = unp->unp_vnode) != NULL) { 632 /* 633 * XXXRW: should v_socket be frobbed only while holding 634 * Giant? 635 */ 636 unp->unp_vnode->v_socket = NULL; 637 unp->unp_vnode = NULL; 638 } 639 if (unp->unp_conn != NULL) 640 unp_disconnect(unp); 641 while (!LIST_EMPTY(&unp->unp_refs)) { 642 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 643 unp_drop(ref, ECONNRESET); 644 } 645 soisdisconnected(unp->unp_socket); 646 unp->unp_socket->so_pcb = NULL; 647 if (unp_rights) { 648 /* 649 * Normally the receive buffer is flushed later, 650 * in sofree, but if our receive buffer holds references 651 * to descriptors that are now garbage, we will dispose 652 * of those descriptor references after the garbage collector 653 * gets them (resulting in a "panic: closef: count < 0"). 654 */ 655 sorflush(unp->unp_socket); 656 unp_gc(); 657 } 658 UNP_UNLOCK(); 659 if (unp->unp_addr != NULL) 660 FREE(unp->unp_addr, M_SONAME); 661 uma_zfree(unp_zone, unp); 662 if (vp) { 663 mtx_lock(&Giant); 664 vrele(vp); 665 mtx_unlock(&Giant); 666 } 667 } 668 669 static int 670 unp_bind(unp, nam, td) 671 struct unpcb *unp; 672 struct sockaddr *nam; 673 struct thread *td; 674 { 675 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 676 struct vnode *vp; 677 struct mount *mp; 678 struct vattr vattr; 679 int error, namelen; 680 struct nameidata nd; 681 char *buf; 682 683 /* 684 * XXXRW: This test-and-set of unp_vnode is non-atomic; the 685 * unlocked read here is fine, but the value of unp_vnode needs 686 * to be tested again after we do all the lookups to see if the 687 * pcb is still unbound? 688 */ 689 if (unp->unp_vnode != NULL) 690 return (EINVAL); 691 692 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 693 if (namelen <= 0) 694 return (EINVAL); 695 696 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 697 strlcpy(buf, soun->sun_path, namelen + 1); 698 699 mtx_lock(&Giant); 700 restart: 701 mtx_assert(&Giant, MA_OWNED); 702 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, 703 buf, td); 704 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 705 error = namei(&nd); 706 if (error) 707 goto done; 708 vp = nd.ni_vp; 709 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 710 NDFREE(&nd, NDF_ONLY_PNBUF); 711 if (nd.ni_dvp == vp) 712 vrele(nd.ni_dvp); 713 else 714 vput(nd.ni_dvp); 715 if (vp != NULL) { 716 vrele(vp); 717 error = EADDRINUSE; 718 goto done; 719 } 720 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 721 if (error) 722 goto done; 723 goto restart; 724 } 725 VATTR_NULL(&vattr); 726 vattr.va_type = VSOCK; 727 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 728 #ifdef MAC 729 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 730 &vattr); 731 #endif 732 if (error == 0) { 733 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 734 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 735 } 736 NDFREE(&nd, NDF_ONLY_PNBUF); 737 vput(nd.ni_dvp); 738 if (error) 739 goto done; 740 vp = nd.ni_vp; 741 ASSERT_VOP_LOCKED(vp, "unp_bind"); 742 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 743 UNP_LOCK(); 744 vp->v_socket = unp->unp_socket; 745 unp->unp_vnode = vp; 746 unp->unp_addr = soun; 747 UNP_UNLOCK(); 748 VOP_UNLOCK(vp, 0, td); 749 vn_finished_write(mp); 750 done: 751 mtx_unlock(&Giant); 752 free(buf, M_TEMP); 753 return (error); 754 } 755 756 static int 757 unp_connect(so, nam, td) 758 struct socket *so; 759 struct sockaddr *nam; 760 struct thread *td; 761 { 762 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 763 register struct vnode *vp; 764 register struct socket *so2, *so3; 765 struct unpcb *unp = sotounpcb(so); 766 struct unpcb *unp2, *unp3; 767 int error, len; 768 struct nameidata nd; 769 char buf[SOCK_MAXADDRLEN]; 770 struct sockaddr *sa; 771 772 UNP_LOCK_ASSERT(); 773 774 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 775 if (len <= 0) 776 return (EINVAL); 777 strlcpy(buf, soun->sun_path, len + 1); 778 UNP_UNLOCK(); 779 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 780 mtx_lock(&Giant); 781 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); 782 error = namei(&nd); 783 if (error) 784 vp = NULL; 785 else 786 vp = nd.ni_vp; 787 ASSERT_VOP_LOCKED(vp, "unp_connect"); 788 NDFREE(&nd, NDF_ONLY_PNBUF); 789 if (error) 790 goto bad; 791 792 if (vp->v_type != VSOCK) { 793 error = ENOTSOCK; 794 goto bad; 795 } 796 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 797 if (error) 798 goto bad; 799 mtx_unlock(&Giant); 800 UNP_LOCK(); 801 so2 = vp->v_socket; 802 if (so2 == NULL) { 803 error = ECONNREFUSED; 804 goto bad2; 805 } 806 if (so->so_type != so2->so_type) { 807 error = EPROTOTYPE; 808 goto bad2; 809 } 810 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 811 if (so2->so_options & SO_ACCEPTCONN) { 812 /* 813 * NB: drop locks here so unp_attach is entered 814 * w/o locks; this avoids a recursive lock 815 * of the head and holding sleep locks across 816 * a (potentially) blocking malloc. 817 */ 818 UNP_UNLOCK(); 819 so3 = sonewconn(so2, 0); 820 UNP_LOCK(); 821 } else 822 so3 = NULL; 823 if (so3 == NULL) { 824 error = ECONNREFUSED; 825 goto bad2; 826 } 827 unp = sotounpcb(so); 828 unp2 = sotounpcb(so2); 829 unp3 = sotounpcb(so3); 830 if (unp2->unp_addr != NULL) { 831 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 832 unp3->unp_addr = (struct sockaddr_un *) sa; 833 sa = NULL; 834 } 835 /* 836 * unp_peercred management: 837 * 838 * The connecter's (client's) credentials are copied 839 * from its process structure at the time of connect() 840 * (which is now). 841 */ 842 cru2x(td->td_ucred, &unp3->unp_peercred); 843 unp3->unp_flags |= UNP_HAVEPC; 844 /* 845 * The receiver's (server's) credentials are copied 846 * from the unp_peercred member of socket on which the 847 * former called listen(); unp_listen() cached that 848 * process's credentials at that time so we can use 849 * them now. 850 */ 851 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 852 ("unp_connect: listener without cached peercred")); 853 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 854 sizeof(unp->unp_peercred)); 855 unp->unp_flags |= UNP_HAVEPC; 856 #ifdef MAC 857 SOCK_LOCK(so); 858 mac_set_socket_peer_from_socket(so, so3); 859 mac_set_socket_peer_from_socket(so3, so); 860 SOCK_UNLOCK(so); 861 #endif 862 863 so2 = so3; 864 } 865 error = unp_connect2(so, so2); 866 bad2: 867 UNP_UNLOCK(); 868 mtx_lock(&Giant); 869 bad: 870 mtx_assert(&Giant, MA_OWNED); 871 if (vp != NULL) 872 vput(vp); 873 mtx_unlock(&Giant); 874 free(sa, M_SONAME); 875 UNP_LOCK(); 876 return (error); 877 } 878 879 static int 880 unp_connect2(so, so2) 881 register struct socket *so; 882 register struct socket *so2; 883 { 884 register struct unpcb *unp = sotounpcb(so); 885 register struct unpcb *unp2; 886 887 UNP_LOCK_ASSERT(); 888 889 if (so2->so_type != so->so_type) 890 return (EPROTOTYPE); 891 unp2 = sotounpcb(so2); 892 unp->unp_conn = unp2; 893 switch (so->so_type) { 894 895 case SOCK_DGRAM: 896 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 897 soisconnected(so); 898 break; 899 900 case SOCK_STREAM: 901 unp2->unp_conn = unp; 902 soisconnected(so); 903 soisconnected(so2); 904 break; 905 906 default: 907 panic("unp_connect2"); 908 } 909 return (0); 910 } 911 912 static void 913 unp_disconnect(unp) 914 struct unpcb *unp; 915 { 916 register struct unpcb *unp2 = unp->unp_conn; 917 struct socket *so; 918 919 UNP_LOCK_ASSERT(); 920 921 if (unp2 == NULL) 922 return; 923 unp->unp_conn = NULL; 924 switch (unp->unp_socket->so_type) { 925 926 case SOCK_DGRAM: 927 LIST_REMOVE(unp, unp_reflink); 928 so = unp->unp_socket; 929 SOCK_LOCK(so); 930 so->so_state &= ~SS_ISCONNECTED; 931 SOCK_UNLOCK(so); 932 break; 933 934 case SOCK_STREAM: 935 soisdisconnected(unp->unp_socket); 936 unp2->unp_conn = NULL; 937 soisdisconnected(unp2->unp_socket); 938 break; 939 } 940 } 941 942 #ifdef notdef 943 void 944 unp_abort(unp) 945 struct unpcb *unp; 946 { 947 948 unp_detach(unp); 949 } 950 #endif 951 952 /* 953 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed 954 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers 955 * are safe to reference. It first scans the list of struct unpcb's to 956 * generate a pointer list, then it rescans its list one entry at a time to 957 * externalize and copyout. It checks the generation number to see if a 958 * struct unpcb has been reused, and will skip it if so. 959 */ 960 static int 961 unp_pcblist(SYSCTL_HANDLER_ARGS) 962 { 963 int error, i, n; 964 struct unpcb *unp, **unp_list; 965 unp_gen_t gencnt; 966 struct xunpgen *xug; 967 struct unp_head *head; 968 struct xunpcb *xu; 969 970 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 971 972 /* 973 * The process of preparing the PCB list is too time-consuming and 974 * resource-intensive to repeat twice on every request. 975 */ 976 if (req->oldptr == NULL) { 977 n = unp_count; 978 req->oldidx = 2 * (sizeof *xug) 979 + (n + n/8) * sizeof(struct xunpcb); 980 return (0); 981 } 982 983 if (req->newptr != NULL) 984 return (EPERM); 985 986 /* 987 * OK, now we're committed to doing something. 988 */ 989 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 990 UNP_LOCK(); 991 gencnt = unp_gencnt; 992 n = unp_count; 993 UNP_UNLOCK(); 994 995 xug->xug_len = sizeof *xug; 996 xug->xug_count = n; 997 xug->xug_gen = gencnt; 998 xug->xug_sogen = so_gencnt; 999 error = SYSCTL_OUT(req, xug, sizeof *xug); 1000 if (error) { 1001 free(xug, M_TEMP); 1002 return (error); 1003 } 1004 1005 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1006 1007 UNP_LOCK(); 1008 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1009 unp = LIST_NEXT(unp, unp_link)) { 1010 if (unp->unp_gencnt <= gencnt) { 1011 if (cr_cansee(req->td->td_ucred, 1012 unp->unp_socket->so_cred)) 1013 continue; 1014 unp_list[i++] = unp; 1015 } 1016 } 1017 UNP_UNLOCK(); 1018 n = i; /* in case we lost some during malloc */ 1019 1020 error = 0; 1021 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK); 1022 for (i = 0; i < n; i++) { 1023 unp = unp_list[i]; 1024 if (unp->unp_gencnt <= gencnt) { 1025 xu->xu_len = sizeof *xu; 1026 xu->xu_unpp = unp; 1027 /* 1028 * XXX - need more locking here to protect against 1029 * connect/disconnect races for SMP. 1030 */ 1031 if (unp->unp_addr != NULL) 1032 bcopy(unp->unp_addr, &xu->xu_addr, 1033 unp->unp_addr->sun_len); 1034 if (unp->unp_conn != NULL && 1035 unp->unp_conn->unp_addr != NULL) 1036 bcopy(unp->unp_conn->unp_addr, 1037 &xu->xu_caddr, 1038 unp->unp_conn->unp_addr->sun_len); 1039 bcopy(unp, &xu->xu_unp, sizeof *unp); 1040 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1041 error = SYSCTL_OUT(req, xu, sizeof *xu); 1042 } 1043 } 1044 free(xu, M_TEMP); 1045 if (!error) { 1046 /* 1047 * Give the user an updated idea of our state. 1048 * If the generation differs from what we told 1049 * her before, she knows that something happened 1050 * while we were processing this request, and it 1051 * might be necessary to retry. 1052 */ 1053 xug->xug_gen = unp_gencnt; 1054 xug->xug_sogen = so_gencnt; 1055 xug->xug_count = unp_count; 1056 error = SYSCTL_OUT(req, xug, sizeof *xug); 1057 } 1058 free(unp_list, M_TEMP); 1059 free(xug, M_TEMP); 1060 return (error); 1061 } 1062 1063 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1064 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1065 "List of active local datagram sockets"); 1066 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1067 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1068 "List of active local stream sockets"); 1069 1070 static void 1071 unp_shutdown(unp) 1072 struct unpcb *unp; 1073 { 1074 struct socket *so; 1075 1076 UNP_LOCK_ASSERT(); 1077 1078 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 1079 (so = unp->unp_conn->unp_socket)) 1080 socantrcvmore(so); 1081 } 1082 1083 static void 1084 unp_drop(unp, errno) 1085 struct unpcb *unp; 1086 int errno; 1087 { 1088 struct socket *so = unp->unp_socket; 1089 1090 UNP_LOCK_ASSERT(); 1091 1092 so->so_error = errno; 1093 unp_disconnect(unp); 1094 } 1095 1096 #ifdef notdef 1097 void 1098 unp_drain() 1099 { 1100 1101 } 1102 #endif 1103 1104 static void 1105 unp_freerights(rp, fdcount) 1106 struct file **rp; 1107 int fdcount; 1108 { 1109 int i; 1110 struct file *fp; 1111 1112 for (i = 0; i < fdcount; i++) { 1113 fp = *rp; 1114 /* 1115 * zero the pointer before calling 1116 * unp_discard since it may end up 1117 * in unp_gc().. 1118 */ 1119 *rp++ = 0; 1120 unp_discard(fp); 1121 } 1122 } 1123 1124 int 1125 unp_externalize(control, controlp) 1126 struct mbuf *control, **controlp; 1127 { 1128 struct thread *td = curthread; /* XXX */ 1129 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1130 int i; 1131 int *fdp; 1132 struct file **rp; 1133 struct file *fp; 1134 void *data; 1135 socklen_t clen = control->m_len, datalen; 1136 int error, newfds; 1137 int f; 1138 u_int newlen; 1139 1140 error = 0; 1141 if (controlp != NULL) /* controlp == NULL => free control messages */ 1142 *controlp = NULL; 1143 1144 while (cm != NULL) { 1145 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1146 error = EINVAL; 1147 break; 1148 } 1149 1150 data = CMSG_DATA(cm); 1151 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1152 1153 if (cm->cmsg_level == SOL_SOCKET 1154 && cm->cmsg_type == SCM_RIGHTS) { 1155 newfds = datalen / sizeof(struct file *); 1156 rp = data; 1157 1158 /* If we're not outputting the descriptors free them. */ 1159 if (error || controlp == NULL) { 1160 unp_freerights(rp, newfds); 1161 goto next; 1162 } 1163 FILEDESC_LOCK(td->td_proc->p_fd); 1164 /* if the new FD's will not fit free them. */ 1165 if (!fdavail(td, newfds)) { 1166 FILEDESC_UNLOCK(td->td_proc->p_fd); 1167 error = EMSGSIZE; 1168 unp_freerights(rp, newfds); 1169 goto next; 1170 } 1171 /* 1172 * now change each pointer to an fd in the global 1173 * table to an integer that is the index to the 1174 * local fd table entry that we set up to point 1175 * to the global one we are transferring. 1176 */ 1177 newlen = newfds * sizeof(int); 1178 *controlp = sbcreatecontrol(NULL, newlen, 1179 SCM_RIGHTS, SOL_SOCKET); 1180 if (*controlp == NULL) { 1181 FILEDESC_UNLOCK(td->td_proc->p_fd); 1182 error = E2BIG; 1183 unp_freerights(rp, newfds); 1184 goto next; 1185 } 1186 1187 fdp = (int *) 1188 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1189 for (i = 0; i < newfds; i++) { 1190 if (fdalloc(td, 0, &f)) 1191 panic("unp_externalize fdalloc failed"); 1192 fp = *rp++; 1193 td->td_proc->p_fd->fd_ofiles[f] = fp; 1194 FILE_LOCK(fp); 1195 fp->f_msgcount--; 1196 FILE_UNLOCK(fp); 1197 unp_rights--; 1198 *fdp++ = f; 1199 } 1200 FILEDESC_UNLOCK(td->td_proc->p_fd); 1201 } else { /* We can just copy anything else across */ 1202 if (error || controlp == NULL) 1203 goto next; 1204 *controlp = sbcreatecontrol(NULL, datalen, 1205 cm->cmsg_type, cm->cmsg_level); 1206 if (*controlp == NULL) { 1207 error = ENOBUFS; 1208 goto next; 1209 } 1210 bcopy(data, 1211 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1212 datalen); 1213 } 1214 1215 controlp = &(*controlp)->m_next; 1216 1217 next: 1218 if (CMSG_SPACE(datalen) < clen) { 1219 clen -= CMSG_SPACE(datalen); 1220 cm = (struct cmsghdr *) 1221 ((caddr_t)cm + CMSG_SPACE(datalen)); 1222 } else { 1223 clen = 0; 1224 cm = NULL; 1225 } 1226 } 1227 1228 m_freem(control); 1229 1230 return (error); 1231 } 1232 1233 void 1234 unp_init(void) 1235 { 1236 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1237 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1238 if (unp_zone == NULL) 1239 panic("unp_init"); 1240 uma_zone_set_max(unp_zone, nmbclusters); 1241 LIST_INIT(&unp_dhead); 1242 LIST_INIT(&unp_shead); 1243 1244 UNP_LOCK_INIT(); 1245 } 1246 1247 static int 1248 unp_internalize(controlp, td) 1249 struct mbuf **controlp; 1250 struct thread *td; 1251 { 1252 struct mbuf *control = *controlp; 1253 struct proc *p = td->td_proc; 1254 struct filedesc *fdescp = p->p_fd; 1255 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1256 struct cmsgcred *cmcred; 1257 struct file **rp; 1258 struct file *fp; 1259 struct timeval *tv; 1260 int i, fd, *fdp; 1261 void *data; 1262 socklen_t clen = control->m_len, datalen; 1263 int error, oldfds; 1264 u_int newlen; 1265 1266 error = 0; 1267 *controlp = NULL; 1268 1269 while (cm != NULL) { 1270 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1271 || cm->cmsg_len > clen) { 1272 error = EINVAL; 1273 goto out; 1274 } 1275 1276 data = CMSG_DATA(cm); 1277 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1278 1279 switch (cm->cmsg_type) { 1280 /* 1281 * Fill in credential information. 1282 */ 1283 case SCM_CREDS: 1284 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1285 SCM_CREDS, SOL_SOCKET); 1286 if (*controlp == NULL) { 1287 error = ENOBUFS; 1288 goto out; 1289 } 1290 1291 cmcred = (struct cmsgcred *) 1292 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1293 cmcred->cmcred_pid = p->p_pid; 1294 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1295 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1296 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1297 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1298 CMGROUP_MAX); 1299 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1300 cmcred->cmcred_groups[i] = 1301 td->td_ucred->cr_groups[i]; 1302 break; 1303 1304 case SCM_RIGHTS: 1305 oldfds = datalen / sizeof (int); 1306 /* 1307 * check that all the FDs passed in refer to legal files 1308 * If not, reject the entire operation. 1309 */ 1310 fdp = data; 1311 FILEDESC_LOCK(fdescp); 1312 for (i = 0; i < oldfds; i++) { 1313 fd = *fdp++; 1314 if ((unsigned)fd >= fdescp->fd_nfiles || 1315 fdescp->fd_ofiles[fd] == NULL) { 1316 FILEDESC_UNLOCK(fdescp); 1317 error = EBADF; 1318 goto out; 1319 } 1320 fp = fdescp->fd_ofiles[fd]; 1321 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1322 FILEDESC_UNLOCK(fdescp); 1323 error = EOPNOTSUPP; 1324 goto out; 1325 } 1326 1327 } 1328 /* 1329 * Now replace the integer FDs with pointers to 1330 * the associated global file table entry.. 1331 */ 1332 newlen = oldfds * sizeof(struct file *); 1333 *controlp = sbcreatecontrol(NULL, newlen, 1334 SCM_RIGHTS, SOL_SOCKET); 1335 if (*controlp == NULL) { 1336 FILEDESC_UNLOCK(fdescp); 1337 error = E2BIG; 1338 goto out; 1339 } 1340 1341 fdp = data; 1342 rp = (struct file **) 1343 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1344 for (i = 0; i < oldfds; i++) { 1345 fp = fdescp->fd_ofiles[*fdp++]; 1346 *rp++ = fp; 1347 FILE_LOCK(fp); 1348 fp->f_count++; 1349 fp->f_msgcount++; 1350 FILE_UNLOCK(fp); 1351 unp_rights++; 1352 } 1353 FILEDESC_UNLOCK(fdescp); 1354 break; 1355 1356 case SCM_TIMESTAMP: 1357 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1358 SCM_TIMESTAMP, SOL_SOCKET); 1359 if (*controlp == NULL) { 1360 error = ENOBUFS; 1361 goto out; 1362 } 1363 tv = (struct timeval *) 1364 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1365 microtime(tv); 1366 break; 1367 1368 default: 1369 error = EINVAL; 1370 goto out; 1371 } 1372 1373 controlp = &(*controlp)->m_next; 1374 1375 if (CMSG_SPACE(datalen) < clen) { 1376 clen -= CMSG_SPACE(datalen); 1377 cm = (struct cmsghdr *) 1378 ((caddr_t)cm + CMSG_SPACE(datalen)); 1379 } else { 1380 clen = 0; 1381 cm = NULL; 1382 } 1383 } 1384 1385 out: 1386 m_freem(control); 1387 1388 return (error); 1389 } 1390 1391 static int unp_defer, unp_gcing; 1392 1393 static void 1394 unp_gc() 1395 { 1396 register struct file *fp, *nextfp; 1397 register struct socket *so; 1398 struct file **extra_ref, **fpp; 1399 int nunref, i; 1400 int nfiles_snap; 1401 int nfiles_slack = 20; 1402 1403 UNP_LOCK_ASSERT(); 1404 1405 if (unp_gcing) 1406 return; 1407 unp_gcing = 1; 1408 unp_defer = 0; 1409 /* 1410 * before going through all this, set all FDs to 1411 * be NOT defered and NOT externally accessible 1412 */ 1413 /* 1414 * XXXRW: Acquiring a sleep lock while holding UNP 1415 * mutex cannot be a good thing. 1416 */ 1417 sx_slock(&filelist_lock); 1418 LIST_FOREACH(fp, &filehead, f_list) 1419 fp->f_gcflag &= ~(FMARK|FDEFER); 1420 do { 1421 LIST_FOREACH(fp, &filehead, f_list) { 1422 FILE_LOCK(fp); 1423 /* 1424 * If the file is not open, skip it 1425 */ 1426 if (fp->f_count == 0) { 1427 FILE_UNLOCK(fp); 1428 continue; 1429 } 1430 /* 1431 * If we already marked it as 'defer' in a 1432 * previous pass, then try process it this time 1433 * and un-mark it 1434 */ 1435 if (fp->f_gcflag & FDEFER) { 1436 fp->f_gcflag &= ~FDEFER; 1437 unp_defer--; 1438 } else { 1439 /* 1440 * if it's not defered, then check if it's 1441 * already marked.. if so skip it 1442 */ 1443 if (fp->f_gcflag & FMARK) { 1444 FILE_UNLOCK(fp); 1445 continue; 1446 } 1447 /* 1448 * If all references are from messages 1449 * in transit, then skip it. it's not 1450 * externally accessible. 1451 */ 1452 if (fp->f_count == fp->f_msgcount) { 1453 FILE_UNLOCK(fp); 1454 continue; 1455 } 1456 /* 1457 * If it got this far then it must be 1458 * externally accessible. 1459 */ 1460 fp->f_gcflag |= FMARK; 1461 } 1462 /* 1463 * either it was defered, or it is externally 1464 * accessible and not already marked so. 1465 * Now check if it is possibly one of OUR sockets. 1466 */ 1467 if (fp->f_type != DTYPE_SOCKET || 1468 (so = fp->f_data) == NULL) { 1469 FILE_UNLOCK(fp); 1470 continue; 1471 } 1472 FILE_UNLOCK(fp); 1473 if (so->so_proto->pr_domain != &localdomain || 1474 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1475 continue; 1476 #ifdef notdef 1477 if (so->so_rcv.sb_flags & SB_LOCK) { 1478 /* 1479 * This is problematical; it's not clear 1480 * we need to wait for the sockbuf to be 1481 * unlocked (on a uniprocessor, at least), 1482 * and it's also not clear what to do 1483 * if sbwait returns an error due to receipt 1484 * of a signal. If sbwait does return 1485 * an error, we'll go into an infinite 1486 * loop. Delete all of this for now. 1487 */ 1488 (void) sbwait(&so->so_rcv); 1489 goto restart; 1490 } 1491 #endif 1492 /* 1493 * So, Ok, it's one of our sockets and it IS externally 1494 * accessible (or was defered). Now we look 1495 * to see if we hold any file descriptors in its 1496 * message buffers. Follow those links and mark them 1497 * as accessible too. 1498 */ 1499 SOCKBUF_LOCK(&so->so_rcv); 1500 unp_scan(so->so_rcv.sb_mb, unp_mark); 1501 SOCKBUF_UNLOCK(&so->so_rcv); 1502 } 1503 } while (unp_defer); 1504 sx_sunlock(&filelist_lock); 1505 /* 1506 * We grab an extra reference to each of the file table entries 1507 * that are not otherwise accessible and then free the rights 1508 * that are stored in messages on them. 1509 * 1510 * The bug in the orginal code is a little tricky, so I'll describe 1511 * what's wrong with it here. 1512 * 1513 * It is incorrect to simply unp_discard each entry for f_msgcount 1514 * times -- consider the case of sockets A and B that contain 1515 * references to each other. On a last close of some other socket, 1516 * we trigger a gc since the number of outstanding rights (unp_rights) 1517 * is non-zero. If during the sweep phase the gc code un_discards, 1518 * we end up doing a (full) closef on the descriptor. A closef on A 1519 * results in the following chain. Closef calls soo_close, which 1520 * calls soclose. Soclose calls first (through the switch 1521 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1522 * returns because the previous instance had set unp_gcing, and 1523 * we return all the way back to soclose, which marks the socket 1524 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1525 * to free up the rights that are queued in messages on the socket A, 1526 * i.e., the reference on B. The sorflush calls via the dom_dispose 1527 * switch unp_dispose, which unp_scans with unp_discard. This second 1528 * instance of unp_discard just calls closef on B. 1529 * 1530 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1531 * which results in another closef on A. Unfortunately, A is already 1532 * being closed, and the descriptor has already been marked with 1533 * SS_NOFDREF, and soclose panics at this point. 1534 * 1535 * Here, we first take an extra reference to each inaccessible 1536 * descriptor. Then, we call sorflush ourself, since we know 1537 * it is a Unix domain socket anyhow. After we destroy all the 1538 * rights carried in messages, we do a last closef to get rid 1539 * of our extra reference. This is the last close, and the 1540 * unp_detach etc will shut down the socket. 1541 * 1542 * 91/09/19, bsy@cs.cmu.edu 1543 */ 1544 again: 1545 nfiles_snap = nfiles + nfiles_slack; /* some slack */ 1546 extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP, 1547 M_WAITOK); 1548 sx_slock(&filelist_lock); 1549 if (nfiles_snap < nfiles) { 1550 sx_sunlock(&filelist_lock); 1551 free(extra_ref, M_TEMP); 1552 nfiles_slack += 20; 1553 goto again; 1554 } 1555 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 1556 fp != NULL; fp = nextfp) { 1557 nextfp = LIST_NEXT(fp, f_list); 1558 FILE_LOCK(fp); 1559 /* 1560 * If it's not open, skip it 1561 */ 1562 if (fp->f_count == 0) { 1563 FILE_UNLOCK(fp); 1564 continue; 1565 } 1566 /* 1567 * If all refs are from msgs, and it's not marked accessible 1568 * then it must be referenced from some unreachable cycle 1569 * of (shut-down) FDs, so include it in our 1570 * list of FDs to remove 1571 */ 1572 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { 1573 *fpp++ = fp; 1574 nunref++; 1575 fp->f_count++; 1576 } 1577 FILE_UNLOCK(fp); 1578 } 1579 sx_sunlock(&filelist_lock); 1580 /* 1581 * for each FD on our hit list, do the following two things 1582 */ 1583 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1584 struct file *tfp = *fpp; 1585 FILE_LOCK(tfp); 1586 if (tfp->f_type == DTYPE_SOCKET && 1587 tfp->f_data != NULL) { 1588 FILE_UNLOCK(tfp); 1589 sorflush(tfp->f_data); 1590 } else { 1591 FILE_UNLOCK(tfp); 1592 } 1593 } 1594 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1595 closef(*fpp, (struct thread *) NULL); 1596 free(extra_ref, M_TEMP); 1597 unp_gcing = 0; 1598 } 1599 1600 void 1601 unp_dispose(m) 1602 struct mbuf *m; 1603 { 1604 1605 if (m) 1606 unp_scan(m, unp_discard); 1607 } 1608 1609 static int 1610 unp_listen(unp, td) 1611 struct unpcb *unp; 1612 struct thread *td; 1613 { 1614 UNP_LOCK_ASSERT(); 1615 1616 /* 1617 * XXXRW: Why populate the local peer cred with our own credential? 1618 */ 1619 cru2x(td->td_ucred, &unp->unp_peercred); 1620 unp->unp_flags |= UNP_HAVEPCCACHED; 1621 return (0); 1622 } 1623 1624 static void 1625 unp_scan(m0, op) 1626 register struct mbuf *m0; 1627 void (*op)(struct file *); 1628 { 1629 struct mbuf *m; 1630 struct file **rp; 1631 struct cmsghdr *cm; 1632 void *data; 1633 int i; 1634 socklen_t clen, datalen; 1635 int qfds; 1636 1637 while (m0 != NULL) { 1638 for (m = m0; m; m = m->m_next) { 1639 if (m->m_type != MT_CONTROL) 1640 continue; 1641 1642 cm = mtod(m, struct cmsghdr *); 1643 clen = m->m_len; 1644 1645 while (cm != NULL) { 1646 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 1647 break; 1648 1649 data = CMSG_DATA(cm); 1650 datalen = (caddr_t)cm + cm->cmsg_len 1651 - (caddr_t)data; 1652 1653 if (cm->cmsg_level == SOL_SOCKET && 1654 cm->cmsg_type == SCM_RIGHTS) { 1655 qfds = datalen / sizeof (struct file *); 1656 rp = data; 1657 for (i = 0; i < qfds; i++) 1658 (*op)(*rp++); 1659 } 1660 1661 if (CMSG_SPACE(datalen) < clen) { 1662 clen -= CMSG_SPACE(datalen); 1663 cm = (struct cmsghdr *) 1664 ((caddr_t)cm + CMSG_SPACE(datalen)); 1665 } else { 1666 clen = 0; 1667 cm = NULL; 1668 } 1669 } 1670 } 1671 m0 = m0->m_act; 1672 } 1673 } 1674 1675 static void 1676 unp_mark(fp) 1677 struct file *fp; 1678 { 1679 if (fp->f_gcflag & FMARK) 1680 return; 1681 unp_defer++; 1682 fp->f_gcflag |= (FMARK|FDEFER); 1683 } 1684 1685 static void 1686 unp_discard(fp) 1687 struct file *fp; 1688 { 1689 FILE_LOCK(fp); 1690 fp->f_msgcount--; 1691 unp_rights--; 1692 FILE_UNLOCK(fp); 1693 (void) closef(fp, (struct thread *)NULL); 1694 } 1695