1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_mac.h" 36 37 #include <sys/param.h> 38 #include <sys/domain.h> 39 #include <sys/fcntl.h> 40 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 41 #include <sys/file.h> 42 #include <sys/filedesc.h> 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/mbuf.h> 48 #include <sys/mutex.h> 49 #include <sys/namei.h> 50 #include <sys/proc.h> 51 #include <sys/protosw.h> 52 #include <sys/resourcevar.h> 53 #include <sys/socket.h> 54 #include <sys/socketvar.h> 55 #include <sys/signalvar.h> 56 #include <sys/stat.h> 57 #include <sys/sx.h> 58 #include <sys/sysctl.h> 59 #include <sys/systm.h> 60 #include <sys/un.h> 61 #include <sys/unpcb.h> 62 #include <sys/vnode.h> 63 64 #include <vm/uma.h> 65 66 static uma_zone_t unp_zone; 67 static unp_gen_t unp_gencnt; 68 static u_int unp_count; 69 70 static struct unp_head unp_shead, unp_dhead; 71 72 /* 73 * Unix communications domain. 74 * 75 * TODO: 76 * SEQPACKET, RDM 77 * rethink name space problems 78 * need a proper out-of-band 79 * lock pushdown 80 */ 81 static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 82 static ino_t unp_ino; /* prototype for fake inode numbers */ 83 84 static struct mtx unp_mtx; 85 #define UNP_LOCK_INIT() \ 86 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) 87 #define UNP_LOCK() mtx_lock(&unp_mtx) 88 #define UNP_UNLOCK() mtx_unlock(&unp_mtx) 89 #define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) 90 91 static int unp_attach(struct socket *); 92 static void unp_detach(struct unpcb *); 93 static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); 94 static int unp_connect(struct socket *,struct sockaddr *, struct thread *); 95 static int unp_connect2(struct socket *so, struct socket *so2); 96 static void unp_disconnect(struct unpcb *); 97 static void unp_shutdown(struct unpcb *); 98 static void unp_drop(struct unpcb *, int); 99 static void unp_gc(void); 100 static void unp_scan(struct mbuf *, void (*)(struct file *)); 101 static void unp_mark(struct file *); 102 static void unp_discard(struct file *); 103 static void unp_freerights(struct file **, int); 104 static int unp_internalize(struct mbuf **, struct thread *); 105 static int unp_listen(struct unpcb *, struct thread *); 106 107 static int 108 uipc_abort(struct socket *so) 109 { 110 struct unpcb *unp = sotounpcb(so); 111 112 if (unp == NULL) 113 return (EINVAL); 114 UNP_LOCK(); 115 unp_drop(unp, ECONNABORTED); 116 unp_detach(unp); /* NB: unlocks */ 117 SOCK_LOCK(so); 118 sotryfree(so); 119 return (0); 120 } 121 122 static int 123 uipc_accept(struct socket *so, struct sockaddr **nam) 124 { 125 struct unpcb *unp = sotounpcb(so); 126 const struct sockaddr *sa; 127 128 if (unp == NULL) 129 return (EINVAL); 130 131 /* 132 * Pass back name of connected socket, 133 * if it was bound and we are still connected 134 * (our peer may have closed already!). 135 */ 136 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 137 UNP_LOCK(); 138 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) 139 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 140 else 141 sa = &sun_noname; 142 bcopy(sa, *nam, sa->sa_len); 143 UNP_UNLOCK(); 144 return (0); 145 } 146 147 static int 148 uipc_attach(struct socket *so, int proto, struct thread *td) 149 { 150 struct unpcb *unp = sotounpcb(so); 151 152 if (unp != NULL) 153 return (EISCONN); 154 return (unp_attach(so)); 155 } 156 157 static int 158 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 159 { 160 struct unpcb *unp = sotounpcb(so); 161 162 if (unp == NULL) 163 return (EINVAL); 164 165 return (unp_bind(unp, nam, td)); 166 } 167 168 static int 169 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 170 { 171 struct unpcb *unp = sotounpcb(so); 172 int error; 173 174 if (unp == NULL) 175 return (EINVAL); 176 UNP_LOCK(); 177 error = unp_connect(so, nam, curthread); 178 UNP_UNLOCK(); 179 return (error); 180 } 181 182 int 183 uipc_connect2(struct socket *so1, struct socket *so2) 184 { 185 struct unpcb *unp = sotounpcb(so1); 186 int error; 187 188 if (unp == NULL) 189 return (EINVAL); 190 191 UNP_LOCK(); 192 error = unp_connect2(so1, so2); 193 UNP_UNLOCK(); 194 return (error); 195 } 196 197 /* control is EOPNOTSUPP */ 198 199 static int 200 uipc_detach(struct socket *so) 201 { 202 struct unpcb *unp = sotounpcb(so); 203 204 if (unp == NULL) 205 return (EINVAL); 206 207 UNP_LOCK(); 208 unp_detach(unp); /* NB: unlocks unp */ 209 return (0); 210 } 211 212 static int 213 uipc_disconnect(struct socket *so) 214 { 215 struct unpcb *unp = sotounpcb(so); 216 217 if (unp == NULL) 218 return (EINVAL); 219 UNP_LOCK(); 220 unp_disconnect(unp); 221 UNP_UNLOCK(); 222 return (0); 223 } 224 225 static int 226 uipc_listen(struct socket *so, struct thread *td) 227 { 228 struct unpcb *unp = sotounpcb(so); 229 int error; 230 231 if (unp == NULL || unp->unp_vnode == NULL) 232 return (EINVAL); 233 UNP_LOCK(); 234 error = unp_listen(unp, td); 235 UNP_UNLOCK(); 236 return (error); 237 } 238 239 static int 240 uipc_peeraddr(struct socket *so, struct sockaddr **nam) 241 { 242 struct unpcb *unp = sotounpcb(so); 243 const struct sockaddr *sa; 244 245 if (unp == NULL) 246 return (EINVAL); 247 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 248 UNP_LOCK(); 249 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) 250 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 251 else { 252 /* 253 * XXX: It seems that this test always fails even when 254 * connection is established. So, this else clause is 255 * added as workaround to return PF_LOCAL sockaddr. 256 */ 257 sa = &sun_noname; 258 } 259 bcopy(sa, *nam, sa->sa_len); 260 UNP_UNLOCK(); 261 return (0); 262 } 263 264 static int 265 uipc_rcvd(struct socket *so, int flags) 266 { 267 struct unpcb *unp = sotounpcb(so); 268 struct socket *so2; 269 u_long newhiwat; 270 271 if (unp == NULL) 272 return (EINVAL); 273 UNP_LOCK(); 274 switch (so->so_type) { 275 case SOCK_DGRAM: 276 panic("uipc_rcvd DGRAM?"); 277 /*NOTREACHED*/ 278 279 case SOCK_STREAM: 280 if (unp->unp_conn == NULL) 281 break; 282 so2 = unp->unp_conn->unp_socket; 283 SOCKBUF_LOCK(&so2->so_snd); 284 SOCKBUF_LOCK(&so->so_rcv); 285 /* 286 * Adjust backpressure on sender 287 * and wakeup any waiting to write. 288 */ 289 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 290 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 291 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - 292 so->so_rcv.sb_cc; 293 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 294 newhiwat, RLIM_INFINITY); 295 unp->unp_cc = so->so_rcv.sb_cc; 296 SOCKBUF_UNLOCK(&so->so_rcv); 297 sowwakeup_locked(so2); 298 break; 299 300 default: 301 panic("uipc_rcvd unknown socktype"); 302 } 303 UNP_UNLOCK(); 304 return (0); 305 } 306 307 /* pru_rcvoob is EOPNOTSUPP */ 308 309 static int 310 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 311 struct mbuf *control, struct thread *td) 312 { 313 int error = 0; 314 struct unpcb *unp = sotounpcb(so); 315 struct socket *so2; 316 u_long newhiwat; 317 318 if (unp == NULL) { 319 error = EINVAL; 320 goto release; 321 } 322 if (flags & PRUS_OOB) { 323 error = EOPNOTSUPP; 324 goto release; 325 } 326 327 if (control != NULL && (error = unp_internalize(&control, td))) 328 goto release; 329 330 UNP_LOCK(); 331 switch (so->so_type) { 332 case SOCK_DGRAM: 333 { 334 const struct sockaddr *from; 335 336 if (nam != NULL) { 337 if (unp->unp_conn != NULL) { 338 error = EISCONN; 339 break; 340 } 341 error = unp_connect(so, nam, td); 342 if (error) 343 break; 344 } else { 345 if (unp->unp_conn == NULL) { 346 error = ENOTCONN; 347 break; 348 } 349 } 350 so2 = unp->unp_conn->unp_socket; 351 if (unp->unp_addr != NULL) 352 from = (struct sockaddr *)unp->unp_addr; 353 else 354 from = &sun_noname; 355 SOCKBUF_LOCK(&so2->so_rcv); 356 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { 357 sorwakeup_locked(so2); 358 m = NULL; 359 control = NULL; 360 } else { 361 SOCKBUF_UNLOCK(&so2->so_rcv); 362 error = ENOBUFS; 363 } 364 if (nam != NULL) 365 unp_disconnect(unp); 366 break; 367 } 368 369 case SOCK_STREAM: 370 /* Connect if not connected yet. */ 371 /* 372 * Note: A better implementation would complain 373 * if not equal to the peer's address. 374 */ 375 if ((so->so_state & SS_ISCONNECTED) == 0) { 376 if (nam != NULL) { 377 error = unp_connect(so, nam, td); 378 if (error) 379 break; /* XXX */ 380 } else { 381 error = ENOTCONN; 382 break; 383 } 384 } 385 386 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 387 error = EPIPE; 388 break; 389 } 390 if (unp->unp_conn == NULL) 391 panic("uipc_send connected but no connection?"); 392 so2 = unp->unp_conn->unp_socket; 393 SOCKBUF_LOCK(&so2->so_rcv); 394 /* 395 * Send to paired receive port, and then reduce 396 * send buffer hiwater marks to maintain backpressure. 397 * Wake up readers. 398 */ 399 if (control != NULL) { 400 if (sbappendcontrol_locked(&so2->so_rcv, m, control)) 401 control = NULL; 402 } else { 403 sbappend_locked(&so2->so_rcv, m); 404 } 405 so->so_snd.sb_mbmax -= 406 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 407 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 408 newhiwat = so->so_snd.sb_hiwat - 409 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); 410 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 411 newhiwat, RLIM_INFINITY); 412 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 413 sorwakeup_locked(so2); 414 m = NULL; 415 break; 416 417 default: 418 panic("uipc_send unknown socktype"); 419 } 420 421 /* 422 * SEND_EOF is equivalent to a SEND followed by 423 * a SHUTDOWN. 424 */ 425 if (flags & PRUS_EOF) { 426 socantsendmore(so); 427 unp_shutdown(unp); 428 } 429 UNP_UNLOCK(); 430 431 if (control != NULL && error != 0) 432 unp_dispose(control); 433 434 release: 435 if (control != NULL) 436 m_freem(control); 437 if (m != NULL) 438 m_freem(m); 439 return (error); 440 } 441 442 static int 443 uipc_sense(struct socket *so, struct stat *sb) 444 { 445 struct unpcb *unp = sotounpcb(so); 446 struct socket *so2; 447 448 if (unp == NULL) 449 return (EINVAL); 450 UNP_LOCK(); 451 sb->st_blksize = so->so_snd.sb_hiwat; 452 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 453 so2 = unp->unp_conn->unp_socket; 454 sb->st_blksize += so2->so_rcv.sb_cc; 455 } 456 sb->st_dev = NODEV; 457 if (unp->unp_ino == 0) 458 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; 459 sb->st_ino = unp->unp_ino; 460 UNP_UNLOCK(); 461 return (0); 462 } 463 464 static int 465 uipc_shutdown(struct socket *so) 466 { 467 struct unpcb *unp = sotounpcb(so); 468 469 if (unp == NULL) 470 return (EINVAL); 471 UNP_LOCK(); 472 socantsendmore(so); 473 unp_shutdown(unp); 474 UNP_UNLOCK(); 475 return (0); 476 } 477 478 static int 479 uipc_sockaddr(struct socket *so, struct sockaddr **nam) 480 { 481 struct unpcb *unp = sotounpcb(so); 482 const struct sockaddr *sa; 483 484 if (unp == NULL) 485 return (EINVAL); 486 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 487 UNP_LOCK(); 488 if (unp->unp_addr != NULL) 489 sa = (struct sockaddr *) unp->unp_addr; 490 else 491 sa = &sun_noname; 492 bcopy(sa, *nam, sa->sa_len); 493 UNP_UNLOCK(); 494 return (0); 495 } 496 497 struct pr_usrreqs uipc_usrreqs = { 498 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 499 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 500 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 501 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 502 sosend, soreceive, sopoll, pru_sosetlabel_null 503 }; 504 505 int 506 uipc_ctloutput(so, sopt) 507 struct socket *so; 508 struct sockopt *sopt; 509 { 510 struct unpcb *unp = sotounpcb(so); 511 struct xucred xu; 512 int error; 513 514 switch (sopt->sopt_dir) { 515 case SOPT_GET: 516 switch (sopt->sopt_name) { 517 case LOCAL_PEERCRED: 518 error = 0; 519 UNP_LOCK(); 520 if (unp->unp_flags & UNP_HAVEPC) 521 xu = unp->unp_peercred; 522 else { 523 if (so->so_type == SOCK_STREAM) 524 error = ENOTCONN; 525 else 526 error = EINVAL; 527 } 528 UNP_UNLOCK(); 529 if (error == 0) 530 error = sooptcopyout(sopt, &xu, sizeof(xu)); 531 break; 532 default: 533 error = EOPNOTSUPP; 534 break; 535 } 536 break; 537 case SOPT_SET: 538 default: 539 error = EOPNOTSUPP; 540 break; 541 } 542 return (error); 543 } 544 545 /* 546 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 547 * for stream sockets, although the total for sender and receiver is 548 * actually only PIPSIZ. 549 * Datagram sockets really use the sendspace as the maximum datagram size, 550 * and don't really want to reserve the sendspace. Their recvspace should 551 * be large enough for at least one max-size datagram plus address. 552 */ 553 #ifndef PIPSIZ 554 #define PIPSIZ 8192 555 #endif 556 static u_long unpst_sendspace = PIPSIZ; 557 static u_long unpst_recvspace = PIPSIZ; 558 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 559 static u_long unpdg_recvspace = 4*1024; 560 561 static int unp_rights; /* file descriptors in flight */ 562 563 SYSCTL_DECL(_net_local_stream); 564 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 565 &unpst_sendspace, 0, ""); 566 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 567 &unpst_recvspace, 0, ""); 568 SYSCTL_DECL(_net_local_dgram); 569 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 570 &unpdg_sendspace, 0, ""); 571 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 572 &unpdg_recvspace, 0, ""); 573 SYSCTL_DECL(_net_local); 574 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 575 576 static int 577 unp_attach(so) 578 struct socket *so; 579 { 580 register struct unpcb *unp; 581 int error; 582 583 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 584 switch (so->so_type) { 585 586 case SOCK_STREAM: 587 error = soreserve(so, unpst_sendspace, unpst_recvspace); 588 break; 589 590 case SOCK_DGRAM: 591 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 592 break; 593 594 default: 595 panic("unp_attach"); 596 } 597 if (error) 598 return (error); 599 } 600 unp = uma_zalloc(unp_zone, M_WAITOK); 601 if (unp == NULL) 602 return (ENOBUFS); 603 bzero(unp, sizeof *unp); 604 LIST_INIT(&unp->unp_refs); 605 unp->unp_socket = so; 606 607 UNP_LOCK(); 608 unp->unp_gencnt = ++unp_gencnt; 609 unp_count++; 610 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 611 : &unp_shead, unp, unp_link); 612 UNP_UNLOCK(); 613 614 so->so_pcb = unp; 615 return (0); 616 } 617 618 static void 619 unp_detach(unp) 620 register struct unpcb *unp; 621 { 622 struct vnode *vp; 623 624 UNP_LOCK_ASSERT(); 625 626 LIST_REMOVE(unp, unp_link); 627 unp->unp_gencnt = ++unp_gencnt; 628 --unp_count; 629 if ((vp = unp->unp_vnode) != NULL) { 630 /* 631 * XXXRW: should v_socket be frobbed only while holding 632 * Giant? 633 */ 634 unp->unp_vnode->v_socket = NULL; 635 unp->unp_vnode = NULL; 636 } 637 if (unp->unp_conn != NULL) 638 unp_disconnect(unp); 639 while (!LIST_EMPTY(&unp->unp_refs)) { 640 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 641 unp_drop(ref, ECONNRESET); 642 } 643 soisdisconnected(unp->unp_socket); 644 unp->unp_socket->so_pcb = NULL; 645 if (unp_rights) { 646 /* 647 * Normally the receive buffer is flushed later, 648 * in sofree, but if our receive buffer holds references 649 * to descriptors that are now garbage, we will dispose 650 * of those descriptor references after the garbage collector 651 * gets them (resulting in a "panic: closef: count < 0"). 652 */ 653 sorflush(unp->unp_socket); 654 unp_gc(); 655 } 656 UNP_UNLOCK(); 657 if (unp->unp_addr != NULL) 658 FREE(unp->unp_addr, M_SONAME); 659 uma_zfree(unp_zone, unp); 660 if (vp) { 661 mtx_lock(&Giant); 662 vrele(vp); 663 mtx_unlock(&Giant); 664 } 665 } 666 667 static int 668 unp_bind(unp, nam, td) 669 struct unpcb *unp; 670 struct sockaddr *nam; 671 struct thread *td; 672 { 673 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 674 struct vnode *vp; 675 struct mount *mp; 676 struct vattr vattr; 677 int error, namelen; 678 struct nameidata nd; 679 char *buf; 680 681 /* 682 * XXXRW: This test-and-set of unp_vnode is non-atomic; the 683 * unlocked read here is fine, but the value of unp_vnode needs 684 * to be tested again after we do all the lookups to see if the 685 * pcb is still unbound? 686 */ 687 if (unp->unp_vnode != NULL) 688 return (EINVAL); 689 690 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 691 if (namelen <= 0) 692 return (EINVAL); 693 694 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 695 strlcpy(buf, soun->sun_path, namelen + 1); 696 697 mtx_lock(&Giant); 698 restart: 699 mtx_assert(&Giant, MA_OWNED); 700 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, 701 buf, td); 702 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 703 error = namei(&nd); 704 if (error) 705 goto done; 706 vp = nd.ni_vp; 707 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 708 NDFREE(&nd, NDF_ONLY_PNBUF); 709 if (nd.ni_dvp == vp) 710 vrele(nd.ni_dvp); 711 else 712 vput(nd.ni_dvp); 713 if (vp != NULL) { 714 vrele(vp); 715 error = EADDRINUSE; 716 goto done; 717 } 718 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 719 if (error) 720 goto done; 721 goto restart; 722 } 723 VATTR_NULL(&vattr); 724 vattr.va_type = VSOCK; 725 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 726 #ifdef MAC 727 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 728 &vattr); 729 #endif 730 if (error == 0) { 731 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 732 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 733 } 734 NDFREE(&nd, NDF_ONLY_PNBUF); 735 vput(nd.ni_dvp); 736 if (error) 737 goto done; 738 vp = nd.ni_vp; 739 ASSERT_VOP_LOCKED(vp, "unp_bind"); 740 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 741 UNP_LOCK(); 742 vp->v_socket = unp->unp_socket; 743 unp->unp_vnode = vp; 744 unp->unp_addr = soun; 745 UNP_UNLOCK(); 746 VOP_UNLOCK(vp, 0, td); 747 vn_finished_write(mp); 748 done: 749 mtx_unlock(&Giant); 750 free(buf, M_TEMP); 751 return (error); 752 } 753 754 static int 755 unp_connect(so, nam, td) 756 struct socket *so; 757 struct sockaddr *nam; 758 struct thread *td; 759 { 760 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 761 register struct vnode *vp; 762 register struct socket *so2, *so3; 763 struct unpcb *unp = sotounpcb(so); 764 struct unpcb *unp2, *unp3; 765 int error, len; 766 struct nameidata nd; 767 char buf[SOCK_MAXADDRLEN]; 768 struct sockaddr *sa; 769 770 UNP_LOCK_ASSERT(); 771 772 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 773 if (len <= 0) 774 return (EINVAL); 775 strlcpy(buf, soun->sun_path, len + 1); 776 UNP_UNLOCK(); 777 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 778 mtx_lock(&Giant); 779 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); 780 error = namei(&nd); 781 if (error) 782 vp = NULL; 783 else 784 vp = nd.ni_vp; 785 ASSERT_VOP_LOCKED(vp, "unp_connect"); 786 NDFREE(&nd, NDF_ONLY_PNBUF); 787 if (error) 788 goto bad; 789 790 if (vp->v_type != VSOCK) { 791 error = ENOTSOCK; 792 goto bad; 793 } 794 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 795 if (error) 796 goto bad; 797 so2 = vp->v_socket; 798 if (so2 == NULL) { 799 error = ECONNREFUSED; 800 goto bad; 801 } 802 if (so->so_type != so2->so_type) { 803 error = EPROTOTYPE; 804 goto bad; 805 } 806 mtx_unlock(&Giant); 807 UNP_LOCK(); 808 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 809 if (so2->so_options & SO_ACCEPTCONN) { 810 /* 811 * NB: drop locks here so unp_attach is entered 812 * w/o locks; this avoids a recursive lock 813 * of the head and holding sleep locks across 814 * a (potentially) blocking malloc. 815 */ 816 UNP_UNLOCK(); 817 so3 = sonewconn(so2, 0); 818 UNP_LOCK(); 819 } else 820 so3 = NULL; 821 if (so3 == NULL) { 822 error = ECONNREFUSED; 823 goto bad2; 824 } 825 unp = sotounpcb(so); 826 unp2 = sotounpcb(so2); 827 unp3 = sotounpcb(so3); 828 if (unp2->unp_addr != NULL) { 829 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 830 unp3->unp_addr = (struct sockaddr_un *) sa; 831 sa = NULL; 832 } 833 /* 834 * unp_peercred management: 835 * 836 * The connecter's (client's) credentials are copied 837 * from its process structure at the time of connect() 838 * (which is now). 839 */ 840 cru2x(td->td_ucred, &unp3->unp_peercred); 841 unp3->unp_flags |= UNP_HAVEPC; 842 /* 843 * The receiver's (server's) credentials are copied 844 * from the unp_peercred member of socket on which the 845 * former called listen(); unp_listen() cached that 846 * process's credentials at that time so we can use 847 * them now. 848 */ 849 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 850 ("unp_connect: listener without cached peercred")); 851 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 852 sizeof(unp->unp_peercred)); 853 unp->unp_flags |= UNP_HAVEPC; 854 #ifdef MAC 855 SOCK_LOCK(so); 856 mac_set_socket_peer_from_socket(so, so3); 857 mac_set_socket_peer_from_socket(so3, so); 858 SOCK_UNLOCK(so); 859 #endif 860 861 so2 = so3; 862 } 863 error = unp_connect2(so, so2); 864 bad2: 865 UNP_UNLOCK(); 866 mtx_lock(&Giant); 867 bad: 868 mtx_assert(&Giant, MA_OWNED); 869 if (vp != NULL) 870 vput(vp); 871 mtx_unlock(&Giant); 872 free(sa, M_SONAME); 873 UNP_LOCK(); 874 return (error); 875 } 876 877 static int 878 unp_connect2(so, so2) 879 register struct socket *so; 880 register struct socket *so2; 881 { 882 register struct unpcb *unp = sotounpcb(so); 883 register struct unpcb *unp2; 884 885 UNP_LOCK_ASSERT(); 886 887 if (so2->so_type != so->so_type) 888 return (EPROTOTYPE); 889 unp2 = sotounpcb(so2); 890 unp->unp_conn = unp2; 891 switch (so->so_type) { 892 893 case SOCK_DGRAM: 894 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 895 soisconnected(so); 896 break; 897 898 case SOCK_STREAM: 899 unp2->unp_conn = unp; 900 soisconnected(so); 901 soisconnected(so2); 902 break; 903 904 default: 905 panic("unp_connect2"); 906 } 907 return (0); 908 } 909 910 static void 911 unp_disconnect(unp) 912 struct unpcb *unp; 913 { 914 register struct unpcb *unp2 = unp->unp_conn; 915 struct socket *so; 916 917 UNP_LOCK_ASSERT(); 918 919 if (unp2 == NULL) 920 return; 921 unp->unp_conn = NULL; 922 switch (unp->unp_socket->so_type) { 923 924 case SOCK_DGRAM: 925 LIST_REMOVE(unp, unp_reflink); 926 so = unp->unp_socket; 927 SOCK_LOCK(so); 928 so->so_state &= ~SS_ISCONNECTED; 929 SOCK_UNLOCK(so); 930 break; 931 932 case SOCK_STREAM: 933 soisdisconnected(unp->unp_socket); 934 unp2->unp_conn = NULL; 935 soisdisconnected(unp2->unp_socket); 936 break; 937 } 938 } 939 940 #ifdef notdef 941 void 942 unp_abort(unp) 943 struct unpcb *unp; 944 { 945 946 unp_detach(unp); 947 } 948 #endif 949 950 /* 951 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed 952 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers 953 * are safe to reference. It first scans the list of struct unpcb's to 954 * generate a pointer list, then it rescans its list one entry at a time to 955 * externalize and copyout. It checks the generation number to see if a 956 * struct unpcb has been reused, and will skip it if so. 957 */ 958 static int 959 unp_pcblist(SYSCTL_HANDLER_ARGS) 960 { 961 int error, i, n; 962 struct unpcb *unp, **unp_list; 963 unp_gen_t gencnt; 964 struct xunpgen *xug; 965 struct unp_head *head; 966 struct xunpcb *xu; 967 968 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 969 970 /* 971 * The process of preparing the PCB list is too time-consuming and 972 * resource-intensive to repeat twice on every request. 973 */ 974 if (req->oldptr == NULL) { 975 n = unp_count; 976 req->oldidx = 2 * (sizeof *xug) 977 + (n + n/8) * sizeof(struct xunpcb); 978 return (0); 979 } 980 981 if (req->newptr != NULL) 982 return (EPERM); 983 984 /* 985 * OK, now we're committed to doing something. 986 */ 987 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 988 UNP_LOCK(); 989 gencnt = unp_gencnt; 990 n = unp_count; 991 UNP_UNLOCK(); 992 993 xug->xug_len = sizeof *xug; 994 xug->xug_count = n; 995 xug->xug_gen = gencnt; 996 xug->xug_sogen = so_gencnt; 997 error = SYSCTL_OUT(req, xug, sizeof *xug); 998 if (error) { 999 free(xug, M_TEMP); 1000 return (error); 1001 } 1002 1003 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1004 1005 UNP_LOCK(); 1006 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1007 unp = LIST_NEXT(unp, unp_link)) { 1008 if (unp->unp_gencnt <= gencnt) { 1009 if (cr_cansee(req->td->td_ucred, 1010 unp->unp_socket->so_cred)) 1011 continue; 1012 unp_list[i++] = unp; 1013 } 1014 } 1015 UNP_UNLOCK(); 1016 n = i; /* in case we lost some during malloc */ 1017 1018 error = 0; 1019 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK); 1020 for (i = 0; i < n; i++) { 1021 unp = unp_list[i]; 1022 if (unp->unp_gencnt <= gencnt) { 1023 xu->xu_len = sizeof *xu; 1024 xu->xu_unpp = unp; 1025 /* 1026 * XXX - need more locking here to protect against 1027 * connect/disconnect races for SMP. 1028 */ 1029 if (unp->unp_addr != NULL) 1030 bcopy(unp->unp_addr, &xu->xu_addr, 1031 unp->unp_addr->sun_len); 1032 if (unp->unp_conn != NULL && 1033 unp->unp_conn->unp_addr != NULL) 1034 bcopy(unp->unp_conn->unp_addr, 1035 &xu->xu_caddr, 1036 unp->unp_conn->unp_addr->sun_len); 1037 bcopy(unp, &xu->xu_unp, sizeof *unp); 1038 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1039 error = SYSCTL_OUT(req, xu, sizeof *xu); 1040 } 1041 } 1042 free(xu, M_TEMP); 1043 if (!error) { 1044 /* 1045 * Give the user an updated idea of our state. 1046 * If the generation differs from what we told 1047 * her before, she knows that something happened 1048 * while we were processing this request, and it 1049 * might be necessary to retry. 1050 */ 1051 xug->xug_gen = unp_gencnt; 1052 xug->xug_sogen = so_gencnt; 1053 xug->xug_count = unp_count; 1054 error = SYSCTL_OUT(req, xug, sizeof *xug); 1055 } 1056 free(unp_list, M_TEMP); 1057 free(xug, M_TEMP); 1058 return (error); 1059 } 1060 1061 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1062 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1063 "List of active local datagram sockets"); 1064 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1065 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1066 "List of active local stream sockets"); 1067 1068 static void 1069 unp_shutdown(unp) 1070 struct unpcb *unp; 1071 { 1072 struct socket *so; 1073 1074 UNP_LOCK_ASSERT(); 1075 1076 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 1077 (so = unp->unp_conn->unp_socket)) 1078 socantrcvmore(so); 1079 } 1080 1081 static void 1082 unp_drop(unp, errno) 1083 struct unpcb *unp; 1084 int errno; 1085 { 1086 struct socket *so = unp->unp_socket; 1087 1088 UNP_LOCK_ASSERT(); 1089 1090 so->so_error = errno; 1091 unp_disconnect(unp); 1092 } 1093 1094 #ifdef notdef 1095 void 1096 unp_drain() 1097 { 1098 1099 } 1100 #endif 1101 1102 static void 1103 unp_freerights(rp, fdcount) 1104 struct file **rp; 1105 int fdcount; 1106 { 1107 int i; 1108 struct file *fp; 1109 1110 for (i = 0; i < fdcount; i++) { 1111 fp = *rp; 1112 /* 1113 * zero the pointer before calling 1114 * unp_discard since it may end up 1115 * in unp_gc().. 1116 */ 1117 *rp++ = 0; 1118 unp_discard(fp); 1119 } 1120 } 1121 1122 int 1123 unp_externalize(control, controlp) 1124 struct mbuf *control, **controlp; 1125 { 1126 struct thread *td = curthread; /* XXX */ 1127 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1128 int i; 1129 int *fdp; 1130 struct file **rp; 1131 struct file *fp; 1132 void *data; 1133 socklen_t clen = control->m_len, datalen; 1134 int error, newfds; 1135 int f; 1136 u_int newlen; 1137 1138 error = 0; 1139 if (controlp != NULL) /* controlp == NULL => free control messages */ 1140 *controlp = NULL; 1141 1142 while (cm != NULL) { 1143 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1144 error = EINVAL; 1145 break; 1146 } 1147 1148 data = CMSG_DATA(cm); 1149 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1150 1151 if (cm->cmsg_level == SOL_SOCKET 1152 && cm->cmsg_type == SCM_RIGHTS) { 1153 newfds = datalen / sizeof(struct file *); 1154 rp = data; 1155 1156 /* If we're not outputting the descriptors free them. */ 1157 if (error || controlp == NULL) { 1158 unp_freerights(rp, newfds); 1159 goto next; 1160 } 1161 FILEDESC_LOCK(td->td_proc->p_fd); 1162 /* if the new FD's will not fit free them. */ 1163 if (!fdavail(td, newfds)) { 1164 FILEDESC_UNLOCK(td->td_proc->p_fd); 1165 error = EMSGSIZE; 1166 unp_freerights(rp, newfds); 1167 goto next; 1168 } 1169 /* 1170 * now change each pointer to an fd in the global 1171 * table to an integer that is the index to the 1172 * local fd table entry that we set up to point 1173 * to the global one we are transferring. 1174 */ 1175 newlen = newfds * sizeof(int); 1176 *controlp = sbcreatecontrol(NULL, newlen, 1177 SCM_RIGHTS, SOL_SOCKET); 1178 if (*controlp == NULL) { 1179 FILEDESC_UNLOCK(td->td_proc->p_fd); 1180 error = E2BIG; 1181 unp_freerights(rp, newfds); 1182 goto next; 1183 } 1184 1185 fdp = (int *) 1186 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1187 for (i = 0; i < newfds; i++) { 1188 if (fdalloc(td, 0, &f)) 1189 panic("unp_externalize fdalloc failed"); 1190 fp = *rp++; 1191 td->td_proc->p_fd->fd_ofiles[f] = fp; 1192 FILE_LOCK(fp); 1193 fp->f_msgcount--; 1194 FILE_UNLOCK(fp); 1195 unp_rights--; 1196 *fdp++ = f; 1197 } 1198 FILEDESC_UNLOCK(td->td_proc->p_fd); 1199 } else { /* We can just copy anything else across */ 1200 if (error || controlp == NULL) 1201 goto next; 1202 *controlp = sbcreatecontrol(NULL, datalen, 1203 cm->cmsg_type, cm->cmsg_level); 1204 if (*controlp == NULL) { 1205 error = ENOBUFS; 1206 goto next; 1207 } 1208 bcopy(data, 1209 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1210 datalen); 1211 } 1212 1213 controlp = &(*controlp)->m_next; 1214 1215 next: 1216 if (CMSG_SPACE(datalen) < clen) { 1217 clen -= CMSG_SPACE(datalen); 1218 cm = (struct cmsghdr *) 1219 ((caddr_t)cm + CMSG_SPACE(datalen)); 1220 } else { 1221 clen = 0; 1222 cm = NULL; 1223 } 1224 } 1225 1226 m_freem(control); 1227 1228 return (error); 1229 } 1230 1231 void 1232 unp_init(void) 1233 { 1234 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1235 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1236 if (unp_zone == NULL) 1237 panic("unp_init"); 1238 uma_zone_set_max(unp_zone, nmbclusters); 1239 LIST_INIT(&unp_dhead); 1240 LIST_INIT(&unp_shead); 1241 1242 UNP_LOCK_INIT(); 1243 } 1244 1245 static int 1246 unp_internalize(controlp, td) 1247 struct mbuf **controlp; 1248 struct thread *td; 1249 { 1250 struct mbuf *control = *controlp; 1251 struct proc *p = td->td_proc; 1252 struct filedesc *fdescp = p->p_fd; 1253 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1254 struct cmsgcred *cmcred; 1255 struct file **rp; 1256 struct file *fp; 1257 struct timeval *tv; 1258 int i, fd, *fdp; 1259 void *data; 1260 socklen_t clen = control->m_len, datalen; 1261 int error, oldfds; 1262 u_int newlen; 1263 1264 error = 0; 1265 *controlp = NULL; 1266 1267 while (cm != NULL) { 1268 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1269 || cm->cmsg_len > clen) { 1270 error = EINVAL; 1271 goto out; 1272 } 1273 1274 data = CMSG_DATA(cm); 1275 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1276 1277 switch (cm->cmsg_type) { 1278 /* 1279 * Fill in credential information. 1280 */ 1281 case SCM_CREDS: 1282 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1283 SCM_CREDS, SOL_SOCKET); 1284 if (*controlp == NULL) { 1285 error = ENOBUFS; 1286 goto out; 1287 } 1288 1289 cmcred = (struct cmsgcred *) 1290 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1291 cmcred->cmcred_pid = p->p_pid; 1292 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1293 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1294 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1295 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1296 CMGROUP_MAX); 1297 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1298 cmcred->cmcred_groups[i] = 1299 td->td_ucred->cr_groups[i]; 1300 break; 1301 1302 case SCM_RIGHTS: 1303 oldfds = datalen / sizeof (int); 1304 /* 1305 * check that all the FDs passed in refer to legal files 1306 * If not, reject the entire operation. 1307 */ 1308 fdp = data; 1309 FILEDESC_LOCK(fdescp); 1310 for (i = 0; i < oldfds; i++) { 1311 fd = *fdp++; 1312 if ((unsigned)fd >= fdescp->fd_nfiles || 1313 fdescp->fd_ofiles[fd] == NULL) { 1314 FILEDESC_UNLOCK(fdescp); 1315 error = EBADF; 1316 goto out; 1317 } 1318 fp = fdescp->fd_ofiles[fd]; 1319 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1320 FILEDESC_UNLOCK(fdescp); 1321 error = EOPNOTSUPP; 1322 goto out; 1323 } 1324 1325 } 1326 /* 1327 * Now replace the integer FDs with pointers to 1328 * the associated global file table entry.. 1329 */ 1330 newlen = oldfds * sizeof(struct file *); 1331 *controlp = sbcreatecontrol(NULL, newlen, 1332 SCM_RIGHTS, SOL_SOCKET); 1333 if (*controlp == NULL) { 1334 FILEDESC_UNLOCK(fdescp); 1335 error = E2BIG; 1336 goto out; 1337 } 1338 1339 fdp = data; 1340 rp = (struct file **) 1341 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1342 for (i = 0; i < oldfds; i++) { 1343 fp = fdescp->fd_ofiles[*fdp++]; 1344 *rp++ = fp; 1345 FILE_LOCK(fp); 1346 fp->f_count++; 1347 fp->f_msgcount++; 1348 FILE_UNLOCK(fp); 1349 unp_rights++; 1350 } 1351 FILEDESC_UNLOCK(fdescp); 1352 break; 1353 1354 case SCM_TIMESTAMP: 1355 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1356 SCM_TIMESTAMP, SOL_SOCKET); 1357 if (*controlp == NULL) { 1358 error = ENOBUFS; 1359 goto out; 1360 } 1361 tv = (struct timeval *) 1362 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1363 microtime(tv); 1364 break; 1365 1366 default: 1367 error = EINVAL; 1368 goto out; 1369 } 1370 1371 controlp = &(*controlp)->m_next; 1372 1373 if (CMSG_SPACE(datalen) < clen) { 1374 clen -= CMSG_SPACE(datalen); 1375 cm = (struct cmsghdr *) 1376 ((caddr_t)cm + CMSG_SPACE(datalen)); 1377 } else { 1378 clen = 0; 1379 cm = NULL; 1380 } 1381 } 1382 1383 out: 1384 m_freem(control); 1385 1386 return (error); 1387 } 1388 1389 static int unp_defer, unp_gcing; 1390 1391 static void 1392 unp_gc() 1393 { 1394 register struct file *fp, *nextfp; 1395 register struct socket *so; 1396 struct file **extra_ref, **fpp; 1397 int nunref, i; 1398 int nfiles_snap; 1399 int nfiles_slack = 20; 1400 1401 UNP_LOCK_ASSERT(); 1402 1403 if (unp_gcing) 1404 return; 1405 unp_gcing = 1; 1406 unp_defer = 0; 1407 /* 1408 * before going through all this, set all FDs to 1409 * be NOT defered and NOT externally accessible 1410 */ 1411 /* 1412 * XXXRW: Acquiring a sleep lock while holding UNP 1413 * mutex cannot be a good thing. 1414 */ 1415 sx_slock(&filelist_lock); 1416 LIST_FOREACH(fp, &filehead, f_list) 1417 fp->f_gcflag &= ~(FMARK|FDEFER); 1418 do { 1419 LIST_FOREACH(fp, &filehead, f_list) { 1420 FILE_LOCK(fp); 1421 /* 1422 * If the file is not open, skip it 1423 */ 1424 if (fp->f_count == 0) { 1425 FILE_UNLOCK(fp); 1426 continue; 1427 } 1428 /* 1429 * If we already marked it as 'defer' in a 1430 * previous pass, then try process it this time 1431 * and un-mark it 1432 */ 1433 if (fp->f_gcflag & FDEFER) { 1434 fp->f_gcflag &= ~FDEFER; 1435 unp_defer--; 1436 } else { 1437 /* 1438 * if it's not defered, then check if it's 1439 * already marked.. if so skip it 1440 */ 1441 if (fp->f_gcflag & FMARK) { 1442 FILE_UNLOCK(fp); 1443 continue; 1444 } 1445 /* 1446 * If all references are from messages 1447 * in transit, then skip it. it's not 1448 * externally accessible. 1449 */ 1450 if (fp->f_count == fp->f_msgcount) { 1451 FILE_UNLOCK(fp); 1452 continue; 1453 } 1454 /* 1455 * If it got this far then it must be 1456 * externally accessible. 1457 */ 1458 fp->f_gcflag |= FMARK; 1459 } 1460 /* 1461 * either it was defered, or it is externally 1462 * accessible and not already marked so. 1463 * Now check if it is possibly one of OUR sockets. 1464 */ 1465 if (fp->f_type != DTYPE_SOCKET || 1466 (so = fp->f_data) == NULL) { 1467 FILE_UNLOCK(fp); 1468 continue; 1469 } 1470 FILE_UNLOCK(fp); 1471 if (so->so_proto->pr_domain != &localdomain || 1472 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1473 continue; 1474 #ifdef notdef 1475 if (so->so_rcv.sb_flags & SB_LOCK) { 1476 /* 1477 * This is problematical; it's not clear 1478 * we need to wait for the sockbuf to be 1479 * unlocked (on a uniprocessor, at least), 1480 * and it's also not clear what to do 1481 * if sbwait returns an error due to receipt 1482 * of a signal. If sbwait does return 1483 * an error, we'll go into an infinite 1484 * loop. Delete all of this for now. 1485 */ 1486 (void) sbwait(&so->so_rcv); 1487 goto restart; 1488 } 1489 #endif 1490 /* 1491 * So, Ok, it's one of our sockets and it IS externally 1492 * accessible (or was defered). Now we look 1493 * to see if we hold any file descriptors in its 1494 * message buffers. Follow those links and mark them 1495 * as accessible too. 1496 */ 1497 SOCKBUF_LOCK(&so->so_rcv); 1498 unp_scan(so->so_rcv.sb_mb, unp_mark); 1499 SOCKBUF_UNLOCK(&so->so_rcv); 1500 } 1501 } while (unp_defer); 1502 sx_sunlock(&filelist_lock); 1503 /* 1504 * We grab an extra reference to each of the file table entries 1505 * that are not otherwise accessible and then free the rights 1506 * that are stored in messages on them. 1507 * 1508 * The bug in the orginal code is a little tricky, so I'll describe 1509 * what's wrong with it here. 1510 * 1511 * It is incorrect to simply unp_discard each entry for f_msgcount 1512 * times -- consider the case of sockets A and B that contain 1513 * references to each other. On a last close of some other socket, 1514 * we trigger a gc since the number of outstanding rights (unp_rights) 1515 * is non-zero. If during the sweep phase the gc code un_discards, 1516 * we end up doing a (full) closef on the descriptor. A closef on A 1517 * results in the following chain. Closef calls soo_close, which 1518 * calls soclose. Soclose calls first (through the switch 1519 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1520 * returns because the previous instance had set unp_gcing, and 1521 * we return all the way back to soclose, which marks the socket 1522 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1523 * to free up the rights that are queued in messages on the socket A, 1524 * i.e., the reference on B. The sorflush calls via the dom_dispose 1525 * switch unp_dispose, which unp_scans with unp_discard. This second 1526 * instance of unp_discard just calls closef on B. 1527 * 1528 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1529 * which results in another closef on A. Unfortunately, A is already 1530 * being closed, and the descriptor has already been marked with 1531 * SS_NOFDREF, and soclose panics at this point. 1532 * 1533 * Here, we first take an extra reference to each inaccessible 1534 * descriptor. Then, we call sorflush ourself, since we know 1535 * it is a Unix domain socket anyhow. After we destroy all the 1536 * rights carried in messages, we do a last closef to get rid 1537 * of our extra reference. This is the last close, and the 1538 * unp_detach etc will shut down the socket. 1539 * 1540 * 91/09/19, bsy@cs.cmu.edu 1541 */ 1542 again: 1543 nfiles_snap = nfiles + nfiles_slack; /* some slack */ 1544 extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP, 1545 M_WAITOK); 1546 sx_slock(&filelist_lock); 1547 if (nfiles_snap < nfiles) { 1548 sx_sunlock(&filelist_lock); 1549 free(extra_ref, M_TEMP); 1550 nfiles_slack += 20; 1551 goto again; 1552 } 1553 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 1554 fp != NULL; fp = nextfp) { 1555 nextfp = LIST_NEXT(fp, f_list); 1556 FILE_LOCK(fp); 1557 /* 1558 * If it's not open, skip it 1559 */ 1560 if (fp->f_count == 0) { 1561 FILE_UNLOCK(fp); 1562 continue; 1563 } 1564 /* 1565 * If all refs are from msgs, and it's not marked accessible 1566 * then it must be referenced from some unreachable cycle 1567 * of (shut-down) FDs, so include it in our 1568 * list of FDs to remove 1569 */ 1570 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { 1571 *fpp++ = fp; 1572 nunref++; 1573 fp->f_count++; 1574 } 1575 FILE_UNLOCK(fp); 1576 } 1577 sx_sunlock(&filelist_lock); 1578 /* 1579 * for each FD on our hit list, do the following two things 1580 */ 1581 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1582 struct file *tfp = *fpp; 1583 FILE_LOCK(tfp); 1584 if (tfp->f_type == DTYPE_SOCKET && 1585 tfp->f_data != NULL) { 1586 FILE_UNLOCK(tfp); 1587 sorflush(tfp->f_data); 1588 } else { 1589 FILE_UNLOCK(tfp); 1590 } 1591 } 1592 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1593 closef(*fpp, (struct thread *) NULL); 1594 free(extra_ref, M_TEMP); 1595 unp_gcing = 0; 1596 } 1597 1598 void 1599 unp_dispose(m) 1600 struct mbuf *m; 1601 { 1602 1603 if (m) 1604 unp_scan(m, unp_discard); 1605 } 1606 1607 static int 1608 unp_listen(unp, td) 1609 struct unpcb *unp; 1610 struct thread *td; 1611 { 1612 UNP_LOCK_ASSERT(); 1613 1614 /* 1615 * XXXRW: Why populate the local peer cred with our own credential? 1616 */ 1617 cru2x(td->td_ucred, &unp->unp_peercred); 1618 unp->unp_flags |= UNP_HAVEPCCACHED; 1619 return (0); 1620 } 1621 1622 static void 1623 unp_scan(m0, op) 1624 register struct mbuf *m0; 1625 void (*op)(struct file *); 1626 { 1627 struct mbuf *m; 1628 struct file **rp; 1629 struct cmsghdr *cm; 1630 void *data; 1631 int i; 1632 socklen_t clen, datalen; 1633 int qfds; 1634 1635 while (m0 != NULL) { 1636 for (m = m0; m; m = m->m_next) { 1637 if (m->m_type != MT_CONTROL) 1638 continue; 1639 1640 cm = mtod(m, struct cmsghdr *); 1641 clen = m->m_len; 1642 1643 while (cm != NULL) { 1644 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 1645 break; 1646 1647 data = CMSG_DATA(cm); 1648 datalen = (caddr_t)cm + cm->cmsg_len 1649 - (caddr_t)data; 1650 1651 if (cm->cmsg_level == SOL_SOCKET && 1652 cm->cmsg_type == SCM_RIGHTS) { 1653 qfds = datalen / sizeof (struct file *); 1654 rp = data; 1655 for (i = 0; i < qfds; i++) 1656 (*op)(*rp++); 1657 } 1658 1659 if (CMSG_SPACE(datalen) < clen) { 1660 clen -= CMSG_SPACE(datalen); 1661 cm = (struct cmsghdr *) 1662 ((caddr_t)cm + CMSG_SPACE(datalen)); 1663 } else { 1664 clen = 0; 1665 cm = NULL; 1666 } 1667 } 1668 } 1669 m0 = m0->m_act; 1670 } 1671 } 1672 1673 static void 1674 unp_mark(fp) 1675 struct file *fp; 1676 { 1677 if (fp->f_gcflag & FMARK) 1678 return; 1679 unp_defer++; 1680 fp->f_gcflag |= (FMARK|FDEFER); 1681 } 1682 1683 static void 1684 unp_discard(fp) 1685 struct file *fp; 1686 { 1687 FILE_LOCK(fp); 1688 fp->f_msgcount--; 1689 unp_rights--; 1690 FILE_UNLOCK(fp); 1691 (void) closef(fp, (struct thread *)NULL); 1692 } 1693