1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 34 * $Id: uipc_usrreq.c,v 1.36 1998/07/15 02:32:12 bde Exp $ 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/domain.h> 41 #include <sys/fcntl.h> 42 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 43 #include <sys/file.h> 44 #include <sys/filedesc.h> 45 #include <sys/lock.h> 46 #include <sys/mbuf.h> 47 #include <sys/namei.h> 48 #include <sys/proc.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/stat.h> 53 #include <sys/sysctl.h> 54 #include <sys/un.h> 55 #include <sys/unpcb.h> 56 #include <sys/vnode.h> 57 58 #include <vm/vm_zone.h> 59 60 struct vm_zone *unp_zone; 61 static unp_gen_t unp_gencnt; 62 static u_int unp_count; 63 64 static struct unp_head unp_shead, unp_dhead; 65 66 /* 67 * Unix communications domain. 68 * 69 * TODO: 70 * SEQPACKET, RDM 71 * rethink name space problems 72 * need a proper out-of-band 73 * lock pushdown 74 */ 75 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 76 static ino_t unp_ino; /* prototype for fake inode numbers */ 77 78 static int unp_attach __P((struct socket *)); 79 static void unp_detach __P((struct unpcb *)); 80 static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); 81 static int unp_connect __P((struct socket *,struct sockaddr *, 82 struct proc *)); 83 static void unp_disconnect __P((struct unpcb *)); 84 static void unp_shutdown __P((struct unpcb *)); 85 static void unp_drop __P((struct unpcb *, int)); 86 static void unp_gc __P((void)); 87 static void unp_scan __P((struct mbuf *, void (*)(struct file *))); 88 static void unp_mark __P((struct file *)); 89 static void unp_discard __P((struct file *)); 90 static int unp_internalize __P((struct mbuf *, struct proc *)); 91 92 static int 93 uipc_abort(struct socket *so) 94 { 95 struct unpcb *unp = sotounpcb(so); 96 97 if (unp == 0) 98 return EINVAL; 99 unp_drop(unp, ECONNABORTED); 100 return 0; 101 } 102 103 static int 104 uipc_accept(struct socket *so, struct sockaddr **nam) 105 { 106 struct unpcb *unp = sotounpcb(so); 107 108 if (unp == 0) 109 return EINVAL; 110 111 /* 112 * Pass back name of connected socket, 113 * if it was bound and we are still connected 114 * (our peer may have closed already!). 115 */ 116 if (unp->unp_conn && unp->unp_conn->unp_addr) { 117 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 118 1); 119 } else { 120 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 121 } 122 return 0; 123 } 124 125 static int 126 uipc_attach(struct socket *so, int proto, struct proc *p) 127 { 128 struct unpcb *unp = sotounpcb(so); 129 130 if (unp != 0) 131 return EISCONN; 132 return unp_attach(so); 133 } 134 135 static int 136 uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 137 { 138 struct unpcb *unp = sotounpcb(so); 139 140 if (unp == 0) 141 return EINVAL; 142 143 return unp_bind(unp, nam, p); 144 } 145 146 static int 147 uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 148 { 149 struct unpcb *unp = sotounpcb(so); 150 151 if (unp == 0) 152 return EINVAL; 153 return unp_connect(so, nam, curproc); 154 } 155 156 static int 157 uipc_connect2(struct socket *so1, struct socket *so2) 158 { 159 struct unpcb *unp = sotounpcb(so1); 160 161 if (unp == 0) 162 return EINVAL; 163 164 return unp_connect2(so1, so2); 165 } 166 167 /* control is EOPNOTSUPP */ 168 169 static int 170 uipc_detach(struct socket *so) 171 { 172 struct unpcb *unp = sotounpcb(so); 173 174 if (unp == 0) 175 return EINVAL; 176 177 unp_detach(unp); 178 return 0; 179 } 180 181 static int 182 uipc_disconnect(struct socket *so) 183 { 184 struct unpcb *unp = sotounpcb(so); 185 186 if (unp == 0) 187 return EINVAL; 188 unp_disconnect(unp); 189 return 0; 190 } 191 192 static int 193 uipc_listen(struct socket *so, struct proc *p) 194 { 195 struct unpcb *unp = sotounpcb(so); 196 197 if (unp == 0 || unp->unp_vnode == 0) 198 return EINVAL; 199 return 0; 200 } 201 202 static int 203 uipc_peeraddr(struct socket *so, struct sockaddr **nam) 204 { 205 struct unpcb *unp = sotounpcb(so); 206 207 if (unp == 0) 208 return EINVAL; 209 if (unp->unp_conn && unp->unp_conn->unp_addr) 210 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 211 1); 212 return 0; 213 } 214 215 static int 216 uipc_rcvd(struct socket *so, int flags) 217 { 218 struct unpcb *unp = sotounpcb(so); 219 struct socket *so2; 220 221 if (unp == 0) 222 return EINVAL; 223 switch (so->so_type) { 224 case SOCK_DGRAM: 225 panic("uipc_rcvd DGRAM?"); 226 /*NOTREACHED*/ 227 228 case SOCK_STREAM: 229 #define rcv (&so->so_rcv) 230 #define snd (&so2->so_snd) 231 if (unp->unp_conn == 0) 232 break; 233 so2 = unp->unp_conn->unp_socket; 234 /* 235 * Adjust backpressure on sender 236 * and wakeup any waiting to write. 237 */ 238 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 239 unp->unp_mbcnt = rcv->sb_mbcnt; 240 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 241 unp->unp_cc = rcv->sb_cc; 242 sowwakeup(so2); 243 #undef snd 244 #undef rcv 245 break; 246 247 default: 248 panic("uipc_rcvd unknown socktype"); 249 } 250 return 0; 251 } 252 253 /* pru_rcvoob is EOPNOTSUPP */ 254 255 static int 256 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 257 struct mbuf *control, struct proc *p) 258 { 259 int error = 0; 260 struct unpcb *unp = sotounpcb(so); 261 struct socket *so2; 262 263 if (unp == 0) { 264 error = EINVAL; 265 goto release; 266 } 267 if (flags & PRUS_OOB) { 268 error = EOPNOTSUPP; 269 goto release; 270 } 271 272 if (control && (error = unp_internalize(control, p))) 273 goto release; 274 275 switch (so->so_type) { 276 case SOCK_DGRAM: 277 { 278 struct sockaddr *from; 279 280 if (nam) { 281 if (unp->unp_conn) { 282 error = EISCONN; 283 break; 284 } 285 error = unp_connect(so, nam, p); 286 if (error) 287 break; 288 } else { 289 if (unp->unp_conn == 0) { 290 error = ENOTCONN; 291 break; 292 } 293 } 294 so2 = unp->unp_conn->unp_socket; 295 if (unp->unp_addr) 296 from = (struct sockaddr *)unp->unp_addr; 297 else 298 from = &sun_noname; 299 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 300 sorwakeup(so2); 301 m = 0; 302 control = 0; 303 } else 304 error = ENOBUFS; 305 if (nam) 306 unp_disconnect(unp); 307 break; 308 } 309 310 case SOCK_STREAM: 311 #define rcv (&so2->so_rcv) 312 #define snd (&so->so_snd) 313 /* Connect if not connected yet. */ 314 /* 315 * Note: A better implementation would complain 316 * if not equal to the peer's address. 317 */ 318 if ((so->so_state & SS_ISCONNECTED) == 0) { 319 if (nam) { 320 error = unp_connect(so, nam, p); 321 if (error) 322 break; /* XXX */ 323 } else { 324 error = ENOTCONN; 325 break; 326 } 327 } 328 329 if (so->so_state & SS_CANTSENDMORE) { 330 error = EPIPE; 331 break; 332 } 333 if (unp->unp_conn == 0) 334 panic("uipc_send connected but no connection?"); 335 so2 = unp->unp_conn->unp_socket; 336 /* 337 * Send to paired receive port, and then reduce 338 * send buffer hiwater marks to maintain backpressure. 339 * Wake up readers. 340 */ 341 if (control) { 342 if (sbappendcontrol(rcv, m, control)) 343 control = 0; 344 } else 345 sbappend(rcv, m); 346 snd->sb_mbmax -= 347 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 348 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 349 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 350 unp->unp_conn->unp_cc = rcv->sb_cc; 351 sorwakeup(so2); 352 m = 0; 353 #undef snd 354 #undef rcv 355 break; 356 357 default: 358 panic("uipc_send unknown socktype"); 359 } 360 361 /* 362 * SEND_EOF is equivalent to a SEND followed by 363 * a SHUTDOWN. 364 */ 365 if (flags & PRUS_EOF) { 366 socantsendmore(so); 367 unp_shutdown(unp); 368 } 369 370 release: 371 if (control) 372 m_freem(control); 373 if (m) 374 m_freem(m); 375 return error; 376 } 377 378 static int 379 uipc_sense(struct socket *so, struct stat *sb) 380 { 381 struct unpcb *unp = sotounpcb(so); 382 struct socket *so2; 383 384 if (unp == 0) 385 return EINVAL; 386 sb->st_blksize = so->so_snd.sb_hiwat; 387 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 388 so2 = unp->unp_conn->unp_socket; 389 sb->st_blksize += so2->so_rcv.sb_cc; 390 } 391 sb->st_dev = NODEV; 392 if (unp->unp_ino == 0) 393 unp->unp_ino = unp_ino++; 394 sb->st_ino = unp->unp_ino; 395 return (0); 396 } 397 398 static int 399 uipc_shutdown(struct socket *so) 400 { 401 struct unpcb *unp = sotounpcb(so); 402 403 if (unp == 0) 404 return EINVAL; 405 socantsendmore(so); 406 unp_shutdown(unp); 407 return 0; 408 } 409 410 static int 411 uipc_sockaddr(struct socket *so, struct sockaddr **nam) 412 { 413 struct unpcb *unp = sotounpcb(so); 414 415 if (unp == 0) 416 return EINVAL; 417 if (unp->unp_addr) 418 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); 419 return 0; 420 } 421 422 struct pr_usrreqs uipc_usrreqs = { 423 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 424 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 425 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 426 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 427 sosend, soreceive, sopoll 428 }; 429 430 /* 431 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 432 * for stream sockets, although the total for sender and receiver is 433 * actually only PIPSIZ. 434 * Datagram sockets really use the sendspace as the maximum datagram size, 435 * and don't really want to reserve the sendspace. Their recvspace should 436 * be large enough for at least one max-size datagram plus address. 437 */ 438 #ifndef PIPSIZ 439 #define PIPSIZ 8192 440 #endif 441 static u_long unpst_sendspace = PIPSIZ; 442 static u_long unpst_recvspace = PIPSIZ; 443 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 444 static u_long unpdg_recvspace = 4*1024; 445 446 static int unp_rights; /* file descriptors in flight */ 447 448 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 449 &unpst_sendspace, 0, ""); 450 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 451 &unpst_recvspace, 0, ""); 452 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 453 &unpdg_sendspace, 0, ""); 454 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 455 &unpdg_recvspace, 0, ""); 456 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 457 458 static int 459 unp_attach(so) 460 struct socket *so; 461 { 462 register struct unpcb *unp; 463 int error; 464 465 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 466 switch (so->so_type) { 467 468 case SOCK_STREAM: 469 error = soreserve(so, unpst_sendspace, unpst_recvspace); 470 break; 471 472 case SOCK_DGRAM: 473 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 474 break; 475 476 default: 477 panic("unp_attach"); 478 } 479 if (error) 480 return (error); 481 } 482 unp = zalloc(unp_zone); 483 if (unp == NULL) 484 return (ENOBUFS); 485 bzero(unp, sizeof *unp); 486 unp->unp_gencnt = ++unp_gencnt; 487 unp_count++; 488 LIST_INIT(&unp->unp_refs); 489 unp->unp_socket = so; 490 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 491 : &unp_shead, unp, unp_link); 492 so->so_pcb = (caddr_t)unp; 493 return (0); 494 } 495 496 static void 497 unp_detach(unp) 498 register struct unpcb *unp; 499 { 500 LIST_REMOVE(unp, unp_link); 501 unp->unp_gencnt = ++unp_gencnt; 502 --unp_count; 503 if (unp->unp_vnode) { 504 unp->unp_vnode->v_socket = 0; 505 vrele(unp->unp_vnode); 506 unp->unp_vnode = 0; 507 } 508 if (unp->unp_conn) 509 unp_disconnect(unp); 510 while (unp->unp_refs.lh_first) 511 unp_drop(unp->unp_refs.lh_first, ECONNRESET); 512 soisdisconnected(unp->unp_socket); 513 unp->unp_socket->so_pcb = 0; 514 if (unp_rights) { 515 /* 516 * Normally the receive buffer is flushed later, 517 * in sofree, but if our receive buffer holds references 518 * to descriptors that are now garbage, we will dispose 519 * of those descriptor references after the garbage collector 520 * gets them (resulting in a "panic: closef: count < 0"). 521 */ 522 sorflush(unp->unp_socket); 523 unp_gc(); 524 } 525 if (unp->unp_addr) 526 FREE(unp->unp_addr, M_SONAME); 527 zfree(unp_zone, unp); 528 } 529 530 static int 531 unp_bind(unp, nam, p) 532 struct unpcb *unp; 533 struct sockaddr *nam; 534 struct proc *p; 535 { 536 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 537 register struct vnode *vp; 538 struct vattr vattr; 539 int error, namelen; 540 struct nameidata nd; 541 char buf[SOCK_MAXADDRLEN]; 542 543 if (unp->unp_vnode != NULL) 544 return (EINVAL); 545 #define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0)) 546 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 547 if (namelen <= 0) 548 return EINVAL; 549 strncpy(buf, soun->sun_path, namelen); 550 buf[namelen] = 0; /* null-terminate the string */ 551 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, 552 buf, p); 553 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 554 error = namei(&nd); 555 if (error) 556 return (error); 557 vp = nd.ni_vp; 558 if (vp != NULL) { 559 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 560 if (nd.ni_dvp == vp) 561 vrele(nd.ni_dvp); 562 else 563 vput(nd.ni_dvp); 564 vrele(vp); 565 return (EADDRINUSE); 566 } 567 VATTR_NULL(&vattr); 568 vattr.va_type = VSOCK; 569 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 570 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 571 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 572 vput(nd.ni_dvp); 573 if (error) 574 return (error); 575 vp = nd.ni_vp; 576 vp->v_socket = unp->unp_socket; 577 unp->unp_vnode = vp; 578 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); 579 VOP_UNLOCK(vp, 0, p); 580 return (0); 581 } 582 583 static int 584 unp_connect(so, nam, p) 585 struct socket *so; 586 struct sockaddr *nam; 587 struct proc *p; 588 { 589 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 590 register struct vnode *vp; 591 register struct socket *so2, *so3; 592 struct unpcb *unp2, *unp3; 593 int error, len; 594 struct nameidata nd; 595 char buf[SOCK_MAXADDRLEN]; 596 597 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 598 if (len <= 0) 599 return EINVAL; 600 strncpy(buf, soun->sun_path, len); 601 buf[len] = 0; 602 603 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); 604 error = namei(&nd); 605 if (error) 606 return (error); 607 vp = nd.ni_vp; 608 if (vp->v_type != VSOCK) { 609 error = ENOTSOCK; 610 goto bad; 611 } 612 error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); 613 if (error) 614 goto bad; 615 so2 = vp->v_socket; 616 if (so2 == 0) { 617 error = ECONNREFUSED; 618 goto bad; 619 } 620 if (so->so_type != so2->so_type) { 621 error = EPROTOTYPE; 622 goto bad; 623 } 624 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 625 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 626 (so3 = sonewconn(so2, 0)) == 0) { 627 error = ECONNREFUSED; 628 goto bad; 629 } 630 unp2 = sotounpcb(so2); 631 unp3 = sotounpcb(so3); 632 if (unp2->unp_addr) 633 unp3->unp_addr = (struct sockaddr_un *) 634 dup_sockaddr((struct sockaddr *) 635 unp2->unp_addr, 1); 636 so2 = so3; 637 } 638 error = unp_connect2(so, so2); 639 bad: 640 vput(vp); 641 return (error); 642 } 643 644 int 645 unp_connect2(so, so2) 646 register struct socket *so; 647 register struct socket *so2; 648 { 649 register struct unpcb *unp = sotounpcb(so); 650 register struct unpcb *unp2; 651 652 if (so2->so_type != so->so_type) 653 return (EPROTOTYPE); 654 unp2 = sotounpcb(so2); 655 unp->unp_conn = unp2; 656 switch (so->so_type) { 657 658 case SOCK_DGRAM: 659 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 660 soisconnected(so); 661 break; 662 663 case SOCK_STREAM: 664 unp2->unp_conn = unp; 665 soisconnected(so); 666 soisconnected(so2); 667 break; 668 669 default: 670 panic("unp_connect2"); 671 } 672 return (0); 673 } 674 675 static void 676 unp_disconnect(unp) 677 struct unpcb *unp; 678 { 679 register struct unpcb *unp2 = unp->unp_conn; 680 681 if (unp2 == 0) 682 return; 683 unp->unp_conn = 0; 684 switch (unp->unp_socket->so_type) { 685 686 case SOCK_DGRAM: 687 LIST_REMOVE(unp, unp_reflink); 688 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 689 break; 690 691 case SOCK_STREAM: 692 soisdisconnected(unp->unp_socket); 693 unp2->unp_conn = 0; 694 soisdisconnected(unp2->unp_socket); 695 break; 696 } 697 } 698 699 #ifdef notdef 700 void 701 unp_abort(unp) 702 struct unpcb *unp; 703 { 704 705 unp_detach(unp); 706 } 707 #endif 708 709 static int 710 unp_pcblist SYSCTL_HANDLER_ARGS 711 { 712 int error, i, n; 713 struct unpcb *unp, **unp_list; 714 unp_gen_t gencnt; 715 struct xunpgen xug; 716 struct unp_head *head; 717 718 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 719 720 /* 721 * The process of preparing the PCB list is too time-consuming and 722 * resource-intensive to repeat twice on every request. 723 */ 724 if (req->oldptr == 0) { 725 n = unp_count; 726 req->oldidx = 2 * (sizeof xug) 727 + (n + n/8) * sizeof(struct xunpcb); 728 return 0; 729 } 730 731 if (req->newptr != 0) 732 return EPERM; 733 734 /* 735 * OK, now we're committed to doing something. 736 */ 737 gencnt = unp_gencnt; 738 n = unp_count; 739 740 xug.xug_len = sizeof xug; 741 xug.xug_count = n; 742 xug.xug_gen = gencnt; 743 xug.xug_sogen = so_gencnt; 744 error = SYSCTL_OUT(req, &xug, sizeof xug); 745 if (error) 746 return error; 747 748 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 749 if (unp_list == 0) 750 return ENOMEM; 751 752 for (unp = head->lh_first, i = 0; unp && i < n; 753 unp = unp->unp_link.le_next) { 754 if (unp->unp_gencnt <= gencnt) 755 unp_list[i++] = unp; 756 } 757 n = i; /* in case we lost some during malloc */ 758 759 error = 0; 760 for (i = 0; i < n; i++) { 761 unp = unp_list[i]; 762 if (unp->unp_gencnt <= gencnt) { 763 struct xunpcb xu; 764 xu.xu_len = sizeof xu; 765 xu.xu_unpp = unp; 766 /* 767 * XXX - need more locking here to protect against 768 * connect/disconnect races for SMP. 769 */ 770 if (unp->unp_addr) 771 bcopy(unp->unp_addr, &xu.xu_addr, 772 unp->unp_addr->sun_len); 773 if (unp->unp_conn && unp->unp_conn->unp_addr) 774 bcopy(unp->unp_conn->unp_addr, 775 &xu.xu_caddr, 776 unp->unp_conn->unp_addr->sun_len); 777 bcopy(unp, &xu.xu_unp, sizeof *unp); 778 sotoxsocket(unp->unp_socket, &xu.xu_socket); 779 error = SYSCTL_OUT(req, &xu, sizeof xu); 780 } 781 } 782 if (!error) { 783 /* 784 * Give the user an updated idea of our state. 785 * If the generation differs from what we told 786 * her before, she knows that something happened 787 * while we were processing this request, and it 788 * might be necessary to retry. 789 */ 790 xug.xug_gen = unp_gencnt; 791 xug.xug_sogen = so_gencnt; 792 xug.xug_count = unp_count; 793 error = SYSCTL_OUT(req, &xug, sizeof xug); 794 } 795 free(unp_list, M_TEMP); 796 return error; 797 } 798 799 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 800 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 801 "List of active local datagram sockets"); 802 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 803 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 804 "List of active local stream sockets"); 805 806 static void 807 unp_shutdown(unp) 808 struct unpcb *unp; 809 { 810 struct socket *so; 811 812 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 813 (so = unp->unp_conn->unp_socket)) 814 socantrcvmore(so); 815 } 816 817 static void 818 unp_drop(unp, errno) 819 struct unpcb *unp; 820 int errno; 821 { 822 struct socket *so = unp->unp_socket; 823 824 so->so_error = errno; 825 unp_disconnect(unp); 826 if (so->so_head) { 827 LIST_REMOVE(unp, unp_link); 828 unp->unp_gencnt = ++unp_gencnt; 829 unp_count--; 830 so->so_pcb = (caddr_t) 0; 831 if (unp->unp_addr) 832 FREE(unp->unp_addr, M_SONAME); 833 zfree(unp_zone, unp); 834 sofree(so); 835 } 836 } 837 838 #ifdef notdef 839 void 840 unp_drain() 841 { 842 843 } 844 #endif 845 846 int 847 unp_externalize(rights) 848 struct mbuf *rights; 849 { 850 struct proc *p = curproc; /* XXX */ 851 register int i; 852 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 853 register struct file **rp = (struct file **)(cm + 1); 854 register struct file *fp; 855 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); 856 int f; 857 858 /* 859 * if the new FD's will not fit, then we free them all 860 */ 861 if (!fdavail(p, newfds)) { 862 for (i = 0; i < newfds; i++) { 863 fp = *rp; 864 unp_discard(fp); 865 *rp++ = 0; 866 } 867 return (EMSGSIZE); 868 } 869 /* 870 * now change each pointer to an fd in the global table to 871 * an integer that is the index to the local fd table entry 872 * that we set up to point to the global one we are transferring. 873 * XXX this assumes a pointer and int are the same size...! 874 */ 875 for (i = 0; i < newfds; i++) { 876 if (fdalloc(p, 0, &f)) 877 panic("unp_externalize"); 878 fp = *rp; 879 p->p_fd->fd_ofiles[f] = fp; 880 fp->f_msgcount--; 881 unp_rights--; 882 *(int *)rp++ = f; 883 } 884 return (0); 885 } 886 887 void 888 unp_init(void) 889 { 890 unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); 891 if (unp_zone == 0) 892 panic("unp_init"); 893 LIST_INIT(&unp_dhead); 894 LIST_INIT(&unp_shead); 895 } 896 897 #ifndef MIN 898 #define MIN(a,b) (((a)<(b))?(a):(b)) 899 #endif 900 901 static int 902 unp_internalize(control, p) 903 struct mbuf *control; 904 struct proc *p; 905 { 906 struct filedesc *fdp = p->p_fd; 907 register struct cmsghdr *cm = mtod(control, struct cmsghdr *); 908 register struct file **rp; 909 register struct file *fp; 910 register int i, fd; 911 register struct cmsgcred *cmcred; 912 int oldfds; 913 914 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 915 cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) 916 return (EINVAL); 917 918 /* 919 * Fill in credential information. 920 */ 921 if (cm->cmsg_type == SCM_CREDS) { 922 cmcred = (struct cmsgcred *)(cm + 1); 923 cmcred->cmcred_pid = p->p_pid; 924 cmcred->cmcred_uid = p->p_cred->p_ruid; 925 cmcred->cmcred_gid = p->p_cred->p_rgid; 926 cmcred->cmcred_euid = p->p_ucred->cr_uid; 927 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 928 CMGROUP_MAX); 929 for (i = 0; i < cmcred->cmcred_ngroups; i++) 930 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 931 return(0); 932 } 933 934 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 935 /* 936 * check that all the FDs passed in refer to legal OPEN files 937 * If not, reject the entire operation. 938 */ 939 rp = (struct file **)(cm + 1); 940 for (i = 0; i < oldfds; i++) { 941 fd = *(int *)rp++; 942 if ((unsigned)fd >= fdp->fd_nfiles || 943 fdp->fd_ofiles[fd] == NULL) 944 return (EBADF); 945 } 946 /* 947 * Now replace the integer FDs with pointers to 948 * the associated global file table entry.. 949 * XXX this assumes a pointer and an int are the same size! 950 */ 951 rp = (struct file **)(cm + 1); 952 for (i = 0; i < oldfds; i++) { 953 fp = fdp->fd_ofiles[*(int *)rp]; 954 *rp++ = fp; 955 fp->f_count++; 956 fp->f_msgcount++; 957 unp_rights++; 958 } 959 return (0); 960 } 961 962 static int unp_defer, unp_gcing; 963 964 static void 965 unp_gc() 966 { 967 register struct file *fp, *nextfp; 968 register struct socket *so; 969 struct file **extra_ref, **fpp; 970 int nunref, i; 971 972 if (unp_gcing) 973 return; 974 unp_gcing = 1; 975 unp_defer = 0; 976 /* 977 * before going through all this, set all FDs to 978 * be NOT defered and NOT externally accessible 979 */ 980 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) 981 fp->f_flag &= ~(FMARK|FDEFER); 982 do { 983 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { 984 /* 985 * If the file is not open, skip it 986 */ 987 if (fp->f_count == 0) 988 continue; 989 /* 990 * If we already marked it as 'defer' in a 991 * previous pass, then try process it this time 992 * and un-mark it 993 */ 994 if (fp->f_flag & FDEFER) { 995 fp->f_flag &= ~FDEFER; 996 unp_defer--; 997 } else { 998 /* 999 * if it's not defered, then check if it's 1000 * already marked.. if so skip it 1001 */ 1002 if (fp->f_flag & FMARK) 1003 continue; 1004 /* 1005 * If all references are from messages 1006 * in transit, then skip it. it's not 1007 * externally accessible. 1008 */ 1009 if (fp->f_count == fp->f_msgcount) 1010 continue; 1011 /* 1012 * If it got this far then it must be 1013 * externally accessible. 1014 */ 1015 fp->f_flag |= FMARK; 1016 } 1017 /* 1018 * either it was defered, or it is externally 1019 * accessible and not already marked so. 1020 * Now check if it is possibly one of OUR sockets. 1021 */ 1022 if (fp->f_type != DTYPE_SOCKET || 1023 (so = (struct socket *)fp->f_data) == 0) 1024 continue; 1025 if (so->so_proto->pr_domain != &localdomain || 1026 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1027 continue; 1028 #ifdef notdef 1029 if (so->so_rcv.sb_flags & SB_LOCK) { 1030 /* 1031 * This is problematical; it's not clear 1032 * we need to wait for the sockbuf to be 1033 * unlocked (on a uniprocessor, at least), 1034 * and it's also not clear what to do 1035 * if sbwait returns an error due to receipt 1036 * of a signal. If sbwait does return 1037 * an error, we'll go into an infinite 1038 * loop. Delete all of this for now. 1039 */ 1040 (void) sbwait(&so->so_rcv); 1041 goto restart; 1042 } 1043 #endif 1044 /* 1045 * So, Ok, it's one of our sockets and it IS externally 1046 * accessible (or was defered). Now we look 1047 * to see if we hold any file descriptors in its 1048 * message buffers. Follow those links and mark them 1049 * as accessible too. 1050 */ 1051 unp_scan(so->so_rcv.sb_mb, unp_mark); 1052 } 1053 } while (unp_defer); 1054 /* 1055 * We grab an extra reference to each of the file table entries 1056 * that are not otherwise accessible and then free the rights 1057 * that are stored in messages on them. 1058 * 1059 * The bug in the orginal code is a little tricky, so I'll describe 1060 * what's wrong with it here. 1061 * 1062 * It is incorrect to simply unp_discard each entry for f_msgcount 1063 * times -- consider the case of sockets A and B that contain 1064 * references to each other. On a last close of some other socket, 1065 * we trigger a gc since the number of outstanding rights (unp_rights) 1066 * is non-zero. If during the sweep phase the gc code un_discards, 1067 * we end up doing a (full) closef on the descriptor. A closef on A 1068 * results in the following chain. Closef calls soo_close, which 1069 * calls soclose. Soclose calls first (through the switch 1070 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1071 * returns because the previous instance had set unp_gcing, and 1072 * we return all the way back to soclose, which marks the socket 1073 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1074 * to free up the rights that are queued in messages on the socket A, 1075 * i.e., the reference on B. The sorflush calls via the dom_dispose 1076 * switch unp_dispose, which unp_scans with unp_discard. This second 1077 * instance of unp_discard just calls closef on B. 1078 * 1079 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1080 * which results in another closef on A. Unfortunately, A is already 1081 * being closed, and the descriptor has already been marked with 1082 * SS_NOFDREF, and soclose panics at this point. 1083 * 1084 * Here, we first take an extra reference to each inaccessible 1085 * descriptor. Then, we call sorflush ourself, since we know 1086 * it is a Unix domain socket anyhow. After we destroy all the 1087 * rights carried in messages, we do a last closef to get rid 1088 * of our extra reference. This is the last close, and the 1089 * unp_detach etc will shut down the socket. 1090 * 1091 * 91/09/19, bsy@cs.cmu.edu 1092 */ 1093 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1094 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; 1095 fp = nextfp) { 1096 nextfp = fp->f_list.le_next; 1097 /* 1098 * If it's not open, skip it 1099 */ 1100 if (fp->f_count == 0) 1101 continue; 1102 /* 1103 * If all refs are from msgs, and it's not marked accessible 1104 * then it must be referenced from some unreachable cycle 1105 * of (shut-down) FDs, so include it in our 1106 * list of FDs to remove 1107 */ 1108 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1109 *fpp++ = fp; 1110 nunref++; 1111 fp->f_count++; 1112 } 1113 } 1114 /* 1115 * for each FD on our hit list, do the following two things 1116 */ 1117 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1118 sorflush((struct socket *)(*fpp)->f_data); 1119 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1120 closef(*fpp, (struct proc *) NULL); 1121 free((caddr_t)extra_ref, M_FILE); 1122 unp_gcing = 0; 1123 } 1124 1125 void 1126 unp_dispose(m) 1127 struct mbuf *m; 1128 { 1129 1130 if (m) 1131 unp_scan(m, unp_discard); 1132 } 1133 1134 static void 1135 unp_scan(m0, op) 1136 register struct mbuf *m0; 1137 void (*op) __P((struct file *)); 1138 { 1139 register struct mbuf *m; 1140 register struct file **rp; 1141 register struct cmsghdr *cm; 1142 register int i; 1143 int qfds; 1144 1145 while (m0) { 1146 for (m = m0; m; m = m->m_next) 1147 if (m->m_type == MT_CONTROL && 1148 m->m_len >= sizeof(*cm)) { 1149 cm = mtod(m, struct cmsghdr *); 1150 if (cm->cmsg_level != SOL_SOCKET || 1151 cm->cmsg_type != SCM_RIGHTS) 1152 continue; 1153 qfds = (cm->cmsg_len - sizeof *cm) 1154 / sizeof (struct file *); 1155 rp = (struct file **)(cm + 1); 1156 for (i = 0; i < qfds; i++) 1157 (*op)(*rp++); 1158 break; /* XXX, but saves time */ 1159 } 1160 m0 = m0->m_act; 1161 } 1162 } 1163 1164 static void 1165 unp_mark(fp) 1166 struct file *fp; 1167 { 1168 1169 if (fp->f_flag & FMARK) 1170 return; 1171 unp_defer++; 1172 fp->f_flag |= (FMARK|FDEFER); 1173 } 1174 1175 static void 1176 unp_discard(fp) 1177 struct file *fp; 1178 { 1179 1180 fp->f_msgcount--; 1181 unp_rights--; 1182 (void) closef(fp, (struct proc *)NULL); 1183 } 1184