1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. 4 * Copyright 2004-2006 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 
30 * 31 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_mac.h" 38 39 #include <sys/param.h> 40 #include <sys/domain.h> 41 #include <sys/fcntl.h> 42 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 43 #include <sys/eventhandler.h> 44 #include <sys/file.h> 45 #include <sys/filedesc.h> 46 #include <sys/jail.h> 47 #include <sys/kernel.h> 48 #include <sys/lock.h> 49 #include <sys/mac.h> 50 #include <sys/mbuf.h> 51 #include <sys/mount.h> 52 #include <sys/mutex.h> 53 #include <sys/namei.h> 54 #include <sys/proc.h> 55 #include <sys/protosw.h> 56 #include <sys/resourcevar.h> 57 #include <sys/socket.h> 58 #include <sys/socketvar.h> 59 #include <sys/signalvar.h> 60 #include <sys/stat.h> 61 #include <sys/sx.h> 62 #include <sys/sysctl.h> 63 #include <sys/systm.h> 64 #include <sys/taskqueue.h> 65 #include <sys/un.h> 66 #include <sys/unpcb.h> 67 #include <sys/vnode.h> 68 69 #include <vm/uma.h> 70 71 static uma_zone_t unp_zone; 72 static unp_gen_t unp_gencnt; 73 static u_int unp_count; 74 75 static struct unp_head unp_shead, unp_dhead; 76 77 /* 78 * Unix communications domain. 79 * 80 * TODO: 81 * SEQPACKET, RDM 82 * rethink name space problems 83 * need a proper out-of-band 84 * lock pushdown 85 */ 86 static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 87 static ino_t unp_ino; /* prototype for fake inode numbers */ 88 struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); 89 90 /* 91 * Currently, UNIX domain sockets are protected by a single subsystem lock, 92 * which covers global data structures and variables, the contents of each 93 * per-socket unpcb structure, and the so_pcb field in sockets attached to 94 * the UNIX domain. This provides for a moderate degree of parallelism, as 95 * receive operations on UNIX domain sockets do not need to acquire the 96 * subsystem lock. 
Finer grained locking to permit send() without acquiring 97 * a global lock would be a logical next step. 98 * 99 * The UNIX domain socket lock precedes all socket layer locks, including the 100 * socket lock and socket buffer lock, permitting UNIX domain socket code to 101 * call into socket support routines without releasing its locks. 102 * 103 * Some caution is required in areas where the UNIX domain socket code enters 104 * VFS in order to create or find rendezvous points. This results in 105 * dropping of the UNIX domain socket subsystem lock, acquisition of the 106 * Giant lock, and potential sleeping. This increases the chances of races, 107 * and exposes weaknesses in the socket->protocol API by offering poor 108 * failure modes. 109 */ 110 static struct mtx unp_mtx; 111 #define UNP_LOCK_INIT() \ 112 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) 113 #define UNP_LOCK() mtx_lock(&unp_mtx) 114 #define UNP_UNLOCK() mtx_unlock(&unp_mtx) 115 #define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) 116 #define UNP_UNLOCK_ASSERT() mtx_assert(&unp_mtx, MA_NOTOWNED) 117 118 /* 119 * Garbage collection of cyclic file descriptor/socket references occurs 120 * asynchronously in a taskqueue context in order to avoid recursion and 121 * reentrance in the UNIX domain socket, file descriptor, and socket layer 122 * code. See unp_gc() for a full description. 
123 */ 124 static struct task unp_gc_task; 125 126 static int unp_attach(struct socket *); 127 static void unp_detach(struct unpcb *); 128 static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); 129 static int unp_connect(struct socket *,struct sockaddr *, struct thread *); 130 static int unp_connect2(struct socket *so, struct socket *so2, int); 131 static void unp_disconnect(struct unpcb *); 132 static void unp_shutdown(struct unpcb *); 133 static void unp_drop(struct unpcb *, int); 134 static void unp_gc(__unused void *, int); 135 static void unp_scan(struct mbuf *, void (*)(struct file *)); 136 static void unp_mark(struct file *); 137 static void unp_discard(struct file *); 138 static void unp_freerights(struct file **, int); 139 static int unp_internalize(struct mbuf **, struct thread *); 140 static int unp_listen(struct socket *, struct unpcb *, int, 141 struct thread *); 142 143 static void 144 uipc_abort(struct socket *so) 145 { 146 struct unpcb *unp; 147 148 unp = sotounpcb(so); 149 KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); 150 UNP_LOCK(); 151 unp_drop(unp, ECONNABORTED); 152 unp_detach(unp); 153 UNP_UNLOCK_ASSERT(); 154 } 155 156 static int 157 uipc_accept(struct socket *so, struct sockaddr **nam) 158 { 159 struct unpcb *unp; 160 const struct sockaddr *sa; 161 162 /* 163 * Pass back name of connected socket, 164 * if it was bound and we are still connected 165 * (our peer may have closed already!). 
166 */ 167 unp = sotounpcb(so); 168 KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); 169 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 170 UNP_LOCK(); 171 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) 172 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 173 else 174 sa = &sun_noname; 175 bcopy(sa, *nam, sa->sa_len); 176 UNP_UNLOCK(); 177 return (0); 178 } 179 180 static int 181 uipc_attach(struct socket *so, int proto, struct thread *td) 182 { 183 184 return (unp_attach(so)); 185 } 186 187 static int 188 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 189 { 190 struct unpcb *unp; 191 int error; 192 193 unp = sotounpcb(so); 194 KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); 195 UNP_LOCK(); 196 error = unp_bind(unp, nam, td); 197 UNP_UNLOCK(); 198 return (error); 199 } 200 201 static int 202 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 203 { 204 int error; 205 206 KASSERT(td == curthread, ("uipc_connect: td != curthread")); 207 UNP_LOCK(); 208 error = unp_connect(so, nam, td); 209 UNP_UNLOCK(); 210 return (error); 211 } 212 213 int 214 uipc_connect2(struct socket *so1, struct socket *so2) 215 { 216 struct unpcb *unp; 217 int error; 218 219 unp = sotounpcb(so1); 220 KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); 221 UNP_LOCK(); 222 error = unp_connect2(so1, so2, PRU_CONNECT2); 223 UNP_UNLOCK(); 224 return (error); 225 } 226 227 /* control is EOPNOTSUPP */ 228 229 static void 230 uipc_detach(struct socket *so) 231 { 232 struct unpcb *unp; 233 234 unp = sotounpcb(so); 235 KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); 236 UNP_LOCK(); 237 unp_detach(unp); 238 UNP_UNLOCK_ASSERT(); 239 } 240 241 static int 242 uipc_disconnect(struct socket *so) 243 { 244 struct unpcb *unp; 245 246 unp = sotounpcb(so); 247 KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); 248 UNP_LOCK(); 249 unp_disconnect(unp); 250 UNP_UNLOCK(); 251 return (0); 252 } 253 254 static int 255 
uipc_listen(struct socket *so, int backlog, struct thread *td) 256 { 257 struct unpcb *unp; 258 int error; 259 260 unp = sotounpcb(so); 261 KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); 262 UNP_LOCK(); 263 if (unp->unp_vnode == NULL) { 264 UNP_UNLOCK(); 265 return (EINVAL); 266 } 267 error = unp_listen(so, unp, backlog, td); 268 UNP_UNLOCK(); 269 return (error); 270 } 271 272 static int 273 uipc_peeraddr(struct socket *so, struct sockaddr **nam) 274 { 275 struct unpcb *unp; 276 const struct sockaddr *sa; 277 278 unp = sotounpcb(so); 279 KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); 280 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 281 UNP_LOCK(); 282 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) 283 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 284 else { 285 /* 286 * XXX: It seems that this test always fails even when 287 * connection is established. So, this else clause is 288 * added as workaround to return PF_LOCAL sockaddr. 289 */ 290 sa = &sun_noname; 291 } 292 bcopy(sa, *nam, sa->sa_len); 293 UNP_UNLOCK(); 294 return (0); 295 } 296 297 static int 298 uipc_rcvd(struct socket *so, int flags) 299 { 300 struct unpcb *unp; 301 struct socket *so2; 302 u_int mbcnt, sbcc; 303 u_long newhiwat; 304 305 unp = sotounpcb(so); 306 KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); 307 switch (so->so_type) { 308 case SOCK_DGRAM: 309 panic("uipc_rcvd DGRAM?"); 310 /*NOTREACHED*/ 311 312 case SOCK_STREAM: 313 /* 314 * Adjust backpressure on sender 315 * and wakeup any waiting to write. 
316 */ 317 SOCKBUF_LOCK(&so->so_rcv); 318 mbcnt = so->so_rcv.sb_mbcnt; 319 sbcc = so->so_rcv.sb_cc; 320 SOCKBUF_UNLOCK(&so->so_rcv); 321 UNP_LOCK(); 322 if (unp->unp_conn == NULL) { 323 UNP_UNLOCK(); 324 break; 325 } 326 so2 = unp->unp_conn->unp_socket; 327 SOCKBUF_LOCK(&so2->so_snd); 328 so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt; 329 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc; 330 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 331 newhiwat, RLIM_INFINITY); 332 sowwakeup_locked(so2); 333 unp->unp_mbcnt = mbcnt; 334 unp->unp_cc = sbcc; 335 UNP_UNLOCK(); 336 break; 337 338 default: 339 panic("uipc_rcvd unknown socktype"); 340 } 341 return (0); 342 } 343 344 /* pru_rcvoob is EOPNOTSUPP */ 345 346 static int 347 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 348 struct mbuf *control, struct thread *td) 349 { 350 int error = 0; 351 struct unpcb *unp; 352 struct socket *so2; 353 u_int mbcnt, sbcc; 354 u_long newhiwat; 355 356 unp = sotounpcb(so); 357 KASSERT(unp != NULL, ("uipc_send: unp == NULL")); 358 if (flags & PRUS_OOB) { 359 error = EOPNOTSUPP; 360 goto release; 361 } 362 363 if (control != NULL && (error = unp_internalize(&control, td))) 364 goto release; 365 366 UNP_LOCK(); 367 switch (so->so_type) { 368 case SOCK_DGRAM: 369 { 370 const struct sockaddr *from; 371 372 if (nam != NULL) { 373 if (unp->unp_conn != NULL) { 374 error = EISCONN; 375 break; 376 } 377 error = unp_connect(so, nam, td); 378 if (error) 379 break; 380 } else { 381 if (unp->unp_conn == NULL) { 382 error = ENOTCONN; 383 break; 384 } 385 } 386 so2 = unp->unp_conn->unp_socket; 387 if (unp->unp_addr != NULL) 388 from = (struct sockaddr *)unp->unp_addr; 389 else 390 from = &sun_noname; 391 if (unp->unp_conn->unp_flags & UNP_WANTCRED) 392 control = unp_addsockcred(td, control); 393 SOCKBUF_LOCK(&so2->so_rcv); 394 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { 395 sorwakeup_locked(so2); 396 m = NULL; 397 control = NULL; 398 } 
else { 399 SOCKBUF_UNLOCK(&so2->so_rcv); 400 error = ENOBUFS; 401 } 402 if (nam != NULL) 403 unp_disconnect(unp); 404 break; 405 } 406 407 case SOCK_STREAM: 408 /* Connect if not connected yet. */ 409 /* 410 * Note: A better implementation would complain 411 * if not equal to the peer's address. 412 */ 413 if ((so->so_state & SS_ISCONNECTED) == 0) { 414 if (nam != NULL) { 415 error = unp_connect(so, nam, td); 416 if (error) 417 break; /* XXX */ 418 } else { 419 error = ENOTCONN; 420 break; 421 } 422 } 423 424 /* Lockless read. */ 425 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 426 error = EPIPE; 427 break; 428 } 429 if (unp->unp_conn == NULL) 430 panic("uipc_send connected but no connection?"); 431 so2 = unp->unp_conn->unp_socket; 432 SOCKBUF_LOCK(&so2->so_rcv); 433 if (unp->unp_conn->unp_flags & UNP_WANTCRED) { 434 /* 435 * Credentials are passed only once on 436 * SOCK_STREAM. 437 */ 438 unp->unp_conn->unp_flags &= ~UNP_WANTCRED; 439 control = unp_addsockcred(td, control); 440 } 441 /* 442 * Send to paired receive port, and then reduce 443 * send buffer hiwater marks to maintain backpressure. 444 * Wake up readers. 445 */ 446 if (control != NULL) { 447 if (sbappendcontrol_locked(&so2->so_rcv, m, control)) 448 control = NULL; 449 } else { 450 sbappend_locked(&so2->so_rcv, m); 451 } 452 mbcnt = so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 453 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 454 sbcc = so2->so_rcv.sb_cc; 455 sorwakeup_locked(so2); 456 457 SOCKBUF_LOCK(&so->so_snd); 458 newhiwat = so->so_snd.sb_hiwat - 459 (sbcc - unp->unp_conn->unp_cc); 460 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 461 newhiwat, RLIM_INFINITY); 462 so->so_snd.sb_mbmax -= mbcnt; 463 SOCKBUF_UNLOCK(&so->so_snd); 464 465 unp->unp_conn->unp_cc = sbcc; 466 m = NULL; 467 break; 468 469 default: 470 panic("uipc_send unknown socktype"); 471 } 472 473 /* 474 * SEND_EOF is equivalent to a SEND followed by 475 * a SHUTDOWN. 
476 */ 477 if (flags & PRUS_EOF) { 478 socantsendmore(so); 479 unp_shutdown(unp); 480 } 481 UNP_UNLOCK(); 482 483 if (control != NULL && error != 0) 484 unp_dispose(control); 485 486 release: 487 if (control != NULL) 488 m_freem(control); 489 if (m != NULL) 490 m_freem(m); 491 return (error); 492 } 493 494 static int 495 uipc_sense(struct socket *so, struct stat *sb) 496 { 497 struct unpcb *unp; 498 struct socket *so2; 499 500 unp = sotounpcb(so); 501 KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); 502 UNP_LOCK(); 503 sb->st_blksize = so->so_snd.sb_hiwat; 504 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 505 so2 = unp->unp_conn->unp_socket; 506 sb->st_blksize += so2->so_rcv.sb_cc; 507 } 508 sb->st_dev = NODEV; 509 if (unp->unp_ino == 0) 510 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; 511 sb->st_ino = unp->unp_ino; 512 UNP_UNLOCK(); 513 return (0); 514 } 515 516 static int 517 uipc_shutdown(struct socket *so) 518 { 519 struct unpcb *unp; 520 521 unp = sotounpcb(so); 522 KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); 523 UNP_LOCK(); 524 socantsendmore(so); 525 unp_shutdown(unp); 526 UNP_UNLOCK(); 527 return (0); 528 } 529 530 static int 531 uipc_sockaddr(struct socket *so, struct sockaddr **nam) 532 { 533 struct unpcb *unp; 534 const struct sockaddr *sa; 535 536 unp = sotounpcb(so); 537 KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); 538 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 539 UNP_LOCK(); 540 if (unp->unp_addr != NULL) 541 sa = (struct sockaddr *) unp->unp_addr; 542 else 543 sa = &sun_noname; 544 bcopy(sa, *nam, sa->sa_len); 545 UNP_UNLOCK(); 546 return (0); 547 } 548 549 struct pr_usrreqs uipc_usrreqs = { 550 .pru_abort = uipc_abort, 551 .pru_accept = uipc_accept, 552 .pru_attach = uipc_attach, 553 .pru_bind = uipc_bind, 554 .pru_connect = uipc_connect, 555 .pru_connect2 = uipc_connect2, 556 .pru_detach = uipc_detach, 557 .pru_disconnect = uipc_disconnect, 558 .pru_listen = uipc_listen, 559 
.pru_peeraddr = uipc_peeraddr, 560 .pru_rcvd = uipc_rcvd, 561 .pru_send = uipc_send, 562 .pru_sense = uipc_sense, 563 .pru_shutdown = uipc_shutdown, 564 .pru_sockaddr = uipc_sockaddr, 565 .pru_sosend = sosend, 566 .pru_soreceive = soreceive, 567 .pru_sopoll = sopoll, 568 }; 569 570 int 571 uipc_ctloutput(struct socket *so, struct sockopt *sopt) 572 { 573 struct unpcb *unp; 574 struct xucred xu; 575 int error, optval; 576 577 if (sopt->sopt_level != 0) 578 return (EINVAL); 579 580 unp = sotounpcb(so); 581 KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); 582 UNP_LOCK(); 583 error = 0; 584 switch (sopt->sopt_dir) { 585 case SOPT_GET: 586 switch (sopt->sopt_name) { 587 case LOCAL_PEERCRED: 588 if (unp->unp_flags & UNP_HAVEPC) 589 xu = unp->unp_peercred; 590 else { 591 if (so->so_type == SOCK_STREAM) 592 error = ENOTCONN; 593 else 594 error = EINVAL; 595 } 596 if (error == 0) 597 error = sooptcopyout(sopt, &xu, sizeof(xu)); 598 break; 599 case LOCAL_CREDS: 600 optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0; 601 error = sooptcopyout(sopt, &optval, sizeof(optval)); 602 break; 603 case LOCAL_CONNWAIT: 604 optval = unp->unp_flags & UNP_CONNWAIT ? 
1 : 0; 605 error = sooptcopyout(sopt, &optval, sizeof(optval)); 606 break; 607 default: 608 error = EOPNOTSUPP; 609 break; 610 } 611 break; 612 case SOPT_SET: 613 switch (sopt->sopt_name) { 614 case LOCAL_CREDS: 615 case LOCAL_CONNWAIT: 616 error = sooptcopyin(sopt, &optval, sizeof(optval), 617 sizeof(optval)); 618 if (error) 619 break; 620 621 #define OPTSET(bit) \ 622 if (optval) \ 623 unp->unp_flags |= bit; \ 624 else \ 625 unp->unp_flags &= ~bit; 626 627 switch (sopt->sopt_name) { 628 case LOCAL_CREDS: 629 OPTSET(UNP_WANTCRED); 630 break; 631 case LOCAL_CONNWAIT: 632 OPTSET(UNP_CONNWAIT); 633 break; 634 default: 635 break; 636 } 637 break; 638 #undef OPTSET 639 default: 640 error = ENOPROTOOPT; 641 break; 642 } 643 break; 644 default: 645 error = EOPNOTSUPP; 646 break; 647 } 648 UNP_UNLOCK(); 649 return (error); 650 } 651 652 /* 653 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 654 * for stream sockets, although the total for sender and receiver is 655 * actually only PIPSIZ. 656 * Datagram sockets really use the sendspace as the maximum datagram size, 657 * and don't really want to reserve the sendspace. Their recvspace should 658 * be large enough for at least one max-size datagram plus address. 
659 */ 660 #ifndef PIPSIZ 661 #define PIPSIZ 8192 662 #endif 663 static u_long unpst_sendspace = PIPSIZ; 664 static u_long unpst_recvspace = PIPSIZ; 665 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 666 static u_long unpdg_recvspace = 4*1024; 667 668 static int unp_rights; /* file descriptors in flight */ 669 670 SYSCTL_DECL(_net_local_stream); 671 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 672 &unpst_sendspace, 0, ""); 673 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 674 &unpst_recvspace, 0, ""); 675 SYSCTL_DECL(_net_local_dgram); 676 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 677 &unpdg_sendspace, 0, ""); 678 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 679 &unpdg_recvspace, 0, ""); 680 SYSCTL_DECL(_net_local); 681 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 682 683 static int 684 unp_attach(struct socket *so) 685 { 686 struct unpcb *unp; 687 int error; 688 689 KASSERT(so->so_pcb == NULL, ("unp_attach: so_pcb != NULL")); 690 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 691 switch (so->so_type) { 692 693 case SOCK_STREAM: 694 error = soreserve(so, unpst_sendspace, unpst_recvspace); 695 break; 696 697 case SOCK_DGRAM: 698 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 699 break; 700 701 default: 702 panic("unp_attach"); 703 } 704 if (error) 705 return (error); 706 } 707 unp = uma_zalloc(unp_zone, M_WAITOK | M_ZERO); 708 if (unp == NULL) 709 return (ENOBUFS); 710 LIST_INIT(&unp->unp_refs); 711 unp->unp_socket = so; 712 so->so_pcb = unp; 713 714 UNP_LOCK(); 715 unp->unp_gencnt = ++unp_gencnt; 716 unp_count++; 717 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead 718 : &unp_shead, unp, unp_link); 719 UNP_UNLOCK(); 720 721 return (0); 722 } 723 724 static void 725 unp_detach(struct unpcb *unp) 726 { 727 struct vnode *vp; 728 int local_unp_rights; 729 730 UNP_LOCK_ASSERT(); 731 732 LIST_REMOVE(unp, unp_link); 733 unp->unp_gencnt = ++unp_gencnt; 734 --unp_count; 735 if ((vp = unp->unp_vnode) != NULL) { 736 /* 737 * XXXRW: should v_socket be frobbed only while holding 738 * Giant? 739 */ 740 unp->unp_vnode->v_socket = NULL; 741 unp->unp_vnode = NULL; 742 } 743 if (unp->unp_conn != NULL) 744 unp_disconnect(unp); 745 while (!LIST_EMPTY(&unp->unp_refs)) { 746 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 747 unp_drop(ref, ECONNRESET); 748 } 749 soisdisconnected(unp->unp_socket); 750 unp->unp_socket->so_pcb = NULL; 751 local_unp_rights = unp_rights; 752 UNP_UNLOCK(); 753 if (unp->unp_addr != NULL) 754 FREE(unp->unp_addr, M_SONAME); 755 uma_zfree(unp_zone, unp); 756 if (vp) { 757 int vfslocked; 758 759 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 760 vrele(vp); 761 VFS_UNLOCK_GIANT(vfslocked); 762 } 763 if (local_unp_rights) 764 taskqueue_enqueue(taskqueue_thread, &unp_gc_task); 765 } 766 767 static int 768 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) 769 { 770 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 771 struct vnode *vp; 772 struct mount *mp; 773 struct vattr vattr; 774 int error, namelen; 775 struct nameidata nd; 776 char *buf; 777 778 UNP_LOCK_ASSERT(); 779 780 /* 781 * XXXRW: This test-and-set of unp_vnode is non-atomic; the 782 * unlocked read here is fine, but the value of unp_vnode needs 783 * to be tested again after we do all the lookups to see if the 784 * pcb is still unbound? 
785 */ 786 if (unp->unp_vnode != NULL) 787 return (EINVAL); 788 789 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 790 if (namelen <= 0) 791 return (EINVAL); 792 793 UNP_UNLOCK(); 794 795 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 796 strlcpy(buf, soun->sun_path, namelen + 1); 797 798 mtx_lock(&Giant); 799 restart: 800 mtx_assert(&Giant, MA_OWNED); 801 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, 802 buf, td); 803 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 804 error = namei(&nd); 805 if (error) 806 goto done; 807 vp = nd.ni_vp; 808 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 809 NDFREE(&nd, NDF_ONLY_PNBUF); 810 if (nd.ni_dvp == vp) 811 vrele(nd.ni_dvp); 812 else 813 vput(nd.ni_dvp); 814 if (vp != NULL) { 815 vrele(vp); 816 error = EADDRINUSE; 817 goto done; 818 } 819 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 820 if (error) 821 goto done; 822 goto restart; 823 } 824 VATTR_NULL(&vattr); 825 vattr.va_type = VSOCK; 826 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 827 #ifdef MAC 828 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 829 &vattr); 830 #endif 831 if (error == 0) { 832 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 833 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 834 } 835 NDFREE(&nd, NDF_ONLY_PNBUF); 836 vput(nd.ni_dvp); 837 if (error) { 838 vn_finished_write(mp); 839 goto done; 840 } 841 vp = nd.ni_vp; 842 ASSERT_VOP_LOCKED(vp, "unp_bind"); 843 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 844 UNP_LOCK(); 845 vp->v_socket = unp->unp_socket; 846 unp->unp_vnode = vp; 847 unp->unp_addr = soun; 848 UNP_UNLOCK(); 849 VOP_UNLOCK(vp, 0, td); 850 vn_finished_write(mp); 851 done: 852 mtx_unlock(&Giant); 853 free(buf, M_TEMP); 854 UNP_LOCK(); 855 return (error); 856 } 857 858 static int 859 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 860 { 861 struct sockaddr_un *soun = 
(struct sockaddr_un *)nam; 862 struct vnode *vp; 863 struct socket *so2, *so3; 864 struct unpcb *unp, *unp2, *unp3; 865 int error, len; 866 struct nameidata nd; 867 char buf[SOCK_MAXADDRLEN]; 868 struct sockaddr *sa; 869 870 UNP_LOCK_ASSERT(); 871 872 unp = sotounpcb(so); 873 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 874 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 875 if (len <= 0) 876 return (EINVAL); 877 strlcpy(buf, soun->sun_path, len + 1); 878 UNP_UNLOCK(); 879 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 880 mtx_lock(&Giant); 881 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); 882 error = namei(&nd); 883 if (error) 884 vp = NULL; 885 else 886 vp = nd.ni_vp; 887 ASSERT_VOP_LOCKED(vp, "unp_connect"); 888 NDFREE(&nd, NDF_ONLY_PNBUF); 889 if (error) 890 goto bad; 891 892 if (vp->v_type != VSOCK) { 893 error = ENOTSOCK; 894 goto bad; 895 } 896 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 897 if (error) 898 goto bad; 899 mtx_unlock(&Giant); 900 UNP_LOCK(); 901 unp = sotounpcb(so); 902 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 903 so2 = vp->v_socket; 904 if (so2 == NULL) { 905 error = ECONNREFUSED; 906 goto bad2; 907 } 908 if (so->so_type != so2->so_type) { 909 error = EPROTOTYPE; 910 goto bad2; 911 } 912 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 913 if (so2->so_options & SO_ACCEPTCONN) { 914 /* 915 * NB: drop locks here so unp_attach is entered 916 * w/o locks; this avoids a recursive lock 917 * of the head and holding sleep locks across 918 * a (potentially) blocking malloc. 
919 */ 920 UNP_UNLOCK(); 921 so3 = sonewconn(so2, 0); 922 UNP_LOCK(); 923 } else 924 so3 = NULL; 925 if (so3 == NULL) { 926 error = ECONNREFUSED; 927 goto bad2; 928 } 929 unp = sotounpcb(so); 930 unp2 = sotounpcb(so2); 931 unp3 = sotounpcb(so3); 932 if (unp2->unp_addr != NULL) { 933 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 934 unp3->unp_addr = (struct sockaddr_un *) sa; 935 sa = NULL; 936 } 937 /* 938 * unp_peercred management: 939 * 940 * The connecter's (client's) credentials are copied 941 * from its process structure at the time of connect() 942 * (which is now). 943 */ 944 cru2x(td->td_ucred, &unp3->unp_peercred); 945 unp3->unp_flags |= UNP_HAVEPC; 946 /* 947 * The receiver's (server's) credentials are copied 948 * from the unp_peercred member of socket on which the 949 * former called listen(); unp_listen() cached that 950 * process's credentials at that time so we can use 951 * them now. 952 */ 953 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 954 ("unp_connect: listener without cached peercred")); 955 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 956 sizeof(unp->unp_peercred)); 957 unp->unp_flags |= UNP_HAVEPC; 958 if (unp2->unp_flags & UNP_WANTCRED) 959 unp3->unp_flags |= UNP_WANTCRED; 960 #ifdef MAC 961 SOCK_LOCK(so); 962 mac_set_socket_peer_from_socket(so, so3); 963 mac_set_socket_peer_from_socket(so3, so); 964 SOCK_UNLOCK(so); 965 #endif 966 967 so2 = so3; 968 } 969 error = unp_connect2(so, so2, PRU_CONNECT); 970 bad2: 971 UNP_UNLOCK(); 972 mtx_lock(&Giant); 973 bad: 974 mtx_assert(&Giant, MA_OWNED); 975 if (vp != NULL) 976 vput(vp); 977 mtx_unlock(&Giant); 978 free(sa, M_SONAME); 979 UNP_LOCK(); 980 return (error); 981 } 982 983 static int 984 unp_connect2(struct socket *so, struct socket *so2, int req) 985 { 986 struct unpcb *unp = sotounpcb(so); 987 struct unpcb *unp2; 988 989 UNP_LOCK_ASSERT(); 990 991 if (so2->so_type != so->so_type) 992 return (EPROTOTYPE); 993 unp2 = sotounpcb(so2); 994 KASSERT(unp2 != NULL, ("unp_connect2: unp2 == 
NULL")); 995 unp->unp_conn = unp2; 996 switch (so->so_type) { 997 998 case SOCK_DGRAM: 999 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1000 soisconnected(so); 1001 break; 1002 1003 case SOCK_STREAM: 1004 unp2->unp_conn = unp; 1005 if (req == PRU_CONNECT && 1006 ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) 1007 soisconnecting(so); 1008 else 1009 soisconnected(so); 1010 soisconnected(so2); 1011 break; 1012 1013 default: 1014 panic("unp_connect2"); 1015 } 1016 return (0); 1017 } 1018 1019 static void 1020 unp_disconnect(struct unpcb *unp) 1021 { 1022 struct unpcb *unp2 = unp->unp_conn; 1023 struct socket *so; 1024 1025 UNP_LOCK_ASSERT(); 1026 1027 if (unp2 == NULL) 1028 return; 1029 unp->unp_conn = NULL; 1030 switch (unp->unp_socket->so_type) { 1031 case SOCK_DGRAM: 1032 LIST_REMOVE(unp, unp_reflink); 1033 so = unp->unp_socket; 1034 SOCK_LOCK(so); 1035 so->so_state &= ~SS_ISCONNECTED; 1036 SOCK_UNLOCK(so); 1037 break; 1038 1039 case SOCK_STREAM: 1040 soisdisconnected(unp->unp_socket); 1041 unp2->unp_conn = NULL; 1042 soisdisconnected(unp2->unp_socket); 1043 break; 1044 } 1045 } 1046 1047 /* 1048 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed 1049 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers 1050 * are safe to reference. It first scans the list of struct unpcb's to 1051 * generate a pointer list, then it rescans its list one entry at a time to 1052 * externalize and copyout. It checks the generation number to see if a 1053 * struct unpcb has been reused, and will skip it if so. 1054 */ 1055 static int 1056 unp_pcblist(SYSCTL_HANDLER_ARGS) 1057 { 1058 int error, i, n; 1059 struct unpcb *unp, **unp_list; 1060 unp_gen_t gencnt; 1061 struct xunpgen *xug; 1062 struct unp_head *head; 1063 struct xunpcb *xu; 1064 1065 head = ((intptr_t)arg1 == SOCK_DGRAM ? 
&unp_dhead : &unp_shead); 1066 1067 /* 1068 * The process of preparing the PCB list is too time-consuming and 1069 * resource-intensive to repeat twice on every request. 1070 */ 1071 if (req->oldptr == NULL) { 1072 n = unp_count; 1073 req->oldidx = 2 * (sizeof *xug) 1074 + (n + n/8) * sizeof(struct xunpcb); 1075 return (0); 1076 } 1077 1078 if (req->newptr != NULL) 1079 return (EPERM); 1080 1081 /* 1082 * OK, now we're committed to doing something. 1083 */ 1084 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 1085 UNP_LOCK(); 1086 gencnt = unp_gencnt; 1087 n = unp_count; 1088 UNP_UNLOCK(); 1089 1090 xug->xug_len = sizeof *xug; 1091 xug->xug_count = n; 1092 xug->xug_gen = gencnt; 1093 xug->xug_sogen = so_gencnt; 1094 error = SYSCTL_OUT(req, xug, sizeof *xug); 1095 if (error) { 1096 free(xug, M_TEMP); 1097 return (error); 1098 } 1099 1100 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1101 1102 UNP_LOCK(); 1103 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1104 unp = LIST_NEXT(unp, unp_link)) { 1105 if (unp->unp_gencnt <= gencnt) { 1106 if (cr_cansee(req->td->td_ucred, 1107 unp->unp_socket->so_cred)) 1108 continue; 1109 unp_list[i++] = unp; 1110 } 1111 } 1112 UNP_UNLOCK(); 1113 n = i; /* in case we lost some during malloc */ 1114 1115 error = 0; 1116 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); 1117 for (i = 0; i < n; i++) { 1118 unp = unp_list[i]; 1119 if (unp->unp_gencnt <= gencnt) { 1120 xu->xu_len = sizeof *xu; 1121 xu->xu_unpp = unp; 1122 /* 1123 * XXX - need more locking here to protect against 1124 * connect/disconnect races for SMP. 
1125 */ 1126 if (unp->unp_addr != NULL) 1127 bcopy(unp->unp_addr, &xu->xu_addr, 1128 unp->unp_addr->sun_len); 1129 if (unp->unp_conn != NULL && 1130 unp->unp_conn->unp_addr != NULL) 1131 bcopy(unp->unp_conn->unp_addr, 1132 &xu->xu_caddr, 1133 unp->unp_conn->unp_addr->sun_len); 1134 bcopy(unp, &xu->xu_unp, sizeof *unp); 1135 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1136 error = SYSCTL_OUT(req, xu, sizeof *xu); 1137 } 1138 } 1139 free(xu, M_TEMP); 1140 if (!error) { 1141 /* 1142 * Give the user an updated idea of our state. 1143 * If the generation differs from what we told 1144 * her before, she knows that something happened 1145 * while we were processing this request, and it 1146 * might be necessary to retry. 1147 */ 1148 xug->xug_gen = unp_gencnt; 1149 xug->xug_sogen = so_gencnt; 1150 xug->xug_count = unp_count; 1151 error = SYSCTL_OUT(req, xug, sizeof *xug); 1152 } 1153 free(unp_list, M_TEMP); 1154 free(xug, M_TEMP); 1155 return (error); 1156 } 1157 1158 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1159 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1160 "List of active local datagram sockets"); 1161 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1162 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1163 "List of active local stream sockets"); 1164 1165 static void 1166 unp_shutdown(struct unpcb *unp) 1167 { 1168 struct socket *so; 1169 1170 UNP_LOCK_ASSERT(); 1171 1172 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 1173 (so = unp->unp_conn->unp_socket)) 1174 socantrcvmore(so); 1175 } 1176 1177 static void 1178 unp_drop(struct unpcb *unp, int errno) 1179 { 1180 struct socket *so = unp->unp_socket; 1181 1182 UNP_LOCK_ASSERT(); 1183 1184 so->so_error = errno; 1185 unp_disconnect(unp); 1186 } 1187 1188 static void 1189 unp_freerights(struct file **rp, int fdcount) 1190 { 1191 int i; 1192 struct file *fp; 1193 1194 for (i = 0; i < fdcount; i++) { 1195 fp = *rp; 1196 /* 1197 * zero the pointer 
before calling 1198 * unp_discard since it may end up 1199 * in unp_gc().. 1200 * 1201 * XXXRW: This is less true than it used to be. 1202 */ 1203 *rp++ = 0; 1204 unp_discard(fp); 1205 } 1206 } 1207 1208 int 1209 unp_externalize(struct mbuf *control, struct mbuf **controlp) 1210 { 1211 struct thread *td = curthread; /* XXX */ 1212 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1213 int i; 1214 int *fdp; 1215 struct file **rp; 1216 struct file *fp; 1217 void *data; 1218 socklen_t clen = control->m_len, datalen; 1219 int error, newfds; 1220 int f; 1221 u_int newlen; 1222 1223 UNP_UNLOCK_ASSERT(); 1224 1225 error = 0; 1226 if (controlp != NULL) /* controlp == NULL => free control messages */ 1227 *controlp = NULL; 1228 1229 while (cm != NULL) { 1230 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1231 error = EINVAL; 1232 break; 1233 } 1234 1235 data = CMSG_DATA(cm); 1236 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1237 1238 if (cm->cmsg_level == SOL_SOCKET 1239 && cm->cmsg_type == SCM_RIGHTS) { 1240 newfds = datalen / sizeof(struct file *); 1241 rp = data; 1242 1243 /* If we're not outputting the descriptors free them. */ 1244 if (error || controlp == NULL) { 1245 unp_freerights(rp, newfds); 1246 goto next; 1247 } 1248 FILEDESC_LOCK(td->td_proc->p_fd); 1249 /* if the new FD's will not fit free them. */ 1250 if (!fdavail(td, newfds)) { 1251 FILEDESC_UNLOCK(td->td_proc->p_fd); 1252 error = EMSGSIZE; 1253 unp_freerights(rp, newfds); 1254 goto next; 1255 } 1256 /* 1257 * now change each pointer to an fd in the global 1258 * table to an integer that is the index to the 1259 * local fd table entry that we set up to point 1260 * to the global one we are transferring. 
1261 */ 1262 newlen = newfds * sizeof(int); 1263 *controlp = sbcreatecontrol(NULL, newlen, 1264 SCM_RIGHTS, SOL_SOCKET); 1265 if (*controlp == NULL) { 1266 FILEDESC_UNLOCK(td->td_proc->p_fd); 1267 error = E2BIG; 1268 unp_freerights(rp, newfds); 1269 goto next; 1270 } 1271 1272 fdp = (int *) 1273 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1274 for (i = 0; i < newfds; i++) { 1275 if (fdalloc(td, 0, &f)) 1276 panic("unp_externalize fdalloc failed"); 1277 fp = *rp++; 1278 td->td_proc->p_fd->fd_ofiles[f] = fp; 1279 FILE_LOCK(fp); 1280 fp->f_msgcount--; 1281 FILE_UNLOCK(fp); 1282 unp_rights--; 1283 *fdp++ = f; 1284 } 1285 FILEDESC_UNLOCK(td->td_proc->p_fd); 1286 } else { /* We can just copy anything else across */ 1287 if (error || controlp == NULL) 1288 goto next; 1289 *controlp = sbcreatecontrol(NULL, datalen, 1290 cm->cmsg_type, cm->cmsg_level); 1291 if (*controlp == NULL) { 1292 error = ENOBUFS; 1293 goto next; 1294 } 1295 bcopy(data, 1296 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1297 datalen); 1298 } 1299 1300 controlp = &(*controlp)->m_next; 1301 1302 next: 1303 if (CMSG_SPACE(datalen) < clen) { 1304 clen -= CMSG_SPACE(datalen); 1305 cm = (struct cmsghdr *) 1306 ((caddr_t)cm + CMSG_SPACE(datalen)); 1307 } else { 1308 clen = 0; 1309 cm = NULL; 1310 } 1311 } 1312 1313 m_freem(control); 1314 1315 return (error); 1316 } 1317 1318 static void 1319 unp_zone_change(void *tag) 1320 { 1321 1322 uma_zone_set_max(unp_zone, maxsockets); 1323 } 1324 1325 void 1326 unp_init(void) 1327 { 1328 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1329 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1330 if (unp_zone == NULL) 1331 panic("unp_init"); 1332 uma_zone_set_max(unp_zone, maxsockets); 1333 EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, 1334 NULL, EVENTHANDLER_PRI_ANY); 1335 LIST_INIT(&unp_dhead); 1336 LIST_INIT(&unp_shead); 1337 TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); 1338 UNP_LOCK_INIT(); 1339 } 1340 1341 static int 1342 
unp_internalize(struct mbuf **controlp, struct thread *td) 1343 { 1344 struct mbuf *control = *controlp; 1345 struct proc *p = td->td_proc; 1346 struct filedesc *fdescp = p->p_fd; 1347 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1348 struct cmsgcred *cmcred; 1349 struct file **rp; 1350 struct file *fp; 1351 struct timeval *tv; 1352 int i, fd, *fdp; 1353 void *data; 1354 socklen_t clen = control->m_len, datalen; 1355 int error, oldfds; 1356 u_int newlen; 1357 1358 UNP_UNLOCK_ASSERT(); 1359 1360 error = 0; 1361 *controlp = NULL; 1362 1363 while (cm != NULL) { 1364 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1365 || cm->cmsg_len > clen) { 1366 error = EINVAL; 1367 goto out; 1368 } 1369 1370 data = CMSG_DATA(cm); 1371 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1372 1373 switch (cm->cmsg_type) { 1374 /* 1375 * Fill in credential information. 1376 */ 1377 case SCM_CREDS: 1378 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1379 SCM_CREDS, SOL_SOCKET); 1380 if (*controlp == NULL) { 1381 error = ENOBUFS; 1382 goto out; 1383 } 1384 1385 cmcred = (struct cmsgcred *) 1386 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1387 cmcred->cmcred_pid = p->p_pid; 1388 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1389 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1390 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1391 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1392 CMGROUP_MAX); 1393 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1394 cmcred->cmcred_groups[i] = 1395 td->td_ucred->cr_groups[i]; 1396 break; 1397 1398 case SCM_RIGHTS: 1399 oldfds = datalen / sizeof (int); 1400 /* 1401 * check that all the FDs passed in refer to legal files 1402 * If not, reject the entire operation. 
1403 */ 1404 fdp = data; 1405 FILEDESC_LOCK(fdescp); 1406 for (i = 0; i < oldfds; i++) { 1407 fd = *fdp++; 1408 if ((unsigned)fd >= fdescp->fd_nfiles || 1409 fdescp->fd_ofiles[fd] == NULL) { 1410 FILEDESC_UNLOCK(fdescp); 1411 error = EBADF; 1412 goto out; 1413 } 1414 fp = fdescp->fd_ofiles[fd]; 1415 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1416 FILEDESC_UNLOCK(fdescp); 1417 error = EOPNOTSUPP; 1418 goto out; 1419 } 1420 1421 } 1422 /* 1423 * Now replace the integer FDs with pointers to 1424 * the associated global file table entry.. 1425 */ 1426 newlen = oldfds * sizeof(struct file *); 1427 *controlp = sbcreatecontrol(NULL, newlen, 1428 SCM_RIGHTS, SOL_SOCKET); 1429 if (*controlp == NULL) { 1430 FILEDESC_UNLOCK(fdescp); 1431 error = E2BIG; 1432 goto out; 1433 } 1434 1435 fdp = data; 1436 rp = (struct file **) 1437 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1438 for (i = 0; i < oldfds; i++) { 1439 fp = fdescp->fd_ofiles[*fdp++]; 1440 *rp++ = fp; 1441 FILE_LOCK(fp); 1442 fp->f_count++; 1443 fp->f_msgcount++; 1444 FILE_UNLOCK(fp); 1445 unp_rights++; 1446 } 1447 FILEDESC_UNLOCK(fdescp); 1448 break; 1449 1450 case SCM_TIMESTAMP: 1451 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1452 SCM_TIMESTAMP, SOL_SOCKET); 1453 if (*controlp == NULL) { 1454 error = ENOBUFS; 1455 goto out; 1456 } 1457 tv = (struct timeval *) 1458 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1459 microtime(tv); 1460 break; 1461 1462 default: 1463 error = EINVAL; 1464 goto out; 1465 } 1466 1467 controlp = &(*controlp)->m_next; 1468 1469 if (CMSG_SPACE(datalen) < clen) { 1470 clen -= CMSG_SPACE(datalen); 1471 cm = (struct cmsghdr *) 1472 ((caddr_t)cm + CMSG_SPACE(datalen)); 1473 } else { 1474 clen = 0; 1475 cm = NULL; 1476 } 1477 } 1478 1479 out: 1480 m_freem(control); 1481 1482 return (error); 1483 } 1484 1485 struct mbuf * 1486 unp_addsockcred(struct thread *td, struct mbuf *control) 1487 { 1488 struct mbuf *m, *n, *n_prev; 1489 struct sockcred *sc; 1490 const struct cmsghdr *cm; 1491 int 
ngroups; 1492 int i; 1493 1494 ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); 1495 1496 m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); 1497 if (m == NULL) 1498 return (control); 1499 1500 sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); 1501 sc->sc_uid = td->td_ucred->cr_ruid; 1502 sc->sc_euid = td->td_ucred->cr_uid; 1503 sc->sc_gid = td->td_ucred->cr_rgid; 1504 sc->sc_egid = td->td_ucred->cr_gid; 1505 sc->sc_ngroups = ngroups; 1506 for (i = 0; i < sc->sc_ngroups; i++) 1507 sc->sc_groups[i] = td->td_ucred->cr_groups[i]; 1508 1509 /* 1510 * Unlink SCM_CREDS control messages (struct cmsgcred), since 1511 * just created SCM_CREDS control message (struct sockcred) has 1512 * another format. 1513 */ 1514 if (control != NULL) 1515 for (n = control, n_prev = NULL; n != NULL;) { 1516 cm = mtod(n, struct cmsghdr *); 1517 if (cm->cmsg_level == SOL_SOCKET && 1518 cm->cmsg_type == SCM_CREDS) { 1519 if (n_prev == NULL) 1520 control = n->m_next; 1521 else 1522 n_prev->m_next = n->m_next; 1523 n = m_free(n); 1524 } else { 1525 n_prev = n; 1526 n = n->m_next; 1527 } 1528 } 1529 1530 /* Prepend it to the head. */ 1531 m->m_next = control; 1532 1533 return (m); 1534 } 1535 1536 /* 1537 * unp_defer indicates whether additional work has been defered for a future 1538 * pass through unp_gc(). It is thread local and does not require explicit 1539 * synchronization. 
1540 */ 1541 static int unp_defer; 1542 1543 static int unp_taskcount; 1544 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, ""); 1545 1546 static int unp_recycled; 1547 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, ""); 1548 1549 static void 1550 unp_gc(__unused void *arg, int pending) 1551 { 1552 struct file *fp, *nextfp; 1553 struct socket *so; 1554 struct file **extra_ref, **fpp; 1555 int nunref, i; 1556 int nfiles_snap; 1557 int nfiles_slack = 20; 1558 1559 unp_taskcount++; 1560 unp_defer = 0; 1561 /* 1562 * before going through all this, set all FDs to 1563 * be NOT defered and NOT externally accessible 1564 */ 1565 sx_slock(&filelist_lock); 1566 LIST_FOREACH(fp, &filehead, f_list) 1567 fp->f_gcflag &= ~(FMARK|FDEFER); 1568 do { 1569 KASSERT(unp_defer >= 0, ("unp_gc: unp_defer %d", unp_defer)); 1570 LIST_FOREACH(fp, &filehead, f_list) { 1571 FILE_LOCK(fp); 1572 /* 1573 * If the file is not open, skip it -- could be a 1574 * file in the process of being opened, or in the 1575 * process of being closed. If the file is 1576 * "closing", it may have been marked for deferred 1577 * consideration. Clear the flag now if so. 1578 */ 1579 if (fp->f_count == 0) { 1580 if (fp->f_gcflag & FDEFER) 1581 unp_defer--; 1582 fp->f_gcflag &= ~(FMARK|FDEFER); 1583 FILE_UNLOCK(fp); 1584 continue; 1585 } 1586 /* 1587 * If we already marked it as 'defer' in a 1588 * previous pass, then try process it this time 1589 * and un-mark it 1590 */ 1591 if (fp->f_gcflag & FDEFER) { 1592 fp->f_gcflag &= ~FDEFER; 1593 unp_defer--; 1594 } else { 1595 /* 1596 * if it's not defered, then check if it's 1597 * already marked.. if so skip it 1598 */ 1599 if (fp->f_gcflag & FMARK) { 1600 FILE_UNLOCK(fp); 1601 continue; 1602 } 1603 /* 1604 * If all references are from messages 1605 * in transit, then skip it. it's not 1606 * externally accessible. 
1607 */ 1608 if (fp->f_count == fp->f_msgcount) { 1609 FILE_UNLOCK(fp); 1610 continue; 1611 } 1612 /* 1613 * If it got this far then it must be 1614 * externally accessible. 1615 */ 1616 fp->f_gcflag |= FMARK; 1617 } 1618 /* 1619 * either it was defered, or it is externally 1620 * accessible and not already marked so. 1621 * Now check if it is possibly one of OUR sockets. 1622 */ 1623 if (fp->f_type != DTYPE_SOCKET || 1624 (so = fp->f_data) == NULL) { 1625 FILE_UNLOCK(fp); 1626 continue; 1627 } 1628 FILE_UNLOCK(fp); 1629 if (so->so_proto->pr_domain != &localdomain || 1630 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1631 continue; 1632 /* 1633 * So, Ok, it's one of our sockets and it IS externally 1634 * accessible (or was defered). Now we look 1635 * to see if we hold any file descriptors in its 1636 * message buffers. Follow those links and mark them 1637 * as accessible too. 1638 */ 1639 SOCKBUF_LOCK(&so->so_rcv); 1640 unp_scan(so->so_rcv.sb_mb, unp_mark); 1641 SOCKBUF_UNLOCK(&so->so_rcv); 1642 } 1643 } while (unp_defer); 1644 sx_sunlock(&filelist_lock); 1645 /* 1646 * XXXRW: The following comments need updating for a post-SMPng and 1647 * deferred unp_gc() world, but are still generally accurate. 1648 * 1649 * We grab an extra reference to each of the file table entries 1650 * that are not otherwise accessible and then free the rights 1651 * that are stored in messages on them. 1652 * 1653 * The bug in the orginal code is a little tricky, so I'll describe 1654 * what's wrong with it here. 1655 * 1656 * It is incorrect to simply unp_discard each entry for f_msgcount 1657 * times -- consider the case of sockets A and B that contain 1658 * references to each other. On a last close of some other socket, 1659 * we trigger a gc since the number of outstanding rights (unp_rights) 1660 * is non-zero. If during the sweep phase the gc code unp_discards, 1661 * we end up doing a (full) closef on the descriptor. A closef on A 1662 * results in the following chain. 
Closef calls soo_close, which 1663 * calls soclose. Soclose calls first (through the switch 1664 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1665 * returns because the previous instance had set unp_gcing, and 1666 * we return all the way back to soclose, which marks the socket 1667 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1668 * to free up the rights that are queued in messages on the socket A, 1669 * i.e., the reference on B. The sorflush calls via the dom_dispose 1670 * switch unp_dispose, which unp_scans with unp_discard. This second 1671 * instance of unp_discard just calls closef on B. 1672 * 1673 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1674 * which results in another closef on A. Unfortunately, A is already 1675 * being closed, and the descriptor has already been marked with 1676 * SS_NOFDREF, and soclose panics at this point. 1677 * 1678 * Here, we first take an extra reference to each inaccessible 1679 * descriptor. Then, we call sorflush ourself, since we know 1680 * it is a Unix domain socket anyhow. After we destroy all the 1681 * rights carried in messages, we do a last closef to get rid 1682 * of our extra reference. This is the last close, and the 1683 * unp_detach etc will shut down the socket. 
1684 * 1685 * 91/09/19, bsy@cs.cmu.edu 1686 */ 1687 again: 1688 nfiles_snap = openfiles + nfiles_slack; /* some slack */ 1689 extra_ref = malloc(nfiles_snap * sizeof(struct file *), M_TEMP, 1690 M_WAITOK); 1691 sx_slock(&filelist_lock); 1692 if (nfiles_snap < openfiles) { 1693 sx_sunlock(&filelist_lock); 1694 free(extra_ref, M_TEMP); 1695 nfiles_slack += 20; 1696 goto again; 1697 } 1698 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 1699 fp != NULL; fp = nextfp) { 1700 nextfp = LIST_NEXT(fp, f_list); 1701 FILE_LOCK(fp); 1702 /* 1703 * If it's not open, skip it 1704 */ 1705 if (fp->f_count == 0) { 1706 FILE_UNLOCK(fp); 1707 continue; 1708 } 1709 /* 1710 * If all refs are from msgs, and it's not marked accessible 1711 * then it must be referenced from some unreachable cycle 1712 * of (shut-down) FDs, so include it in our 1713 * list of FDs to remove 1714 */ 1715 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { 1716 *fpp++ = fp; 1717 nunref++; 1718 fp->f_count++; 1719 } 1720 FILE_UNLOCK(fp); 1721 } 1722 sx_sunlock(&filelist_lock); 1723 /* 1724 * for each FD on our hit list, do the following two things 1725 */ 1726 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1727 struct file *tfp = *fpp; 1728 FILE_LOCK(tfp); 1729 if (tfp->f_type == DTYPE_SOCKET && 1730 tfp->f_data != NULL) { 1731 FILE_UNLOCK(tfp); 1732 sorflush(tfp->f_data); 1733 } else { 1734 FILE_UNLOCK(tfp); 1735 } 1736 } 1737 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1738 closef(*fpp, (struct thread *) NULL); 1739 unp_recycled++; 1740 } 1741 free(extra_ref, M_TEMP); 1742 } 1743 1744 void 1745 unp_dispose(struct mbuf *m) 1746 { 1747 1748 if (m) 1749 unp_scan(m, unp_discard); 1750 } 1751 1752 static int 1753 unp_listen(struct socket *so, struct unpcb *unp, int backlog, 1754 struct thread *td) 1755 { 1756 int error; 1757 1758 UNP_LOCK_ASSERT(); 1759 1760 SOCK_LOCK(so); 1761 error = solisten_proto_check(so); 1762 if (error == 0) { 1763 cru2x(td->td_ucred, 
&unp->unp_peercred); 1764 unp->unp_flags |= UNP_HAVEPCCACHED; 1765 solisten_proto(so, backlog); 1766 } 1767 SOCK_UNLOCK(so); 1768 return (error); 1769 } 1770 1771 static void 1772 unp_scan(struct mbuf *m0, void (*op)(struct file *)) 1773 { 1774 struct mbuf *m; 1775 struct file **rp; 1776 struct cmsghdr *cm; 1777 void *data; 1778 int i; 1779 socklen_t clen, datalen; 1780 int qfds; 1781 1782 while (m0 != NULL) { 1783 for (m = m0; m; m = m->m_next) { 1784 if (m->m_type != MT_CONTROL) 1785 continue; 1786 1787 cm = mtod(m, struct cmsghdr *); 1788 clen = m->m_len; 1789 1790 while (cm != NULL) { 1791 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 1792 break; 1793 1794 data = CMSG_DATA(cm); 1795 datalen = (caddr_t)cm + cm->cmsg_len 1796 - (caddr_t)data; 1797 1798 if (cm->cmsg_level == SOL_SOCKET && 1799 cm->cmsg_type == SCM_RIGHTS) { 1800 qfds = datalen / sizeof (struct file *); 1801 rp = data; 1802 for (i = 0; i < qfds; i++) 1803 (*op)(*rp++); 1804 } 1805 1806 if (CMSG_SPACE(datalen) < clen) { 1807 clen -= CMSG_SPACE(datalen); 1808 cm = (struct cmsghdr *) 1809 ((caddr_t)cm + CMSG_SPACE(datalen)); 1810 } else { 1811 clen = 0; 1812 cm = NULL; 1813 } 1814 } 1815 } 1816 m0 = m0->m_act; 1817 } 1818 } 1819 1820 static void 1821 unp_mark(struct file *fp) 1822 { 1823 if (fp->f_gcflag & FMARK) 1824 return; 1825 unp_defer++; 1826 fp->f_gcflag |= (FMARK|FDEFER); 1827 } 1828 1829 static void 1830 unp_discard(struct file *fp) 1831 { 1832 UNP_LOCK(); 1833 FILE_LOCK(fp); 1834 fp->f_msgcount--; 1835 unp_rights--; 1836 FILE_UNLOCK(fp); 1837 UNP_UNLOCK(); 1838 (void) closef(fp, (struct thread *)NULL); 1839 } 1840