1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004-2008 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 32 */ 33 34 /* 35 * UNIX Domain (Local) Sockets 36 * 37 * This is an implementation of UNIX (local) domain sockets. Each socket has 38 * an associated struct unpcb (UNIX protocol control block). 
Stream sockets 39 * may be connected to 0 or 1 other socket. Datagram sockets may be 40 * connected to 0, 1, or many other sockets. Sockets may be created and 41 * connected in pairs (socketpair(2)), or bound/connected to using the file 42 * system name space. For most purposes, only the receive socket buffer is 43 * used, as sending on one socket delivers directly to the receive socket 44 * buffer of a second socket. 45 * 46 * The implementation is substantially complicated by the fact that 47 * "ancillary data", such as file descriptors or credentials, may be passed 48 * across UNIX domain sockets. The potential for passing UNIX domain sockets 49 * over other UNIX domain sockets requires the implementation of a simple 50 * garbage collector to find and tear down cycles of disconnected sockets. 51 * 52 * TODO: 53 * SEQPACKET, RDM 54 * rethink name space problems 55 * need a proper out-of-band 56 */ 57 58 #include <sys/cdefs.h> 59 __FBSDID("$FreeBSD$"); 60 61 #include "opt_ddb.h" 62 #include "opt_mac.h" 63 64 #include <sys/param.h> 65 #include <sys/domain.h> 66 #include <sys/fcntl.h> 67 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 68 #include <sys/eventhandler.h> 69 #include <sys/file.h> 70 #include <sys/filedesc.h> 71 #include <sys/jail.h> 72 #include <sys/kernel.h> 73 #include <sys/lock.h> 74 #include <sys/mbuf.h> 75 #include <sys/mount.h> 76 #include <sys/mutex.h> 77 #include <sys/namei.h> 78 #include <sys/proc.h> 79 #include <sys/protosw.h> 80 #include <sys/resourcevar.h> 81 #include <sys/rwlock.h> 82 #include <sys/socket.h> 83 #include <sys/socketvar.h> 84 #include <sys/signalvar.h> 85 #include <sys/stat.h> 86 #include <sys/sx.h> 87 #include <sys/sysctl.h> 88 #include <sys/systm.h> 89 #include <sys/taskqueue.h> 90 #include <sys/un.h> 91 #include <sys/unpcb.h> 92 #include <sys/vnode.h> 93 94 #ifdef DDB 95 #include <ddb/ddb.h> 96 #endif 97 98 #include <security/mac/mac_framework.h> 99 100 #include <vm/uma.h> 101 102 static uma_zone_t unp_zone; 
/*
 * File-scope state shared by all UNIX domain sockets.  Per the locking notes
 * further down, these globals are protected by unp_global_rwlock.
 */
static unp_gen_t	unp_gencnt;	/* Bumped on every attach and detach. */
static u_int	unp_count;	/* Count of local sockets. */
static ino_t	unp_ino;	/* Prototype for fake inode numbers. */
static int	unp_rights;	/* File descriptors in flight. */
static struct unp_head	unp_shead;	/* List of local stream sockets. */
static struct unp_head	unp_dhead;	/* List of local datagram sockets. */

/*
 * Address reported for sockets that have no bound name: an AF_LOCAL sockaddr
 * whose length is just the size of the generic header.
 */
static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };

/*
 * Garbage collection of cyclic file descriptor/socket references occurs
 * asynchronously in a taskqueue context in order to avoid recursion and
 * reentrance in the UNIX domain socket, file descriptor, and socket layer
 * code.  See unp_gc() for a full description.
 */
static struct task	unp_gc_task;

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
 * stream sockets, although the total for sender and receiver is actually
 * only PIPSIZ.
 *
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should be
 * large enough for at least one max-size datagram plus address.
 */
#ifndef PIPSIZ
#define	PIPSIZ	8192
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

/*
 * sysctl tree: net.local, net.local.stream and net.local.dgram.  The four
 * buffer-size defaults above are exported read-write; the count of file
 * descriptors in flight is exported read-only as net.local.inflight.
 */
SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM");
SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");

SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
	   &unpst_sendspace, 0, "Default stream send space.");
SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpst_recvspace, 0, "Default stream receive space.");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
	   &unpdg_sendspace, 0, "Default datagram send space.");
SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpdg_recvspace, 0, "Default datagram receive space.");
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
    "File descriptors in flight.");

/*-
 * Locking and synchronization:
 *
 * The global UNIX domain socket rwlock (unp_global_rwlock) protects all
 * global variables, including the linked lists tracking the set of allocated
 * UNIX domain sockets.  The global rwlock also serves to prevent deadlock
 * when more than one PCB lock is acquired at a time (i.e., during
 * connect()).  Finally, the global rwlock protects uncounted references from
 * vnodes to sockets bound to those vnodes: to safely dereference the
 * v_socket pointer, the global rwlock must be held while a full reference is
 * acquired.
 *
 * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
 * allocated in pru_attach() and freed in pru_detach().  The validity of that
 * pointer is an invariant, so no lock is required to dereference the so_pcb
 * pointer if a valid socket reference is held by the caller.  In practice,
 * this is always true during operations performed on a socket.  Each unpcb
 * has a back-pointer to its socket, unp_socket, which will be stable under
 * the same circumstances.
 *
 * This pointer may only be safely dereferenced as long as a valid reference
 * to the unpcb is held.  Typically, this reference will be from the socket,
 * or from another unpcb when the referring unpcb's lock is held (in order
 * that the reference not be invalidated during use).  For example, to follow
 * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn,
 * as unp_socket remains valid as long as the reference to unp_conn is valid.
 *
 * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx.  Individual
 * atomic reads without the lock may be performed "lockless", but more
 * complex reads and read-modify-writes require the mutex to be held.  No
 * lock order is defined between unpcb locks -- multiple unpcb locks may be
 * acquired at the same time only when holding the global UNIX domain socket
 * rwlock exclusively, which prevents deadlocks.
 *
 * Blocking with UNIX domain sockets is a tricky issue: unlike most network
 * protocols, bind() is a non-atomic operation, and connect() requires
 * potential sleeping in the protocol, due to potentially waiting on local or
 * distributed file systems.  We try to separate "lookup" operations, which
 * may sleep, and the IPC operations themselves, which typically can occur
 * with relative atomicity as locks can be held over the entire operation.
 *
 * Another tricky issue is simultaneous multi-threaded or multi-process
 * access to a single UNIX domain socket.  These are handled by the flags
 * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
 * binding, both of which involve dropping UNIX domain socket locks in order
 * to perform namei() and other file system operations.
 */
static struct rwlock	unp_global_rwlock;

/* Global rwlock: initialization and generic held/not-held assertions. */
#define	UNP_GLOBAL_LOCK_INIT()		rw_init(&unp_global_rwlock,	\
					    "unp_global_rwlock")

#define	UNP_GLOBAL_LOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_LOCKED)
#define	UNP_GLOBAL_UNLOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_UNLOCKED)

/* Global rwlock: exclusive (write) side. */
#define	UNP_GLOBAL_WLOCK()		rw_wlock(&unp_global_rwlock)
#define	UNP_GLOBAL_WUNLOCK()		rw_wunlock(&unp_global_rwlock)
#define	UNP_GLOBAL_WLOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_WLOCKED)
#define	UNP_GLOBAL_WOWNED()		rw_wowned(&unp_global_rwlock)

/* Global rwlock: shared (read) side. */
#define	UNP_GLOBAL_RLOCK()		rw_rlock(&unp_global_rwlock)
#define	UNP_GLOBAL_RUNLOCK()		rw_runlock(&unp_global_rwlock)
#define	UNP_GLOBAL_RLOCK_ASSERT()	rw_assert(&unp_global_rwlock,	\
					    RA_RLOCKED)

/*
 * Per-unpcb mutex.  Every pcb mutex is initialized with the same name and
 * type string, and several may be held at once under the exclusive global
 * lock (see above), which is presumably why MTX_DUPOK is requested; the
 * mutex is also initialized as recursable (MTX_RECURSE).
 */
#define	UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
					    "unp_mtx", "unp_mtx",	\
					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
#define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
#define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
#define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
#define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)

/* Forward declarations of file-local functions. */
static int	uipc_connect2(struct socket *, struct socket *);
static int	uipc_ctloutput(struct socket *, struct sockopt *);
static int	unp_connect(struct socket *, struct sockaddr *,
		    struct thread *);
static int	unp_connect2(struct socket *so, struct socket *so2, int);
static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
static void	unp_dispose(struct mbuf *);
static void	unp_shutdown(struct unpcb *);
static void	unp_drop(struct unpcb *, int);
static void	unp_gc(__unused void *, int);
static void	unp_scan(struct mbuf *, void (*)(struct file *));
static void	unp_discard(struct file *);
static void	unp_freerights(struct file **, int);
static void	unp_init(void);
static int	unp_internalize(struct mbuf **, struct thread *);
static void	unp_internalize_fp(struct file *);
static int	unp_externalize(struct mbuf *, struct mbuf **);
static void	unp_externalize_fp(struct file *);
static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);

/*
 * Definitions of protocols supported in the LOCAL domain.
 *
 * Two protocols are registered: SOCK_STREAM (connection required, wants
 * pru_rcvd notifications, has a ctloutput handler) and SOCK_DGRAM (atomic,
 * address-carrying).  Both carry PR_RIGHTS.
 */
static struct domain localdomain;
static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
static struct protosw localsw[] = {
{
	.pr_type =		SOCK_STREAM,
	.pr_domain =		&localdomain,
	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
	.pr_ctloutput =		&uipc_ctloutput,
	.pr_usrreqs =		&uipc_usrreqs_stream
},
{
	.pr_type =		SOCK_DGRAM,
	.pr_domain =		&localdomain,
	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
	.pr_usrreqs =		&uipc_usrreqs_dgram
},
};

/*
 * The AF_LOCAL domain itself.  dom_protoswNPROTOSW points one element past
 * the end of localsw[].
 */
static struct domain localdomain = {
	.dom_family =		AF_LOCAL,
	.dom_name =		"local",
	.dom_init =		unp_init,
	.dom_externalize =	unp_externalize,
	.dom_dispose =		unp_dispose,
	.dom_protosw =		localsw,
	.dom_protoswNPROTOSW =	&localsw[sizeof(localsw)/sizeof(localsw[0])]
};
DOMAIN_SET(local);

/*
 * pru_abort handler.  If the socket currently has a peer (unp_conn), call
 * unp_drop() on that peer with ECONNABORTED.  The global lock is taken
 * exclusively so that both pcb locks may be held at the same time.
 */
static void
uipc_abort(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_drop(unp2, ECONNABORTED);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
}

/*
 * pru_accept handler.  Allocates a sockaddr_un-sized buffer (M_SONAME,
 * M_WAITOK) into *nam and fills it with the peer's bound address, or with
 * sun_noname when there is no peer or the peer is unbound.  Always returns 0.
 */
static int
uipc_accept(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp, *unp2;
	const struct sockaddr *sa;

	/*
	 * Pass back name of connected socket, if it was bound and we are
	 * still connected (our peer may have closed already!).
	 */
	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));

	/* Allocate before taking any lock: M_WAITOK may sleep. */
	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_GLOBAL_RLOCK();
	/* unp_conn and unp_addr are tested under the global read lock only. */
	unp2 = unp->unp_conn;
	if (unp2 != NULL && unp2->unp_addr != NULL) {
		UNP_PCB_LOCK(unp2);
		sa = (struct sockaddr *) unp2->unp_addr;
		bcopy(sa, *nam, sa->sa_len);
		UNP_PCB_UNLOCK(unp2);
	} else {
		sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
	}
	UNP_GLOBAL_RUNLOCK();
	return (0);
}

/*
 * pru_attach handler: create the unpcb for a new socket.
 *
 * If either socket buffer has no high-water mark yet, space is reserved
 * using the per-type defaults (unpst_* for stream, unpdg_* for datagram);
 * any other socket type panics.  A zeroed unpcb is then allocated without
 * sleeping, cross-linked with the socket, given an initial reference count
 * of 1, and inserted at the head of the matching global list.
 *
 * Returns 0 on success, the soreserve() error, or ENOBUFS if the unpcb
 * cannot be allocated.
 */
static int
uipc_attach(struct socket *so, int proto, struct thread *td)
{
	u_long sendspace, recvspace;
	struct unpcb *unp;
	int error, locked;

	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
			sendspace = unpst_sendspace;
			recvspace = unpst_recvspace;
			break;

		case SOCK_DGRAM:
			sendspace = unpdg_sendspace;
			recvspace = unpdg_recvspace;
			break;

		default:
			panic("uipc_attach");
		}
		error = soreserve(so, sendspace, recvspace);
		if (error)
			return (error);
	}
	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	LIST_INIT(&unp->unp_refs);
	UNP_PCB_LOCK_INIT(unp);
	unp->unp_socket = so;
	so->so_pcb = unp;
	unp->unp_refcount = 1;

	/*
	 * uipc_attach() may be called indirectly from within the UNIX domain
	 * socket code via sonewconn() in unp_connect().  Since rwlocks can
	 * not be recursed, we do the closest thing: take the write lock only
	 * if this thread does not already own it, and remember whether we
	 * did so that we release only what we acquired.
	 */
	locked = 0;
	if (!UNP_GLOBAL_WOWNED()) {
		UNP_GLOBAL_WLOCK();
		locked = 1;
	}
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead,
	    unp, unp_link);
	if (locked)
		UNP_GLOBAL_WUNLOCK();

	return (0);
}

/*
 * pru_bind handler: bind the socket to a path in the file system by creating
 * a VSOCK vnode there.
 *
 * Errors returned directly by this function:
 *	EINVAL		the address carries no path bytes, or the socket is
 *			already bound (unp_vnode != NULL);
 *	EALREADY	another bind on this socket is in progress;
 *	EADDRINUSE	the path already exists.
 * Errors from namei(), vn_start_write(), the MAC check and VOP_CREATE() are
 * passed through.
 *
 * UNP_BINDING is set for the duration of the operation, since no UNIX
 * domain socket lock is held across the file system calls, and is cleared
 * on both the success and error paths.
 */
static int
uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vattr vattr;
	int error, namelen, vfslocked;
	struct nameidata nd;
	struct unpcb *unp;
	struct vnode *vp;
	struct mount *mp;
	char *buf;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));

	/* Number of path bytes supplied, excluding the sockaddr header. */
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	/*
	 * We don't allow simultaneous bind() calls on a single UNIX domain
	 * socket, so flag in-progress operations, and return an error if an
	 * operation is already in progress.
	 *
	 * Historically, we have not allowed a socket to be rebound, so this
	 * also returns an error.  Not allowing re-binding simplifies the
	 * implementation and avoids a great many possible failure modes.
	 */
	UNP_PCB_LOCK(unp);
	if (unp->unp_vnode != NULL) {
		UNP_PCB_UNLOCK(unp);
		return (EINVAL);
	}
	if (unp->unp_flags & UNP_BINDING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
	}
	unp->unp_flags |= UNP_BINDING;
	UNP_PCB_UNLOCK(unp);

	/* Make a NUL-terminated copy of the path for namei(). */
	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
	bcopy(soun->sun_path, buf, namelen);
	buf[namelen] = 0;

restart:
	/* Reset so the error path can call VFS_UNLOCK_GIANT() safely. */
	vfslocked = 0;
	NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME,
	    UIO_SYSSPACE, buf, td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error)
		goto error;
	vp = nd.ni_vp;
	vfslocked = NDHASGIANT(&nd);
	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Either the name already exists, or a write could not be
		 * started without waiting.  Release the lookup results
		 * first in both cases.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp != NULL) {
			vrele(vp);
			error = EADDRINUSE;
			goto error;
		}
		/* Wait (interruptibly) until writes are allowed, then retry. */
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error)
			goto error;
		VFS_UNLOCK_GIANT(vfslocked);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	/* Full access permissions, masked by the process's file mode mask. */
	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
#endif
	/* Without MAC, error is still 0 from the successful namei() above. */
	if (error == 0) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (error) {
		vn_finished_write(mp);
		goto error;
	}
	vp = nd.ni_vp;
	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
	/* Duplicate the address before taking locks: M_WAITOK may sleep. */
	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);

	/* Publish the binding: link vnode and socket, record the address. */
	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = soun;
	unp->unp_flags &= ~UNP_BINDING;
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);
	free(buf, M_TEMP);
	return (0);

error:
	/* Common failure exit: clear the in-progress flag, free the copy. */
	VFS_UNLOCK_GIANT(vfslocked);
	UNP_PCB_LOCK(unp);
	unp->unp_flags &= ~UNP_BINDING;
	UNP_PCB_UNLOCK(unp);
	free(buf, M_TEMP);
	return (error);
}

/*
 * pru_connect handler: call unp_connect() with the global lock held
 * exclusively (unp_connect() asserts this) and return its result.
 */
static int
uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error;

	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
	UNP_GLOBAL_WLOCK();
	error = unp_connect(so, nam, td);
	UNP_GLOBAL_WUNLOCK();
	return (error);
}

/*
 * pru_close handler: if the socket has a peer, disconnect from it via
 * unp_disconnect() with the global write lock and both pcb locks held.
 */
static void
uipc_close(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
}

/*
 * pru_connect2 handler: connect so1 to so2 by calling unp_connect2() with
 * PRU_CONNECT2 while holding the global write lock and both pcb locks.
 * Returns the result of unp_connect2().
 */
static int
uipc_connect2(struct socket *so1, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int error;

	UNP_GLOBAL_WLOCK();
	unp = so1->so_pcb;
	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
	UNP_PCB_LOCK(unp);
	unp2 = so2->so_pcb;
	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
	UNP_PCB_LOCK(unp2);
	error = unp_connect2(so1, so2, PRU_CONNECT2);
	UNP_PCB_UNLOCK(unp2);
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	return (error);
}

/*
 * pru_detach handler: tear down the socket's unpcb.
 *
 * With the global write lock and the pcb lock held, the pcb is removed from
 * its global list, the generation and socket counts are updated, any bound
 * vnode is unhooked, the peer (if any) is disconnected, and unp_drop() is
 * called with ECONNRESET on every pcb on unp_refs.  After the global lock
 * is released the socket's so_pcb is cleared, the bound address is freed,
 * and one reference is dropped; the unpcb is returned to unp_zone if that
 * was the last reference.  Finally the vnode reference is released and, if
 * any file descriptors were in flight when sampled, the garbage collection
 * task is queued.
 */
static void
uipc_detach(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct sockaddr_un *saved_unp_addr;
	struct vnode *vp;
	int freeunp, local_unp_rights;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);

	LIST_REMOVE(unp, unp_link);
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;

	/*
	 * XXXRW: Should assert vp->v_socket == so.
	 */
	if ((vp = unp->unp_vnode) != NULL) {
		unp->unp_vnode->v_socket = NULL;
		unp->unp_vnode = NULL;
	}
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}

	/*
	 * We hold the global lock exclusively, so it's OK to acquire
	 * multiple pcb locks at a time.
	 *
	 * NOTE(review): the loop terminates only if unp_drop() removes
	 * 'ref' from unp_refs; unp_drop() is defined outside this view --
	 * confirm.
	 */
	while (!LIST_EMPTY(&unp->unp_refs)) {
		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);

		UNP_PCB_LOCK(ref);
		unp_drop(ref, ECONNRESET);
		UNP_PCB_UNLOCK(ref);
	}
	/* Sample the in-flight count while the global lock is still held. */
	local_unp_rights = unp_rights;
	UNP_GLOBAL_WUNLOCK();
	unp->unp_socket->so_pcb = NULL;
	saved_unp_addr = unp->unp_addr;
	unp->unp_addr = NULL;
	unp->unp_refcount--;
	freeunp = (unp->unp_refcount == 0);
	if (saved_unp_addr != NULL)
		free(saved_unp_addr, M_SONAME);
	if (freeunp) {
		/* The pcb mutex is still held here; destroy, don't unlock. */
		UNP_PCB_LOCK_DESTROY(unp);
		uma_zfree(unp_zone, unp);
	} else
		UNP_PCB_UNLOCK(unp);
	if (vp) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (local_unp_rights)
		taskqueue_enqueue(taskqueue_thread, &unp_gc_task);
}

/*
 * pru_disconnect handler: if the socket has a peer, disconnect from it via
 * unp_disconnect().  Always returns 0.
 */
static int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp, *unp2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		unp_disconnect(unp, unp2);
		UNP_PCB_UNLOCK(unp2);
	}
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	return (0);
}

/*
 * pru_listen handler.  The socket must already be bound (unp_vnode set),
 * otherwise EINVAL is returned.  If solisten_proto_check() passes, the
 * calling thread's credentials are cached in unp_peercred, UNP_HAVEPCCACHED
 * is set, and the socket is put into the listening state.  Returns the
 * result of solisten_proto_check().
 */
static int
uipc_listen(struct socket *so, int backlog, struct thread *td)
{
	struct unpcb *unp;
	int error;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));

	UNP_PCB_LOCK(unp);
	if (unp->unp_vnode == NULL) {
		UNP_PCB_UNLOCK(unp);
		return (EINVAL);
	}

	/* The socket lock is taken after, and released before, the pcb lock. */
	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	if (error == 0) {
		cru2x(td->td_ucred, &unp->unp_peercred);
		unp->unp_flags |= UNP_HAVEPCCACHED;
		solisten_proto(so, backlog);
	}
	SOCK_UNLOCK(so);
	UNP_PCB_UNLOCK(unp);
	return (error);
}

/*
 * pru_peeraddr handler.  Allocates a sockaddr_un-sized buffer into *nam and
 * copies in the peer's bound address, or sun_noname if there is no peer or
 * the peer is unbound.  Always returns 0.
 */
static int
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp, *unp2;
	const struct sockaddr *sa;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_PCB_LOCK(unp);
	/*
	 * XXX: It seems that this test always fails even when connection is
	 * established.  So, this else clause is added as workaround to
	 * return PF_LOCAL sockaddr.
	 */
	unp2 = unp->unp_conn;
	if (unp2 != NULL) {
		UNP_PCB_LOCK(unp2);
		/* unp->unp_conn is the same pcb as unp2 here. */
		if (unp2->unp_addr != NULL)
			sa = (struct sockaddr *) unp->unp_conn->unp_addr;
		else
			sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
		UNP_PCB_UNLOCK(unp2);
	} else {
		sa = &sun_noname;
		bcopy(sa, *nam, sa->sa_len);
	}
	UNP_PCB_UNLOCK(unp);
	return (0);
}

/*
 * pru_rcvd handler (stream sockets only; any other type panics).
 *
 * Called after data has been taken from the receive buffer.  The change in
 * so_rcv's mbuf and byte counts since the values last cached in the unpcb
 * (unp_mbcnt, unp_cc) is credited back to the peer's send buffer limits,
 * writers on the peer are woken, and the cached values are refreshed.
 * Returns 0, including when the peer has already gone away.
 */
static int
uipc_rcvd(struct socket *so, int flags)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;
	u_int mbcnt, sbcc;
	u_long newhiwat;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));

	if (so->so_type == SOCK_DGRAM)
		panic("uipc_rcvd DGRAM?");

	if (so->so_type != SOCK_STREAM)
		panic("uipc_rcvd unknown socktype");

	/*
	 * Adjust backpressure on sender and wakeup any waiting to write.
	 *
	 * The unp lock is acquired to maintain the validity of the unp_conn
	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
	 * static as long as we don't permit unp2 to disconnect from unp,
	 * which is prevented by the lock on unp.  We cache values from
	 * so_rcv to avoid holding the so_rcv lock over the entire
	 * transaction on the remote so_snd.
	 */
	SOCKBUF_LOCK(&so->so_rcv);
	mbcnt = so->so_rcv.sb_mbcnt;
	sbcc = so->so_rcv.sb_cc;
	SOCKBUF_UNLOCK(&so->so_rcv);
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (unp2 == NULL) {
		UNP_PCB_UNLOCK(unp);
		return (0);
	}
	so2 = unp2->unp_socket;
	SOCKBUF_LOCK(&so2->so_snd);
	so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt;
	newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc;
	(void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
	    newhiwat, RLIM_INFINITY);
	/* Presumably also drops the so_snd lock taken above -- confirm. */
	sowwakeup_locked(so2);
	unp->unp_mbcnt = mbcnt;
	unp->unp_cc = sbcc;
	UNP_PCB_UNLOCK(unp);
	return (0);
}

/*
 * pru_send handler for both datagram and stream sockets.
 *
 * so		sending socket
 * flags	PRUS_OOB is rejected with EOPNOTSUPP; PRUS_EOF additionally
 *		shuts down the sending side after the send
 * m		data to send
 * nam		optional destination address (implicit connect)
 * control	optional control mbuf; internalized via unp_internalize()
 * td		calling thread
 *
 * The global lock is taken exclusively when a destination address is given
 * or PRUS_EOF is set, and shared otherwise.
 *
 * Datagram: with an address the socket must not already be connected
 * (EISCONN); it is connected for the duration of this one send and
 * disconnected again afterwards.  The data is appended to the peer's
 * receive buffer together with the sender's address; ENOBUFS is returned
 * if the append fails.
 *
 * Stream: an unconnected socket is connected first when an address is
 * given, otherwise ENOTCONN; EPIPE if the send side is already shut down.
 * Data is appended to the peer's receive buffer and the sender's send
 * buffer limits are reduced by the amount the peer's buffer grew.
 *
 * On exit any mbufs not handed to the peer are freed here.  Returns 0 or an
 * errno value.
 */
static int
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
    struct mbuf *control, struct thread *td)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;
	u_int mbcnt_delta, sbcc;
	u_long newhiwat;
	int error = 0;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_send: unp == NULL"));

	if (flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (control != NULL && (error = unp_internalize(&control, td)))
		goto release;
	/* The same condition selects the matching unlock further down. */
	if ((nam != NULL) || (flags & PRUS_EOF))
		UNP_GLOBAL_WLOCK();
	else
		UNP_GLOBAL_RLOCK();
	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		const struct sockaddr *from;

		unp2 = unp->unp_conn;
		if (nam != NULL) {
			UNP_GLOBAL_WLOCK_ASSERT();
			if (unp2 != NULL) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so, nam, td);
			if (error)
				break;
			unp2 = unp->unp_conn;
		}

		/*
		 * Because connect() and send() are non-atomic in a sendto()
		 * with a target address, it's possible that the socket will
		 * have disconnected before the send() can run.  In that case
		 * return the slightly counter-intuitive but otherwise
		 * correct error that the socket is not connected.
		 */
		if (unp2 == NULL) {
			error = ENOTCONN;
			break;
		}
		/* Lockless read. */
		if (unp2->unp_flags & UNP_WANTCRED)
			control = unp_addsockcred(td, control);
		UNP_PCB_LOCK(unp);
		if (unp->unp_addr != NULL)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;
		so2 = unp2->unp_socket;
		SOCKBUF_LOCK(&so2->so_rcv);
		if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) {
			/* Queued: the mbufs now belong to the receiver. */
			sorwakeup_locked(so2);
			m = NULL;
			control = NULL;
		} else {
			SOCKBUF_UNLOCK(&so2->so_rcv);
			error = ENOBUFS;
		}
		if (nam != NULL) {
			/* Undo the temporary connect made for this send. */
			UNP_GLOBAL_WLOCK_ASSERT();
			UNP_PCB_LOCK(unp2);
			unp_disconnect(unp, unp2);
			UNP_PCB_UNLOCK(unp2);
		}
		UNP_PCB_UNLOCK(unp);
		break;
	}

	case SOCK_STREAM:
		if ((so->so_state & SS_ISCONNECTED) == 0) {
			if (nam != NULL) {
				UNP_GLOBAL_WLOCK_ASSERT();
				error = unp_connect(so, nam, td);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		/* Lockless read. */
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		/*
		 * Because connect() and send() are non-atomic in a sendto()
		 * with a target address, it's possible that the socket will
		 * have disconnected before the send() can run.  In that case
		 * return the slightly counter-intuitive but otherwise
		 * correct error that the socket is not connected.
		 *
		 * Locking here must be done carefully: the global lock
		 * prevents interconnections between unpcbs from changing, so
		 * we can traverse from unp to unp2 without acquiring unp's
		 * lock.  Socket buffer locks follow unpcb locks, so we can
		 * acquire both remote and local socket buffer locks.
		 */
		unp2 = unp->unp_conn;
		if (unp2 == NULL) {
			error = ENOTCONN;
			break;
		}
		so2 = unp2->unp_socket;
		UNP_PCB_LOCK(unp2);
		SOCKBUF_LOCK(&so2->so_rcv);
		if (unp2->unp_flags & UNP_WANTCRED) {
			/*
			 * Credentials are passed only once on SOCK_STREAM.
			 */
			unp2->unp_flags &= ~UNP_WANTCRED;
			control = unp_addsockcred(td, control);
		}
		/*
		 * Send to paired receive port, and then reduce send buffer
		 * hiwater marks to maintain backpressure.  Wake up readers.
		 *
		 * NOTE(review): m is set to NULL below regardless of the
		 * sbappendcontrol_locked() result; whether a failed append
		 * leaves m unconsumed depends on that function, which is
		 * outside this view -- confirm.
		 */
		if (control != NULL) {
			if (sbappendcontrol_locked(&so2->so_rcv, m, control))
				control = NULL;
		} else
			sbappend_locked(&so2->so_rcv, m);
		mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
		unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
		sbcc = so2->so_rcv.sb_cc;
		sorwakeup_locked(so2);

		SOCKBUF_LOCK(&so->so_snd);
		newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc);
		(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
		    newhiwat, RLIM_INFINITY);
		so->so_snd.sb_mbmax -= mbcnt_delta;
		SOCKBUF_UNLOCK(&so->so_snd);
		unp2->unp_cc = sbcc;
		UNP_PCB_UNLOCK(unp2);
		m = NULL;
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
	 */
	if (flags & PRUS_EOF) {
		UNP_PCB_LOCK(unp);
		socantsendmore(so);
		unp_shutdown(unp);
		UNP_PCB_UNLOCK(unp);
	}

	if ((nam != NULL) || (flags & PRUS_EOF))
		UNP_GLOBAL_WUNLOCK();
	else
		UNP_GLOBAL_RUNLOCK();

	/*
	 * Control data that was internalized but not delivered is passed to
	 * unp_dispose() before the mbufs are freed.  The early 'goto
	 * release' exits above skip this step.
	 */
	if (control != NULL && error != 0)
		unp_dispose(control);

release:
	if (control != NULL)
		m_freem(control);
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * pru_sense handler: fill in the stat fields for a UNIX domain socket.
 *
 * st_blksize is the send buffer high-water mark, plus, for a connected
 * stream socket, the number of bytes queued in the peer's receive buffer.
 * st_dev is NODEV.  st_ino is a fake inode number taken from the global
 * unp_ino counter the first time it is needed; 0 is never handed out.
 * Always returns 0.
 */
static int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));

	sb->st_blksize = so->so_snd.sb_hiwat;
	UNP_GLOBAL_RLOCK();
	UNP_PCB_LOCK(unp);
	unp2 = unp->unp_conn;
	if (so->so_type == SOCK_STREAM && unp2 != NULL) {
		so2 = unp2->unp_socket;
		sb->st_blksize += so2->so_rcv.sb_cc;
	}
	sb->st_dev = NODEV;
	/*
	 * NOTE(review): unp_ino is a global that is advanced here while the
	 * global lock is held only for reading; callers on different
	 * sockets hold different pcb locks -- confirm this is intended.
	 */
	if (unp->unp_ino == 0)
		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
	sb->st_ino = unp->unp_ino;
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_RUNLOCK();
	return (0);
}

/*
 * pru_shutdown handler: mark the socket as unable to send any more and call
 * unp_shutdown() on its pcb.  Always returns 0.
 */
static int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));

	UNP_GLOBAL_WLOCK();
	UNP_PCB_LOCK(unp);
	socantsendmore(so);
	unp_shutdown(unp);
	UNP_PCB_UNLOCK(unp);
	UNP_GLOBAL_WUNLOCK();
	return (0);
}

/*
 * pru_sockaddr handler.  Allocates a sockaddr_un-sized buffer into *nam and
 * copies in this socket's bound address, or sun_noname if it is unbound.
 * Always returns 0.
 */
static int
uipc_sockaddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp;
	const struct sockaddr *sa;

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));

	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_PCB_LOCK(unp);
	if (unp->unp_addr != NULL)
		sa = (struct sockaddr *) unp->unp_addr;
	else
		sa = &sun_noname;
	bcopy(sa, *nam, sa->sa_len);
	UNP_PCB_UNLOCK(unp);
	return (0);
}

/*
 * User-request tables.  The datagram and stream tables share every handler
 * except pru_soreceive: soreceive_dgram for datagram sockets and
 * soreceive_generic for stream sockets.
 */
static struct pr_usrreqs uipc_usrreqs_dgram = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_connect =		uipc_connect,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_dgram,
	.pru_close =		uipc_close,
};

static struct pr_usrreqs uipc_usrreqs_stream = {
	.pru_abort = 		uipc_abort,
	.pru_accept =		uipc_accept,
	.pru_attach =		uipc_attach,
	.pru_bind =		uipc_bind,
	.pru_connect =		uipc_connect,
	.pru_connect2 =		uipc_connect2,
	.pru_detach =		uipc_detach,
	.pru_disconnect =	uipc_disconnect,
	.pru_listen =		uipc_listen,
	.pru_peeraddr =		uipc_peeraddr,
	.pru_rcvd =		uipc_rcvd,
	.pru_send =		uipc_send,
	.pru_sense =		uipc_sense,
	.pru_shutdown =		uipc_shutdown,
	.pru_sockaddr =		uipc_sockaddr,
	.pru_soreceive =	soreceive_generic,
	.pru_close =		uipc_close,
};

/*
 * Socket option handler (registered as pr_ctloutput for SOCK_STREAM).
 *
 * Only option level 0 is accepted; any other level returns EINVAL.
 *
 * SOPT_GET:
 *	LOCAL_PEERCRED	copy out the cached peer credentials; if none are
 *			recorded (UNP_HAVEPC clear) return ENOTCONN for a
 *			stream socket and EINVAL otherwise
 *	LOCAL_CREDS	copy out 1 or 0 according to UNP_WANTCRED
 *	LOCAL_CONNWAIT	copy out 1 or 0 according to UNP_CONNWAIT
 *	anything else	EOPNOTSUPP
 * SOPT_SET:
 *	LOCAL_CREDS, LOCAL_CONNWAIT
 *			copy in an int and set or clear the matching flag
 *	anything else	ENOPROTOOPT
 * Any other direction returns EOPNOTSUPP.
 */
static int
uipc_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct unpcb *unp;
	struct xucred xu;
	int error, optval;

	if (sopt->sopt_level != 0)
		return (EINVAL);

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
	error = 0;
	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			UNP_PCB_LOCK(unp);
			if (unp->unp_flags & UNP_HAVEPC)
				xu = unp->unp_peercred;
			else {
				if (so->so_type == SOCK_STREAM)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			UNP_PCB_UNLOCK(unp);
			/* xu is only initialized, and only used, on success. */
			if (error == 0)
				error = sooptcopyout(sopt, &xu, sizeof(xu));
			break;

		case LOCAL_CREDS:
			/* Unlocked read. */
			optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		case LOCAL_CONNWAIT:
			/* Unlocked read. */
			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
			error = sooptcopyout(sopt, &optval, sizeof(optval));
			break;

		default:
			error = EOPNOTSUPP;
			break;
		}
		break;

	case SOPT_SET:
		switch (sopt->sopt_name) {
		case LOCAL_CREDS:
		case LOCAL_CONNWAIT:
			error = sooptcopyin(sopt, &optval, sizeof(optval),
					    sizeof(optval));
			if (error)
				break;

/* Set or clear 'bit' in unp_flags under the pcb lock, based on optval. */
#define	OPTSET(bit) do {						\
	UNP_PCB_LOCK(unp);						\
	if (optval)							\
		unp->unp_flags |= bit;					\
	else								\
		unp->unp_flags &= ~bit;					\
	UNP_PCB_UNLOCK(unp);						\
} while (0)

			switch (sopt->sopt_name) {
			case LOCAL_CREDS:
				OPTSET(UNP_WANTCRED);
				break;

			case LOCAL_CONNWAIT:
				OPTSET(UNP_CONNWAIT);
				break;

			default:
				break;
			}
			break;
#undef	OPTSET
		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error, len, vfslocked;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;

	UNP_GLOBAL_WLOCK_ASSERT();

	unp = sotounpcb(so);
	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	bcopy(soun->sun_path, buf, len);
	buf[len] = 0;

	UNP_PCB_LOCK(unp);
	if (unp->unp_flags & UNP_CONNECTING) {
		UNP_PCB_UNLOCK(unp);
		return (EALREADY);
1157 } 1158 UNP_GLOBAL_WUNLOCK(); 1159 unp->unp_flags |= UNP_CONNECTING; 1160 UNP_PCB_UNLOCK(unp); 1161 1162 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 1163 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, 1164 td); 1165 error = namei(&nd); 1166 if (error) 1167 vp = NULL; 1168 else 1169 vp = nd.ni_vp; 1170 ASSERT_VOP_LOCKED(vp, "unp_connect"); 1171 vfslocked = NDHASGIANT(&nd); 1172 NDFREE(&nd, NDF_ONLY_PNBUF); 1173 if (error) 1174 goto bad; 1175 1176 if (vp->v_type != VSOCK) { 1177 error = ENOTSOCK; 1178 goto bad; 1179 } 1180 #ifdef MAC 1181 error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); 1182 if (error) 1183 goto bad; 1184 #endif 1185 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 1186 if (error) 1187 goto bad; 1188 VFS_UNLOCK_GIANT(vfslocked); 1189 1190 unp = sotounpcb(so); 1191 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1192 1193 /* 1194 * Lock global lock for two reasons: make sure v_socket is stable, 1195 * and to protect simultaneous locking of multiple pcbs. 1196 */ 1197 UNP_GLOBAL_WLOCK(); 1198 so2 = vp->v_socket; 1199 if (so2 == NULL) { 1200 error = ECONNREFUSED; 1201 goto bad2; 1202 } 1203 if (so->so_type != so2->so_type) { 1204 error = EPROTOTYPE; 1205 goto bad2; 1206 } 1207 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 1208 if (so2->so_options & SO_ACCEPTCONN) { 1209 /* 1210 * We can't drop the global lock here or 'so2' may 1211 * become invalid. As a result, we need to handle 1212 * possibly lock recursion in uipc_attach. 
1213 */ 1214 so3 = sonewconn(so2, 0); 1215 } else 1216 so3 = NULL; 1217 if (so3 == NULL) { 1218 error = ECONNREFUSED; 1219 goto bad2; 1220 } 1221 unp = sotounpcb(so); 1222 unp2 = sotounpcb(so2); 1223 unp3 = sotounpcb(so3); 1224 UNP_PCB_LOCK(unp); 1225 UNP_PCB_LOCK(unp2); 1226 UNP_PCB_LOCK(unp3); 1227 if (unp2->unp_addr != NULL) { 1228 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 1229 unp3->unp_addr = (struct sockaddr_un *) sa; 1230 sa = NULL; 1231 } 1232 1233 /* 1234 * The connecter's (client's) credentials are copied from its 1235 * process structure at the time of connect() (which is now). 1236 */ 1237 cru2x(td->td_ucred, &unp3->unp_peercred); 1238 unp3->unp_flags |= UNP_HAVEPC; 1239 1240 /* 1241 * The receiver's (server's) credentials are copied from the 1242 * unp_peercred member of socket on which the former called 1243 * listen(); uipc_listen() cached that process's credentials 1244 * at that time so we can use them now. 1245 */ 1246 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 1247 ("unp_connect: listener without cached peercred")); 1248 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 1249 sizeof(unp->unp_peercred)); 1250 unp->unp_flags |= UNP_HAVEPC; 1251 if (unp2->unp_flags & UNP_WANTCRED) 1252 unp3->unp_flags |= UNP_WANTCRED; 1253 UNP_PCB_UNLOCK(unp3); 1254 UNP_PCB_UNLOCK(unp2); 1255 UNP_PCB_UNLOCK(unp); 1256 #ifdef MAC 1257 SOCK_LOCK(so); 1258 mac_socketpeer_set_from_socket(so, so3); 1259 mac_socketpeer_set_from_socket(so3, so); 1260 SOCK_UNLOCK(so); 1261 #endif 1262 1263 so2 = so3; 1264 } 1265 unp = sotounpcb(so); 1266 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1267 unp2 = sotounpcb(so2); 1268 KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); 1269 UNP_PCB_LOCK(unp); 1270 UNP_PCB_LOCK(unp2); 1271 error = unp_connect2(so, so2, PRU_CONNECT); 1272 UNP_PCB_UNLOCK(unp2); 1273 UNP_PCB_UNLOCK(unp); 1274 bad2: 1275 UNP_GLOBAL_WUNLOCK(); 1276 if (vfslocked) 1277 /* 1278 * Giant has been previously acquired. 
This means filesystem 1279 * isn't MPSAFE. Do it once again. 1280 */ 1281 mtx_lock(&Giant); 1282 bad: 1283 if (vp != NULL) 1284 vput(vp); 1285 VFS_UNLOCK_GIANT(vfslocked); 1286 free(sa, M_SONAME); 1287 UNP_GLOBAL_WLOCK(); 1288 UNP_PCB_LOCK(unp); 1289 unp->unp_flags &= ~UNP_CONNECTING; 1290 UNP_PCB_UNLOCK(unp); 1291 return (error); 1292 } 1293 1294 static int 1295 unp_connect2(struct socket *so, struct socket *so2, int req) 1296 { 1297 struct unpcb *unp; 1298 struct unpcb *unp2; 1299 1300 unp = sotounpcb(so); 1301 KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); 1302 unp2 = sotounpcb(so2); 1303 KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); 1304 1305 UNP_GLOBAL_WLOCK_ASSERT(); 1306 UNP_PCB_LOCK_ASSERT(unp); 1307 UNP_PCB_LOCK_ASSERT(unp2); 1308 1309 if (so2->so_type != so->so_type) 1310 return (EPROTOTYPE); 1311 unp->unp_conn = unp2; 1312 1313 switch (so->so_type) { 1314 case SOCK_DGRAM: 1315 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1316 soisconnected(so); 1317 break; 1318 1319 case SOCK_STREAM: 1320 unp2->unp_conn = unp; 1321 if (req == PRU_CONNECT && 1322 ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) 1323 soisconnecting(so); 1324 else 1325 soisconnected(so); 1326 soisconnected(so2); 1327 break; 1328 1329 default: 1330 panic("unp_connect2"); 1331 } 1332 return (0); 1333 } 1334 1335 static void 1336 unp_disconnect(struct unpcb *unp, struct unpcb *unp2) 1337 { 1338 struct socket *so; 1339 1340 KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); 1341 1342 UNP_GLOBAL_WLOCK_ASSERT(); 1343 UNP_PCB_LOCK_ASSERT(unp); 1344 UNP_PCB_LOCK_ASSERT(unp2); 1345 1346 unp->unp_conn = NULL; 1347 switch (unp->unp_socket->so_type) { 1348 case SOCK_DGRAM: 1349 LIST_REMOVE(unp, unp_reflink); 1350 so = unp->unp_socket; 1351 SOCK_LOCK(so); 1352 so->so_state &= ~SS_ISCONNECTED; 1353 SOCK_UNLOCK(so); 1354 break; 1355 1356 case SOCK_STREAM: 1357 soisdisconnected(unp->unp_socket); 1358 unp2->unp_conn = NULL; 1359 soisdisconnected(unp2->unp_socket); 1360 
break; 1361 } 1362 } 1363 1364 /* 1365 * unp_pcblist() walks the global list of struct unpcb's to generate a 1366 * pointer list, bumping the refcount on each unpcb. It then copies them out 1367 * sequentially, validating the generation number on each to see if it has 1368 * been detached. All of this is necessary because copyout() may sleep on 1369 * disk I/O. 1370 */ 1371 static int 1372 unp_pcblist(SYSCTL_HANDLER_ARGS) 1373 { 1374 int error, i, n; 1375 int freeunp; 1376 struct unpcb *unp, **unp_list; 1377 unp_gen_t gencnt; 1378 struct xunpgen *xug; 1379 struct unp_head *head; 1380 struct xunpcb *xu; 1381 1382 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1383 1384 /* 1385 * The process of preparing the PCB list is too time-consuming and 1386 * resource-intensive to repeat twice on every request. 1387 */ 1388 if (req->oldptr == NULL) { 1389 n = unp_count; 1390 req->oldidx = 2 * (sizeof *xug) 1391 + (n + n/8) * sizeof(struct xunpcb); 1392 return (0); 1393 } 1394 1395 if (req->newptr != NULL) 1396 return (EPERM); 1397 1398 /* 1399 * OK, now we're committed to doing something. 
1400 */ 1401 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 1402 UNP_GLOBAL_RLOCK(); 1403 gencnt = unp_gencnt; 1404 n = unp_count; 1405 UNP_GLOBAL_RUNLOCK(); 1406 1407 xug->xug_len = sizeof *xug; 1408 xug->xug_count = n; 1409 xug->xug_gen = gencnt; 1410 xug->xug_sogen = so_gencnt; 1411 error = SYSCTL_OUT(req, xug, sizeof *xug); 1412 if (error) { 1413 free(xug, M_TEMP); 1414 return (error); 1415 } 1416 1417 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1418 1419 UNP_GLOBAL_RLOCK(); 1420 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1421 unp = LIST_NEXT(unp, unp_link)) { 1422 UNP_PCB_LOCK(unp); 1423 if (unp->unp_gencnt <= gencnt) { 1424 if (cr_cansee(req->td->td_ucred, 1425 unp->unp_socket->so_cred)) { 1426 UNP_PCB_UNLOCK(unp); 1427 continue; 1428 } 1429 unp_list[i++] = unp; 1430 unp->unp_refcount++; 1431 } 1432 UNP_PCB_UNLOCK(unp); 1433 } 1434 UNP_GLOBAL_RUNLOCK(); 1435 n = i; /* In case we lost some during malloc. */ 1436 1437 error = 0; 1438 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); 1439 for (i = 0; i < n; i++) { 1440 unp = unp_list[i]; 1441 UNP_PCB_LOCK(unp); 1442 unp->unp_refcount--; 1443 if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { 1444 xu->xu_len = sizeof *xu; 1445 xu->xu_unpp = unp; 1446 /* 1447 * XXX - need more locking here to protect against 1448 * connect/disconnect races for SMP. 
1449 */ 1450 if (unp->unp_addr != NULL) 1451 bcopy(unp->unp_addr, &xu->xu_addr, 1452 unp->unp_addr->sun_len); 1453 if (unp->unp_conn != NULL && 1454 unp->unp_conn->unp_addr != NULL) 1455 bcopy(unp->unp_conn->unp_addr, 1456 &xu->xu_caddr, 1457 unp->unp_conn->unp_addr->sun_len); 1458 bcopy(unp, &xu->xu_unp, sizeof *unp); 1459 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1460 UNP_PCB_UNLOCK(unp); 1461 error = SYSCTL_OUT(req, xu, sizeof *xu); 1462 } else { 1463 freeunp = (unp->unp_refcount == 0); 1464 UNP_PCB_UNLOCK(unp); 1465 if (freeunp) { 1466 UNP_PCB_LOCK_DESTROY(unp); 1467 uma_zfree(unp_zone, unp); 1468 } 1469 } 1470 } 1471 free(xu, M_TEMP); 1472 if (!error) { 1473 /* 1474 * Give the user an updated idea of our state. If the 1475 * generation differs from what we told her before, she knows 1476 * that something happened while we were processing this 1477 * request, and it might be necessary to retry. 1478 */ 1479 xug->xug_gen = unp_gencnt; 1480 xug->xug_sogen = so_gencnt; 1481 xug->xug_count = unp_count; 1482 error = SYSCTL_OUT(req, xug, sizeof *xug); 1483 } 1484 free(unp_list, M_TEMP); 1485 free(xug, M_TEMP); 1486 return (error); 1487 } 1488 1489 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1490 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1491 "List of active local datagram sockets"); 1492 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1493 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1494 "List of active local stream sockets"); 1495 1496 static void 1497 unp_shutdown(struct unpcb *unp) 1498 { 1499 struct unpcb *unp2; 1500 struct socket *so; 1501 1502 UNP_GLOBAL_WLOCK_ASSERT(); 1503 UNP_PCB_LOCK_ASSERT(unp); 1504 1505 unp2 = unp->unp_conn; 1506 if (unp->unp_socket->so_type == SOCK_STREAM && unp2 != NULL) { 1507 so = unp2->unp_socket; 1508 if (so != NULL) 1509 socantrcvmore(so); 1510 } 1511 } 1512 1513 static void 1514 unp_drop(struct unpcb *unp, int errno) 1515 { 1516 struct socket *so = unp->unp_socket; 1517 
struct unpcb *unp2; 1518 1519 UNP_GLOBAL_WLOCK_ASSERT(); 1520 UNP_PCB_LOCK_ASSERT(unp); 1521 1522 so->so_error = errno; 1523 unp2 = unp->unp_conn; 1524 if (unp2 == NULL) 1525 return; 1526 UNP_PCB_LOCK(unp2); 1527 unp_disconnect(unp, unp2); 1528 UNP_PCB_UNLOCK(unp2); 1529 } 1530 1531 static void 1532 unp_freerights(struct file **rp, int fdcount) 1533 { 1534 int i; 1535 struct file *fp; 1536 1537 for (i = 0; i < fdcount; i++) { 1538 fp = *rp; 1539 *rp++ = NULL; 1540 unp_discard(fp); 1541 } 1542 } 1543 1544 static int 1545 unp_externalize(struct mbuf *control, struct mbuf **controlp) 1546 { 1547 struct thread *td = curthread; /* XXX */ 1548 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1549 int i; 1550 int *fdp; 1551 struct file **rp; 1552 struct file *fp; 1553 void *data; 1554 socklen_t clen = control->m_len, datalen; 1555 int error, newfds; 1556 int f; 1557 u_int newlen; 1558 1559 UNP_GLOBAL_UNLOCK_ASSERT(); 1560 1561 error = 0; 1562 if (controlp != NULL) /* controlp == NULL => free control messages */ 1563 *controlp = NULL; 1564 while (cm != NULL) { 1565 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1566 error = EINVAL; 1567 break; 1568 } 1569 data = CMSG_DATA(cm); 1570 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1571 if (cm->cmsg_level == SOL_SOCKET 1572 && cm->cmsg_type == SCM_RIGHTS) { 1573 newfds = datalen / sizeof(struct file *); 1574 rp = data; 1575 1576 /* If we're not outputting the descriptors free them. */ 1577 if (error || controlp == NULL) { 1578 unp_freerights(rp, newfds); 1579 goto next; 1580 } 1581 FILEDESC_XLOCK(td->td_proc->p_fd); 1582 /* if the new FD's will not fit free them. 
*/ 1583 if (!fdavail(td, newfds)) { 1584 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1585 error = EMSGSIZE; 1586 unp_freerights(rp, newfds); 1587 goto next; 1588 } 1589 1590 /* 1591 * Now change each pointer to an fd in the global 1592 * table to an integer that is the index to the local 1593 * fd table entry that we set up to point to the 1594 * global one we are transferring. 1595 */ 1596 newlen = newfds * sizeof(int); 1597 *controlp = sbcreatecontrol(NULL, newlen, 1598 SCM_RIGHTS, SOL_SOCKET); 1599 if (*controlp == NULL) { 1600 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1601 error = E2BIG; 1602 unp_freerights(rp, newfds); 1603 goto next; 1604 } 1605 1606 fdp = (int *) 1607 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1608 for (i = 0; i < newfds; i++) { 1609 if (fdalloc(td, 0, &f)) 1610 panic("unp_externalize fdalloc failed"); 1611 fp = *rp++; 1612 td->td_proc->p_fd->fd_ofiles[f] = fp; 1613 unp_externalize_fp(fp); 1614 *fdp++ = f; 1615 } 1616 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1617 } else { 1618 /* We can just copy anything else across. 
*/ 1619 if (error || controlp == NULL) 1620 goto next; 1621 *controlp = sbcreatecontrol(NULL, datalen, 1622 cm->cmsg_type, cm->cmsg_level); 1623 if (*controlp == NULL) { 1624 error = ENOBUFS; 1625 goto next; 1626 } 1627 bcopy(data, 1628 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1629 datalen); 1630 } 1631 controlp = &(*controlp)->m_next; 1632 1633 next: 1634 if (CMSG_SPACE(datalen) < clen) { 1635 clen -= CMSG_SPACE(datalen); 1636 cm = (struct cmsghdr *) 1637 ((caddr_t)cm + CMSG_SPACE(datalen)); 1638 } else { 1639 clen = 0; 1640 cm = NULL; 1641 } 1642 } 1643 1644 m_freem(control); 1645 return (error); 1646 } 1647 1648 static void 1649 unp_zone_change(void *tag) 1650 { 1651 1652 uma_zone_set_max(unp_zone, maxsockets); 1653 } 1654 1655 static void 1656 unp_init(void) 1657 { 1658 1659 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1660 NULL, NULL, UMA_ALIGN_PTR, 0); 1661 if (unp_zone == NULL) 1662 panic("unp_init"); 1663 uma_zone_set_max(unp_zone, maxsockets); 1664 EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, 1665 NULL, EVENTHANDLER_PRI_ANY); 1666 LIST_INIT(&unp_dhead); 1667 LIST_INIT(&unp_shead); 1668 TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); 1669 UNP_GLOBAL_LOCK_INIT(); 1670 } 1671 1672 static int 1673 unp_internalize(struct mbuf **controlp, struct thread *td) 1674 { 1675 struct mbuf *control = *controlp; 1676 struct proc *p = td->td_proc; 1677 struct filedesc *fdescp = p->p_fd; 1678 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1679 struct cmsgcred *cmcred; 1680 struct file **rp; 1681 struct file *fp; 1682 struct timeval *tv; 1683 int i, fd, *fdp; 1684 void *data; 1685 socklen_t clen = control->m_len, datalen; 1686 int error, oldfds; 1687 u_int newlen; 1688 1689 UNP_GLOBAL_UNLOCK_ASSERT(); 1690 1691 error = 0; 1692 *controlp = NULL; 1693 while (cm != NULL) { 1694 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1695 || cm->cmsg_len > clen) { 1696 error = EINVAL; 1697 goto out; 1698 } 1699 data = CMSG_DATA(cm); 1700 
datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1701 1702 switch (cm->cmsg_type) { 1703 /* 1704 * Fill in credential information. 1705 */ 1706 case SCM_CREDS: 1707 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1708 SCM_CREDS, SOL_SOCKET); 1709 if (*controlp == NULL) { 1710 error = ENOBUFS; 1711 goto out; 1712 } 1713 cmcred = (struct cmsgcred *) 1714 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1715 cmcred->cmcred_pid = p->p_pid; 1716 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1717 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1718 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1719 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1720 CMGROUP_MAX); 1721 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1722 cmcred->cmcred_groups[i] = 1723 td->td_ucred->cr_groups[i]; 1724 break; 1725 1726 case SCM_RIGHTS: 1727 oldfds = datalen / sizeof (int); 1728 /* 1729 * Check that all the FDs passed in refer to legal 1730 * files. If not, reject the entire operation. 1731 */ 1732 fdp = data; 1733 FILEDESC_SLOCK(fdescp); 1734 for (i = 0; i < oldfds; i++) { 1735 fd = *fdp++; 1736 if ((unsigned)fd >= fdescp->fd_nfiles || 1737 fdescp->fd_ofiles[fd] == NULL) { 1738 FILEDESC_SUNLOCK(fdescp); 1739 error = EBADF; 1740 goto out; 1741 } 1742 fp = fdescp->fd_ofiles[fd]; 1743 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1744 FILEDESC_SUNLOCK(fdescp); 1745 error = EOPNOTSUPP; 1746 goto out; 1747 } 1748 1749 } 1750 1751 /* 1752 * Now replace the integer FDs with pointers to the 1753 * associated global file table entry.. 
1754 */ 1755 newlen = oldfds * sizeof(struct file *); 1756 *controlp = sbcreatecontrol(NULL, newlen, 1757 SCM_RIGHTS, SOL_SOCKET); 1758 if (*controlp == NULL) { 1759 FILEDESC_SUNLOCK(fdescp); 1760 error = E2BIG; 1761 goto out; 1762 } 1763 fdp = data; 1764 rp = (struct file **) 1765 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1766 for (i = 0; i < oldfds; i++) { 1767 fp = fdescp->fd_ofiles[*fdp++]; 1768 *rp++ = fp; 1769 unp_internalize_fp(fp); 1770 } 1771 FILEDESC_SUNLOCK(fdescp); 1772 break; 1773 1774 case SCM_TIMESTAMP: 1775 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1776 SCM_TIMESTAMP, SOL_SOCKET); 1777 if (*controlp == NULL) { 1778 error = ENOBUFS; 1779 goto out; 1780 } 1781 tv = (struct timeval *) 1782 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1783 microtime(tv); 1784 break; 1785 1786 default: 1787 error = EINVAL; 1788 goto out; 1789 } 1790 1791 controlp = &(*controlp)->m_next; 1792 if (CMSG_SPACE(datalen) < clen) { 1793 clen -= CMSG_SPACE(datalen); 1794 cm = (struct cmsghdr *) 1795 ((caddr_t)cm + CMSG_SPACE(datalen)); 1796 } else { 1797 clen = 0; 1798 cm = NULL; 1799 } 1800 } 1801 1802 out: 1803 m_freem(control); 1804 return (error); 1805 } 1806 1807 static struct mbuf * 1808 unp_addsockcred(struct thread *td, struct mbuf *control) 1809 { 1810 struct mbuf *m, *n, *n_prev; 1811 struct sockcred *sc; 1812 const struct cmsghdr *cm; 1813 int ngroups; 1814 int i; 1815 1816 ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); 1817 m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); 1818 if (m == NULL) 1819 return (control); 1820 1821 sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); 1822 sc->sc_uid = td->td_ucred->cr_ruid; 1823 sc->sc_euid = td->td_ucred->cr_uid; 1824 sc->sc_gid = td->td_ucred->cr_rgid; 1825 sc->sc_egid = td->td_ucred->cr_gid; 1826 sc->sc_ngroups = ngroups; 1827 for (i = 0; i < sc->sc_ngroups; i++) 1828 sc->sc_groups[i] = td->td_ucred->cr_groups[i]; 1829 1830 /* 1831 * Unlink SCM_CREDS control messages 
(struct cmsgcred), since just 1832 * created SCM_CREDS control message (struct sockcred) has another 1833 * format. 1834 */ 1835 if (control != NULL) 1836 for (n = control, n_prev = NULL; n != NULL;) { 1837 cm = mtod(n, struct cmsghdr *); 1838 if (cm->cmsg_level == SOL_SOCKET && 1839 cm->cmsg_type == SCM_CREDS) { 1840 if (n_prev == NULL) 1841 control = n->m_next; 1842 else 1843 n_prev->m_next = n->m_next; 1844 n = m_free(n); 1845 } else { 1846 n_prev = n; 1847 n = n->m_next; 1848 } 1849 } 1850 1851 /* Prepend it to the head. */ 1852 m->m_next = control; 1853 return (m); 1854 } 1855 1856 static struct unpcb * 1857 fptounp(struct file *fp) 1858 { 1859 struct socket *so; 1860 1861 if (fp->f_type != DTYPE_SOCKET) 1862 return (NULL); 1863 if ((so = fp->f_data) == NULL) 1864 return (NULL); 1865 if (so->so_proto->pr_domain != &localdomain) 1866 return (NULL); 1867 return sotounpcb(so); 1868 } 1869 1870 static void 1871 unp_discard(struct file *fp) 1872 { 1873 1874 unp_externalize_fp(fp); 1875 (void) closef(fp, (struct thread *)NULL); 1876 } 1877 1878 static void 1879 unp_internalize_fp(struct file *fp) 1880 { 1881 struct unpcb *unp; 1882 1883 UNP_GLOBAL_WLOCK(); 1884 if ((unp = fptounp(fp)) != NULL) { 1885 unp->unp_file = fp; 1886 unp->unp_msgcount++; 1887 } 1888 fhold(fp); 1889 unp_rights++; 1890 UNP_GLOBAL_WUNLOCK(); 1891 } 1892 1893 static void 1894 unp_externalize_fp(struct file *fp) 1895 { 1896 struct unpcb *unp; 1897 1898 UNP_GLOBAL_WLOCK(); 1899 if ((unp = fptounp(fp)) != NULL) 1900 unp->unp_msgcount--; 1901 unp_rights--; 1902 UNP_GLOBAL_WUNLOCK(); 1903 } 1904 1905 /* 1906 * unp_defer indicates whether additional work has been defered for a future 1907 * pass through unp_gc(). It is thread local and does not require explicit 1908 * synchronization. 
1909 */ 1910 static int unp_marked; 1911 static int unp_unreachable; 1912 1913 static void 1914 unp_accessable(struct file *fp) 1915 { 1916 struct unpcb *unp; 1917 1918 if ((unp = fptounp(fp)) == NULL) 1919 return; 1920 if (unp->unp_gcflag & UNPGC_REF) 1921 return; 1922 unp->unp_gcflag &= ~UNPGC_DEAD; 1923 unp->unp_gcflag |= UNPGC_REF; 1924 unp_marked++; 1925 } 1926 1927 static void 1928 unp_gc_process(struct unpcb *unp) 1929 { 1930 struct socket *soa; 1931 struct socket *so; 1932 struct file *fp; 1933 1934 /* Already processed. */ 1935 if (unp->unp_gcflag & UNPGC_SCANNED) 1936 return; 1937 fp = unp->unp_file; 1938 1939 /* 1940 * Check for a socket potentially in a cycle. It must be in a 1941 * queue as indicated by msgcount, and this must equal the file 1942 * reference count. Note that when msgcount is 0 the file is NULL. 1943 */ 1944 if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && 1945 unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { 1946 unp->unp_gcflag |= UNPGC_DEAD; 1947 unp_unreachable++; 1948 return; 1949 } 1950 1951 /* 1952 * Mark all sockets we reference with RIGHTS. 1953 */ 1954 so = unp->unp_socket; 1955 SOCKBUF_LOCK(&so->so_rcv); 1956 unp_scan(so->so_rcv.sb_mb, unp_accessable); 1957 SOCKBUF_UNLOCK(&so->so_rcv); 1958 1959 /* 1960 * Mark all sockets in our accept queue. 
1961 */ 1962 ACCEPT_LOCK(); 1963 TAILQ_FOREACH(soa, &so->so_comp, so_list) { 1964 SOCKBUF_LOCK(&soa->so_rcv); 1965 unp_scan(soa->so_rcv.sb_mb, unp_accessable); 1966 SOCKBUF_UNLOCK(&soa->so_rcv); 1967 } 1968 ACCEPT_UNLOCK(); 1969 unp->unp_gcflag |= UNPGC_SCANNED; 1970 } 1971 1972 static int unp_recycled; 1973 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 1974 "Number of unreachable sockets claimed by the garbage collector."); 1975 1976 static int unp_taskcount; 1977 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 1978 "Number of times the garbage collector has run."); 1979 1980 static void 1981 unp_gc(__unused void *arg, int pending) 1982 { 1983 struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL }; 1984 struct unp_head **head; 1985 struct file **unref; 1986 struct unpcb *unp; 1987 int i; 1988 1989 unp_taskcount++; 1990 UNP_GLOBAL_RLOCK(); 1991 /* 1992 * First clear all gc flags from previous runs. 1993 */ 1994 for (head = heads; *head != NULL; head++) 1995 LIST_FOREACH(unp, *head, unp_link) 1996 unp->unp_gcflag = 0; 1997 1998 /* 1999 * Scan marking all reachable sockets with UNPGC_REF. Once a socket 2000 * is reachable all of the sockets it references are reachable. 2001 * Stop the scan once we do a complete loop without discovering 2002 * a new reachable socket. 2003 */ 2004 do { 2005 unp_unreachable = 0; 2006 unp_marked = 0; 2007 for (head = heads; *head != NULL; head++) 2008 LIST_FOREACH(unp, *head, unp_link) 2009 unp_gc_process(unp); 2010 } while (unp_marked); 2011 UNP_GLOBAL_RUNLOCK(); 2012 if (unp_unreachable == 0) 2013 return; 2014 2015 /* 2016 * Allocate space for a local list of dead unpcbs. 2017 */ 2018 unref = malloc(unp_unreachable * sizeof(struct file *), 2019 M_TEMP, M_WAITOK); 2020 2021 /* 2022 * Iterate looking for sockets which have been specifically marked 2023 * as as unreachable and store them locally. 
2024 */ 2025 UNP_GLOBAL_RLOCK(); 2026 for (i = 0, head = heads; *head != NULL; head++) 2027 LIST_FOREACH(unp, *head, unp_link) 2028 if (unp->unp_gcflag & UNPGC_DEAD) { 2029 unref[i++] = unp->unp_file; 2030 fhold(unp->unp_file); 2031 KASSERT(unp->unp_file != NULL, 2032 ("unp_gc: Invalid unpcb.")); 2033 KASSERT(i <= unp_unreachable, 2034 ("unp_gc: incorrect unreachable count.")); 2035 } 2036 UNP_GLOBAL_RUNLOCK(); 2037 2038 /* 2039 * Now flush all sockets, free'ing rights. This will free the 2040 * struct files associated with these sockets but leave each socket 2041 * with one remaining ref. 2042 */ 2043 for (i = 0; i < unp_unreachable; i++) 2044 sorflush(unref[i]->f_data); 2045 2046 /* 2047 * And finally release the sockets so they can be reclaimed. 2048 */ 2049 for (i = 0; i < unp_unreachable; i++) 2050 fdrop(unref[i], NULL); 2051 unp_recycled += unp_unreachable; 2052 free(unref, M_TEMP); 2053 } 2054 2055 static void 2056 unp_dispose(struct mbuf *m) 2057 { 2058 2059 if (m) 2060 unp_scan(m, unp_discard); 2061 } 2062 2063 static void 2064 unp_scan(struct mbuf *m0, void (*op)(struct file *)) 2065 { 2066 struct mbuf *m; 2067 struct file **rp; 2068 struct cmsghdr *cm; 2069 void *data; 2070 int i; 2071 socklen_t clen, datalen; 2072 int qfds; 2073 2074 while (m0 != NULL) { 2075 for (m = m0; m; m = m->m_next) { 2076 if (m->m_type != MT_CONTROL) 2077 continue; 2078 2079 cm = mtod(m, struct cmsghdr *); 2080 clen = m->m_len; 2081 2082 while (cm != NULL) { 2083 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 2084 break; 2085 2086 data = CMSG_DATA(cm); 2087 datalen = (caddr_t)cm + cm->cmsg_len 2088 - (caddr_t)data; 2089 2090 if (cm->cmsg_level == SOL_SOCKET && 2091 cm->cmsg_type == SCM_RIGHTS) { 2092 qfds = datalen / sizeof (struct file *); 2093 rp = data; 2094 for (i = 0; i < qfds; i++) 2095 (*op)(*rp++); 2096 } 2097 2098 if (CMSG_SPACE(datalen) < clen) { 2099 clen -= CMSG_SPACE(datalen); 2100 cm = (struct cmsghdr *) 2101 ((caddr_t)cm + CMSG_SPACE(datalen)); 2102 } else { 2103 
clen = 0; 2104 cm = NULL; 2105 } 2106 } 2107 } 2108 m0 = m0->m_act; 2109 } 2110 } 2111 2112 #ifdef DDB 2113 static void 2114 db_print_indent(int indent) 2115 { 2116 int i; 2117 2118 for (i = 0; i < indent; i++) 2119 db_printf(" "); 2120 } 2121 2122 static void 2123 db_print_unpflags(int unp_flags) 2124 { 2125 int comma; 2126 2127 comma = 0; 2128 if (unp_flags & UNP_HAVEPC) { 2129 db_printf("%sUNP_HAVEPC", comma ? ", " : ""); 2130 comma = 1; 2131 } 2132 if (unp_flags & UNP_HAVEPCCACHED) { 2133 db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : ""); 2134 comma = 1; 2135 } 2136 if (unp_flags & UNP_WANTCRED) { 2137 db_printf("%sUNP_WANTCRED", comma ? ", " : ""); 2138 comma = 1; 2139 } 2140 if (unp_flags & UNP_CONNWAIT) { 2141 db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); 2142 comma = 1; 2143 } 2144 if (unp_flags & UNP_CONNECTING) { 2145 db_printf("%sUNP_CONNECTING", comma ? ", " : ""); 2146 comma = 1; 2147 } 2148 if (unp_flags & UNP_BINDING) { 2149 db_printf("%sUNP_BINDING", comma ? ", " : ""); 2150 comma = 1; 2151 } 2152 } 2153 2154 static void 2155 db_print_xucred(int indent, struct xucred *xu) 2156 { 2157 int comma, i; 2158 2159 db_print_indent(indent); 2160 db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n", 2161 xu->cr_version, xu->cr_uid, xu->cr_ngroups); 2162 db_print_indent(indent); 2163 db_printf("cr_groups: "); 2164 comma = 0; 2165 for (i = 0; i < xu->cr_ngroups; i++) { 2166 db_printf("%s%u", comma ? 
", " : "", xu->cr_groups[i]); 2167 comma = 1; 2168 } 2169 db_printf("\n"); 2170 } 2171 2172 static void 2173 db_print_unprefs(int indent, struct unp_head *uh) 2174 { 2175 struct unpcb *unp; 2176 int counter; 2177 2178 counter = 0; 2179 LIST_FOREACH(unp, uh, unp_reflink) { 2180 if (counter % 4 == 0) 2181 db_print_indent(indent); 2182 db_printf("%p ", unp); 2183 if (counter % 4 == 3) 2184 db_printf("\n"); 2185 counter++; 2186 } 2187 if (counter != 0 && counter % 4 != 0) 2188 db_printf("\n"); 2189 } 2190 2191 DB_SHOW_COMMAND(unpcb, db_show_unpcb) 2192 { 2193 struct unpcb *unp; 2194 2195 if (!have_addr) { 2196 db_printf("usage: show unpcb <addr>\n"); 2197 return; 2198 } 2199 unp = (struct unpcb *)addr; 2200 2201 db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, 2202 unp->unp_vnode); 2203 2204 db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino, 2205 unp->unp_conn); 2206 2207 db_printf("unp_refs:\n"); 2208 db_print_unprefs(2, &unp->unp_refs); 2209 2210 /* XXXRW: Would be nice to print the full address, if any. */ 2211 db_printf("unp_addr: %p\n", unp->unp_addr); 2212 2213 db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n", 2214 unp->unp_cc, unp->unp_mbcnt, 2215 (unsigned long long)unp->unp_gencnt); 2216 2217 db_printf("unp_flags: %x (", unp->unp_flags); 2218 db_print_unpflags(unp->unp_flags); 2219 db_printf(")\n"); 2220 2221 db_printf("unp_peercred:\n"); 2222 db_print_xucred(2, &unp->unp_peercred); 2223 2224 db_printf("unp_refcount: %u\n", unp->unp_refcount); 2225 } 2226 #endif 2227