1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2004-2009 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 32 */ 33 34 /* 35 * UNIX Domain (Local) Sockets 36 * 37 * This is an implementation of UNIX (local) domain sockets. Each socket has 38 * an associated struct unpcb (UNIX protocol control block). 
Stream sockets 39 * may be connected to 0 or 1 other socket. Datagram sockets may be 40 * connected to 0, 1, or many other sockets. Sockets may be created and 41 * connected in pairs (socketpair(2)), or bound/connected to using the file 42 * system name space. For most purposes, only the receive socket buffer is 43 * used, as sending on one socket delivers directly to the receive socket 44 * buffer of a second socket. 45 * 46 * The implementation is substantially complicated by the fact that 47 * "ancillary data", such as file descriptors or credentials, may be passed 48 * across UNIX domain sockets. The potential for passing UNIX domain sockets 49 * over other UNIX domain sockets requires the implementation of a simple 50 * garbage collector to find and tear down cycles of disconnected sockets. 51 * 52 * TODO: 53 * SEQPACKET, RDM 54 * rethink name space problems 55 * need a proper out-of-band 56 */ 57 58 #include <sys/cdefs.h> 59 __FBSDID("$FreeBSD$"); 60 61 #include "opt_ddb.h" 62 63 #include <sys/param.h> 64 #include <sys/domain.h> 65 #include <sys/fcntl.h> 66 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 67 #include <sys/eventhandler.h> 68 #include <sys/file.h> 69 #include <sys/filedesc.h> 70 #include <sys/kernel.h> 71 #include <sys/lock.h> 72 #include <sys/mbuf.h> 73 #include <sys/mount.h> 74 #include <sys/mutex.h> 75 #include <sys/namei.h> 76 #include <sys/proc.h> 77 #include <sys/protosw.h> 78 #include <sys/resourcevar.h> 79 #include <sys/rwlock.h> 80 #include <sys/socket.h> 81 #include <sys/socketvar.h> 82 #include <sys/signalvar.h> 83 #include <sys/stat.h> 84 #include <sys/sx.h> 85 #include <sys/sysctl.h> 86 #include <sys/systm.h> 87 #include <sys/taskqueue.h> 88 #include <sys/un.h> 89 #include <sys/unpcb.h> 90 #include <sys/vnode.h> 91 #include <sys/vimage.h> 92 93 #ifdef DDB 94 #include <ddb/ddb.h> 95 #endif 96 97 #include <security/mac/mac_framework.h> 98 99 #include <vm/uma.h> 100 101 /* 102 * Locking key: 103 * (l) Locked using list 
lock 104 * (g) Locked using linkage lock 105 */ 106 107 static uma_zone_t unp_zone; 108 static unp_gen_t unp_gencnt; /* (l) */ 109 static u_int unp_count; /* (l) Count of local sockets. */ 110 static ino_t unp_ino; /* Prototype for fake inode numbers. */ 111 static int unp_rights; /* (g) File descriptors in flight. */ 112 static struct unp_head unp_shead; /* (l) List of stream sockets. */ 113 static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ 114 115 static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 116 117 /* 118 * Garbage collection of cyclic file descriptor/socket references occurs 119 * asynchronously in a taskqueue context in order to avoid recursion and 120 * reentrance in the UNIX domain socket, file descriptor, and socket layer 121 * code. See unp_gc() for a full description. 122 */ 123 static struct task unp_gc_task; 124 125 /* 126 * Both send and receive buffers are allocated PIPSIZ bytes of buffering for 127 * stream sockets, although the total for sender and receiver is actually 128 * only PIPSIZ. 129 * 130 * Datagram sockets really use the sendspace as the maximum datagram size, 131 * and don't really want to reserve the sendspace. Their recvspace should be 132 * large enough for at least one max-size datagram plus address. 
133 */ 134 #ifndef PIPSIZ 135 #define PIPSIZ 8192 136 #endif 137 static u_long unpst_sendspace = PIPSIZ; 138 static u_long unpst_recvspace = PIPSIZ; 139 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 140 static u_long unpdg_recvspace = 4*1024; 141 142 SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain"); 143 SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0, "SOCK_STREAM"); 144 SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM"); 145 146 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 147 &unpst_sendspace, 0, "Default stream send space."); 148 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 149 &unpst_recvspace, 0, "Default stream receive space."); 150 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 151 &unpdg_sendspace, 0, "Default datagram send space."); 152 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 153 &unpdg_recvspace, 0, "Default datagram receive space."); 154 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, 155 "File descriptors in flight."); 156 157 /*- 158 * Locking and synchronization: 159 * 160 * Three types of locks exit in the local domain socket implementation: a 161 * global list mutex, a global linkage rwlock, and per-unpcb mutexes. Of the 162 * global locks, the list lock protects the socket count, global generation 163 * number, and stream/datagram global lists. The linkage lock protects the 164 * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be 165 * held exclusively over the acquisition of multiple unpcb locks to prevent 166 * deadlock. 167 * 168 * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer, 169 * allocated in pru_attach() and freed in pru_detach(). The validity of that 170 * pointer is an invariant, so no lock is required to dereference the so_pcb 171 * pointer if a valid socket reference is held by the caller. 
 * In practice,
 * this is always true during operations performed on a socket.  Each unpcb
 * has a back-pointer to its socket, unp_socket, which will be stable under
 * the same circumstances.
 *
 * This pointer may only be safely dereferenced as long as a valid reference
 * to the unpcb is held.  Typically, this reference will be from the socket,
 * or from another unpcb when the referring unpcb's lock is held (in order
 * that the reference not be invalidated during use).  For example, to follow
 * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn,
 * as unp_socket remains valid as long as the reference to unp_conn is valid.
 *
 * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx.  Individual
 * atomic reads without the lock may be performed "lockless", but more
 * complex reads and read-modify-writes require the mutex to be held.  No
 * lock order is defined between unpcb locks -- multiple unpcb locks may be
 * acquired at the same time only when holding the linkage rwlock
 * exclusively, which prevents deadlocks.
 *
 * Blocking with UNIX domain sockets is a tricky issue: unlike most network
 * protocols, bind() is a non-atomic operation, and connect() requires
 * potential sleeping in the protocol, due to potentially waiting on local or
 * distributed file systems.  We try to separate "lookup" operations, which
 * may sleep, and the IPC operations themselves, which typically can occur
 * with relative atomicity as locks can be held over the entire operation.
 *
 * Another tricky issue is simultaneous multi-threaded or multi-process
 * access to a single UNIX domain socket.  These are handled by the flags
 * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
 * binding, both of which involve dropping UNIX domain socket locks in order
 * to perform namei() and other file system operations.
202 */ 203 static struct rwlock unp_link_rwlock; 204 static struct mtx unp_list_lock; 205 206 #define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \ 207 "unp_link_rwlock") 208 209 #define UNP_LINK_LOCK_ASSERT() rw_assert(&unp_link_rwlock, \ 210 RA_LOCKED) 211 #define UNP_LINK_UNLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ 212 RA_UNLOCKED) 213 214 #define UNP_LINK_RLOCK() rw_rlock(&unp_link_rwlock) 215 #define UNP_LINK_RUNLOCK() rw_runlock(&unp_link_rwlock) 216 #define UNP_LINK_WLOCK() rw_wlock(&unp_link_rwlock) 217 #define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock) 218 #define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \ 219 RA_WLOCKED) 220 221 #define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \ 222 "unp_list_lock", NULL, MTX_DEF) 223 #define UNP_LIST_LOCK() mtx_lock(&unp_list_lock) 224 #define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock) 225 226 #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ 227 "unp_mtx", "unp_mtx", \ 228 MTX_DUPOK|MTX_DEF|MTX_RECURSE) 229 #define UNP_PCB_LOCK_DESTROY(unp) mtx_destroy(&(unp)->unp_mtx) 230 #define UNP_PCB_LOCK(unp) mtx_lock(&(unp)->unp_mtx) 231 #define UNP_PCB_UNLOCK(unp) mtx_unlock(&(unp)->unp_mtx) 232 #define UNP_PCB_LOCK_ASSERT(unp) mtx_assert(&(unp)->unp_mtx, MA_OWNED) 233 234 static int uipc_connect2(struct socket *, struct socket *); 235 static int uipc_ctloutput(struct socket *, struct sockopt *); 236 static int unp_connect(struct socket *, struct sockaddr *, 237 struct thread *); 238 static int unp_connect2(struct socket *so, struct socket *so2, int); 239 static void unp_disconnect(struct unpcb *unp, struct unpcb *unp2); 240 static void unp_dispose(struct mbuf *); 241 static void unp_shutdown(struct unpcb *); 242 static void unp_drop(struct unpcb *, int); 243 static void unp_gc(__unused void *, int); 244 static void unp_scan(struct mbuf *, void (*)(struct file *)); 245 static void unp_discard(struct file *); 246 static void unp_freerights(struct file **, int); 247 static void unp_init(void); 
248 static int unp_internalize(struct mbuf **, struct thread *); 249 static void unp_internalize_fp(struct file *); 250 static int unp_externalize(struct mbuf *, struct mbuf **); 251 static void unp_externalize_fp(struct file *); 252 static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); 253 254 /* 255 * Definitions of protocols supported in the LOCAL domain. 256 */ 257 static struct domain localdomain; 258 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream; 259 static struct protosw localsw[] = { 260 { 261 .pr_type = SOCK_STREAM, 262 .pr_domain = &localdomain, 263 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS, 264 .pr_ctloutput = &uipc_ctloutput, 265 .pr_usrreqs = &uipc_usrreqs_stream 266 }, 267 { 268 .pr_type = SOCK_DGRAM, 269 .pr_domain = &localdomain, 270 .pr_flags = PR_ATOMIC|PR_ADDR|PR_RIGHTS, 271 .pr_usrreqs = &uipc_usrreqs_dgram 272 }, 273 }; 274 275 static struct domain localdomain = { 276 .dom_family = AF_LOCAL, 277 .dom_name = "local", 278 .dom_init = unp_init, 279 .dom_externalize = unp_externalize, 280 .dom_dispose = unp_dispose, 281 .dom_protosw = localsw, 282 .dom_protoswNPROTOSW = &localsw[sizeof(localsw)/sizeof(localsw[0])] 283 }; 284 DOMAIN_SET(local); 285 286 static void 287 uipc_abort(struct socket *so) 288 { 289 struct unpcb *unp, *unp2; 290 291 unp = sotounpcb(so); 292 KASSERT(unp != NULL, ("uipc_abort: unp == NULL")); 293 294 UNP_LINK_WLOCK(); 295 UNP_PCB_LOCK(unp); 296 unp2 = unp->unp_conn; 297 if (unp2 != NULL) { 298 UNP_PCB_LOCK(unp2); 299 unp_drop(unp2, ECONNABORTED); 300 UNP_PCB_UNLOCK(unp2); 301 } 302 UNP_PCB_UNLOCK(unp); 303 UNP_LINK_WUNLOCK(); 304 } 305 306 static int 307 uipc_accept(struct socket *so, struct sockaddr **nam) 308 { 309 struct unpcb *unp, *unp2; 310 const struct sockaddr *sa; 311 312 /* 313 * Pass back name of connected socket, if it was bound and we are 314 * still connected (our peer may have closed already!). 
315 */ 316 unp = sotounpcb(so); 317 KASSERT(unp != NULL, ("uipc_accept: unp == NULL")); 318 319 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 320 UNP_LINK_RLOCK(); 321 unp2 = unp->unp_conn; 322 if (unp2 != NULL && unp2->unp_addr != NULL) { 323 UNP_PCB_LOCK(unp2); 324 sa = (struct sockaddr *) unp2->unp_addr; 325 bcopy(sa, *nam, sa->sa_len); 326 UNP_PCB_UNLOCK(unp2); 327 } else { 328 sa = &sun_noname; 329 bcopy(sa, *nam, sa->sa_len); 330 } 331 UNP_LINK_RUNLOCK(); 332 return (0); 333 } 334 335 static int 336 uipc_attach(struct socket *so, int proto, struct thread *td) 337 { 338 u_long sendspace, recvspace; 339 struct unpcb *unp; 340 int error; 341 342 KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL")); 343 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 344 switch (so->so_type) { 345 case SOCK_STREAM: 346 sendspace = unpst_sendspace; 347 recvspace = unpst_recvspace; 348 break; 349 350 case SOCK_DGRAM: 351 sendspace = unpdg_sendspace; 352 recvspace = unpdg_recvspace; 353 break; 354 355 default: 356 panic("uipc_attach"); 357 } 358 error = soreserve(so, sendspace, recvspace); 359 if (error) 360 return (error); 361 } 362 unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO); 363 if (unp == NULL) 364 return (ENOBUFS); 365 LIST_INIT(&unp->unp_refs); 366 UNP_PCB_LOCK_INIT(unp); 367 unp->unp_socket = so; 368 so->so_pcb = unp; 369 unp->unp_refcount = 1; 370 371 UNP_LIST_LOCK(); 372 unp->unp_gencnt = ++unp_gencnt; 373 unp_count++; 374 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead : &unp_shead, 375 unp, unp_link); 376 UNP_LIST_UNLOCK(); 377 378 return (0); 379 } 380 381 static int 382 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 383 { 384 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 385 struct vattr vattr; 386 int error, namelen, vfslocked; 387 struct nameidata nd; 388 struct unpcb *unp; 389 struct vnode *vp; 390 struct mount *mp; 391 char *buf; 392 393 unp = sotounpcb(so); 394 KASSERT(unp != NULL, ("uipc_bind: unp == NULL")); 395 396 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 397 if (namelen <= 0) 398 return (EINVAL); 399 400 /* 401 * We don't allow simultaneous bind() calls on a single UNIX domain 402 * socket, so flag in-progress operations, and return an error if an 403 * operation is already in progress. 404 * 405 * Historically, we have not allowed a socket to be rebound, so this 406 * also returns an error. Not allowing re-binding simplifies the 407 * implementation and avoids a great many possible failure modes. 
408 */ 409 UNP_PCB_LOCK(unp); 410 if (unp->unp_vnode != NULL) { 411 UNP_PCB_UNLOCK(unp); 412 return (EINVAL); 413 } 414 if (unp->unp_flags & UNP_BINDING) { 415 UNP_PCB_UNLOCK(unp); 416 return (EALREADY); 417 } 418 unp->unp_flags |= UNP_BINDING; 419 UNP_PCB_UNLOCK(unp); 420 421 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 422 bcopy(soun->sun_path, buf, namelen); 423 buf[namelen] = 0; 424 425 restart: 426 vfslocked = 0; 427 NDINIT(&nd, CREATE, MPSAFE | NOFOLLOW | LOCKPARENT | SAVENAME, 428 UIO_SYSSPACE, buf, td); 429 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 430 error = namei(&nd); 431 if (error) 432 goto error; 433 vp = nd.ni_vp; 434 vfslocked = NDHASGIANT(&nd); 435 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 436 NDFREE(&nd, NDF_ONLY_PNBUF); 437 if (nd.ni_dvp == vp) 438 vrele(nd.ni_dvp); 439 else 440 vput(nd.ni_dvp); 441 if (vp != NULL) { 442 vrele(vp); 443 error = EADDRINUSE; 444 goto error; 445 } 446 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 447 if (error) 448 goto error; 449 VFS_UNLOCK_GIANT(vfslocked); 450 goto restart; 451 } 452 VATTR_NULL(&vattr); 453 vattr.va_type = VSOCK; 454 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 455 #ifdef MAC 456 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 457 &vattr); 458 #endif 459 if (error == 0) 460 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 461 NDFREE(&nd, NDF_ONLY_PNBUF); 462 vput(nd.ni_dvp); 463 if (error) { 464 vn_finished_write(mp); 465 goto error; 466 } 467 vp = nd.ni_vp; 468 ASSERT_VOP_ELOCKED(vp, "uipc_bind"); 469 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 470 471 UNP_LINK_WLOCK(); 472 UNP_PCB_LOCK(unp); 473 vp->v_socket = unp->unp_socket; 474 unp->unp_vnode = vp; 475 unp->unp_addr = soun; 476 unp->unp_flags &= ~UNP_BINDING; 477 UNP_PCB_UNLOCK(unp); 478 UNP_LINK_WUNLOCK(); 479 VOP_UNLOCK(vp, 0); 480 vn_finished_write(mp); 481 VFS_UNLOCK_GIANT(vfslocked); 482 free(buf, M_TEMP); 483 return 
(0); 484 485 error: 486 VFS_UNLOCK_GIANT(vfslocked); 487 UNP_PCB_LOCK(unp); 488 unp->unp_flags &= ~UNP_BINDING; 489 UNP_PCB_UNLOCK(unp); 490 free(buf, M_TEMP); 491 return (error); 492 } 493 494 static int 495 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 496 { 497 int error; 498 499 KASSERT(td == curthread, ("uipc_connect: td != curthread")); 500 UNP_LINK_WLOCK(); 501 error = unp_connect(so, nam, td); 502 UNP_LINK_WUNLOCK(); 503 return (error); 504 } 505 506 static void 507 uipc_close(struct socket *so) 508 { 509 struct unpcb *unp, *unp2; 510 511 unp = sotounpcb(so); 512 KASSERT(unp != NULL, ("uipc_close: unp == NULL")); 513 514 UNP_LINK_WLOCK(); 515 UNP_PCB_LOCK(unp); 516 unp2 = unp->unp_conn; 517 if (unp2 != NULL) { 518 UNP_PCB_LOCK(unp2); 519 unp_disconnect(unp, unp2); 520 UNP_PCB_UNLOCK(unp2); 521 } 522 UNP_PCB_UNLOCK(unp); 523 UNP_LINK_WUNLOCK(); 524 } 525 526 static int 527 uipc_connect2(struct socket *so1, struct socket *so2) 528 { 529 struct unpcb *unp, *unp2; 530 int error; 531 532 UNP_LINK_WLOCK(); 533 unp = so1->so_pcb; 534 KASSERT(unp != NULL, ("uipc_connect2: unp == NULL")); 535 UNP_PCB_LOCK(unp); 536 unp2 = so2->so_pcb; 537 KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL")); 538 UNP_PCB_LOCK(unp2); 539 error = unp_connect2(so1, so2, PRU_CONNECT2); 540 UNP_PCB_UNLOCK(unp2); 541 UNP_PCB_UNLOCK(unp); 542 UNP_LINK_WUNLOCK(); 543 return (error); 544 } 545 546 static void 547 uipc_detach(struct socket *so) 548 { 549 struct unpcb *unp, *unp2; 550 struct sockaddr_un *saved_unp_addr; 551 struct vnode *vp; 552 int freeunp, local_unp_rights; 553 554 unp = sotounpcb(so); 555 KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); 556 557 UNP_LINK_WLOCK(); 558 UNP_LIST_LOCK(); 559 UNP_PCB_LOCK(unp); 560 LIST_REMOVE(unp, unp_link); 561 unp->unp_gencnt = ++unp_gencnt; 562 --unp_count; 563 UNP_LIST_UNLOCK(); 564 565 /* 566 * XXXRW: Should assert vp->v_socket == so. 
567 */ 568 if ((vp = unp->unp_vnode) != NULL) { 569 unp->unp_vnode->v_socket = NULL; 570 unp->unp_vnode = NULL; 571 } 572 unp2 = unp->unp_conn; 573 if (unp2 != NULL) { 574 UNP_PCB_LOCK(unp2); 575 unp_disconnect(unp, unp2); 576 UNP_PCB_UNLOCK(unp2); 577 } 578 579 /* 580 * We hold the linkage lock exclusively, so it's OK to acquire 581 * multiple pcb locks at a time. 582 */ 583 while (!LIST_EMPTY(&unp->unp_refs)) { 584 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 585 586 UNP_PCB_LOCK(ref); 587 unp_drop(ref, ECONNRESET); 588 UNP_PCB_UNLOCK(ref); 589 } 590 local_unp_rights = unp_rights; 591 UNP_LINK_WUNLOCK(); 592 unp->unp_socket->so_pcb = NULL; 593 saved_unp_addr = unp->unp_addr; 594 unp->unp_addr = NULL; 595 unp->unp_refcount--; 596 freeunp = (unp->unp_refcount == 0); 597 if (saved_unp_addr != NULL) 598 free(saved_unp_addr, M_SONAME); 599 if (freeunp) { 600 UNP_PCB_LOCK_DESTROY(unp); 601 uma_zfree(unp_zone, unp); 602 } else 603 UNP_PCB_UNLOCK(unp); 604 if (vp) { 605 int vfslocked; 606 607 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 608 vrele(vp); 609 VFS_UNLOCK_GIANT(vfslocked); 610 } 611 if (local_unp_rights) 612 taskqueue_enqueue(taskqueue_thread, &unp_gc_task); 613 } 614 615 static int 616 uipc_disconnect(struct socket *so) 617 { 618 struct unpcb *unp, *unp2; 619 620 unp = sotounpcb(so); 621 KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL")); 622 623 UNP_LINK_WLOCK(); 624 UNP_PCB_LOCK(unp); 625 unp2 = unp->unp_conn; 626 if (unp2 != NULL) { 627 UNP_PCB_LOCK(unp2); 628 unp_disconnect(unp, unp2); 629 UNP_PCB_UNLOCK(unp2); 630 } 631 UNP_PCB_UNLOCK(unp); 632 UNP_LINK_WUNLOCK(); 633 return (0); 634 } 635 636 static int 637 uipc_listen(struct socket *so, int backlog, struct thread *td) 638 { 639 struct unpcb *unp; 640 int error; 641 642 unp = sotounpcb(so); 643 KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); 644 645 UNP_PCB_LOCK(unp); 646 if (unp->unp_vnode == NULL) { 647 UNP_PCB_UNLOCK(unp); 648 return (EINVAL); 649 } 650 651 SOCK_LOCK(so); 652 error = 
solisten_proto_check(so); 653 if (error == 0) { 654 cru2x(td->td_ucred, &unp->unp_peercred); 655 unp->unp_flags |= UNP_HAVEPCCACHED; 656 solisten_proto(so, backlog); 657 } 658 SOCK_UNLOCK(so); 659 UNP_PCB_UNLOCK(unp); 660 return (error); 661 } 662 663 static int 664 uipc_peeraddr(struct socket *so, struct sockaddr **nam) 665 { 666 struct unpcb *unp, *unp2; 667 const struct sockaddr *sa; 668 669 unp = sotounpcb(so); 670 KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL")); 671 672 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 673 UNP_LINK_RLOCK(); 674 /* 675 * XXX: It seems that this test always fails even when connection is 676 * established. So, this else clause is added as workaround to 677 * return PF_LOCAL sockaddr. 678 */ 679 unp2 = unp->unp_conn; 680 if (unp2 != NULL) { 681 UNP_PCB_LOCK(unp2); 682 if (unp2->unp_addr != NULL) 683 sa = (struct sockaddr *) unp2->unp_addr; 684 else 685 sa = &sun_noname; 686 bcopy(sa, *nam, sa->sa_len); 687 UNP_PCB_UNLOCK(unp2); 688 } else { 689 sa = &sun_noname; 690 bcopy(sa, *nam, sa->sa_len); 691 } 692 UNP_LINK_RUNLOCK(); 693 return (0); 694 } 695 696 static int 697 uipc_rcvd(struct socket *so, int flags) 698 { 699 struct unpcb *unp, *unp2; 700 struct socket *so2; 701 u_int mbcnt, sbcc; 702 u_long newhiwat; 703 704 unp = sotounpcb(so); 705 KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL")); 706 707 if (so->so_type == SOCK_DGRAM) 708 panic("uipc_rcvd DGRAM?"); 709 710 if (so->so_type != SOCK_STREAM) 711 panic("uipc_rcvd unknown socktype"); 712 713 /* 714 * Adjust backpressure on sender and wakeup any waiting to write. 715 * 716 * The unp lock is acquired to maintain the validity of the unp_conn 717 * pointer; no lock on unp2 is required as unp2->unp_socket will be 718 * static as long as we don't permit unp2 to disconnect from unp, 719 * which is prevented by the lock on unp. We cache values from 720 * so_rcv to avoid holding the so_rcv lock over the entire 721 * transaction on the remote so_snd. 
722 */ 723 SOCKBUF_LOCK(&so->so_rcv); 724 mbcnt = so->so_rcv.sb_mbcnt; 725 sbcc = so->so_rcv.sb_cc; 726 SOCKBUF_UNLOCK(&so->so_rcv); 727 UNP_PCB_LOCK(unp); 728 unp2 = unp->unp_conn; 729 if (unp2 == NULL) { 730 UNP_PCB_UNLOCK(unp); 731 return (0); 732 } 733 so2 = unp2->unp_socket; 734 SOCKBUF_LOCK(&so2->so_snd); 735 so2->so_snd.sb_mbmax += unp->unp_mbcnt - mbcnt; 736 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - sbcc; 737 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 738 newhiwat, RLIM_INFINITY); 739 sowwakeup_locked(so2); 740 unp->unp_mbcnt = mbcnt; 741 unp->unp_cc = sbcc; 742 UNP_PCB_UNLOCK(unp); 743 return (0); 744 } 745 746 static int 747 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 748 struct mbuf *control, struct thread *td) 749 { 750 struct unpcb *unp, *unp2; 751 struct socket *so2; 752 u_int mbcnt_delta, sbcc; 753 u_long newhiwat; 754 int error = 0; 755 756 unp = sotounpcb(so); 757 KASSERT(unp != NULL, ("uipc_send: unp == NULL")); 758 759 if (flags & PRUS_OOB) { 760 error = EOPNOTSUPP; 761 goto release; 762 } 763 if (control != NULL && (error = unp_internalize(&control, td))) 764 goto release; 765 if ((nam != NULL) || (flags & PRUS_EOF)) 766 UNP_LINK_WLOCK(); 767 else 768 UNP_LINK_RLOCK(); 769 switch (so->so_type) { 770 case SOCK_DGRAM: 771 { 772 const struct sockaddr *from; 773 774 unp2 = unp->unp_conn; 775 if (nam != NULL) { 776 UNP_LINK_WLOCK_ASSERT(); 777 if (unp2 != NULL) { 778 error = EISCONN; 779 break; 780 } 781 error = unp_connect(so, nam, td); 782 if (error) 783 break; 784 unp2 = unp->unp_conn; 785 } 786 787 /* 788 * Because connect() and send() are non-atomic in a sendto() 789 * with a target address, it's possible that the socket will 790 * have disconnected before the send() can run. In that case 791 * return the slightly counter-intuitive but otherwise 792 * correct error that the socket is not connected. 
793 */ 794 if (unp2 == NULL) { 795 error = ENOTCONN; 796 break; 797 } 798 /* Lockless read. */ 799 if (unp2->unp_flags & UNP_WANTCRED) 800 control = unp_addsockcred(td, control); 801 UNP_PCB_LOCK(unp); 802 if (unp->unp_addr != NULL) 803 from = (struct sockaddr *)unp->unp_addr; 804 else 805 from = &sun_noname; 806 so2 = unp2->unp_socket; 807 SOCKBUF_LOCK(&so2->so_rcv); 808 if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { 809 sorwakeup_locked(so2); 810 m = NULL; 811 control = NULL; 812 } else { 813 SOCKBUF_UNLOCK(&so2->so_rcv); 814 error = ENOBUFS; 815 } 816 if (nam != NULL) { 817 UNP_LINK_WLOCK_ASSERT(); 818 UNP_PCB_LOCK(unp2); 819 unp_disconnect(unp, unp2); 820 UNP_PCB_UNLOCK(unp2); 821 } 822 UNP_PCB_UNLOCK(unp); 823 break; 824 } 825 826 case SOCK_STREAM: 827 if ((so->so_state & SS_ISCONNECTED) == 0) { 828 if (nam != NULL) { 829 UNP_LINK_WLOCK_ASSERT(); 830 error = unp_connect(so, nam, td); 831 if (error) 832 break; /* XXX */ 833 } else { 834 error = ENOTCONN; 835 break; 836 } 837 } 838 839 /* Lockless read. */ 840 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 841 error = EPIPE; 842 break; 843 } 844 845 /* 846 * Because connect() and send() are non-atomic in a sendto() 847 * with a target address, it's possible that the socket will 848 * have disconnected before the send() can run. In that case 849 * return the slightly counter-intuitive but otherwise 850 * correct error that the socket is not connected. 851 * 852 * Locking here must be done carefully: the linkage lock 853 * prevents interconnections between unpcbs from changing, so 854 * we can traverse from unp to unp2 without acquiring unp's 855 * lock. Socket buffer locks follow unpcb locks, so we can 856 * acquire both remote and lock socket buffer locks. 
857 */ 858 unp2 = unp->unp_conn; 859 if (unp2 == NULL) { 860 error = ENOTCONN; 861 break; 862 } 863 so2 = unp2->unp_socket; 864 UNP_PCB_LOCK(unp2); 865 SOCKBUF_LOCK(&so2->so_rcv); 866 if (unp2->unp_flags & UNP_WANTCRED) { 867 /* 868 * Credentials are passed only once on SOCK_STREAM. 869 */ 870 unp2->unp_flags &= ~UNP_WANTCRED; 871 control = unp_addsockcred(td, control); 872 } 873 /* 874 * Send to paired receive port, and then reduce send buffer 875 * hiwater marks to maintain backpressure. Wake up readers. 876 */ 877 if (control != NULL) { 878 if (sbappendcontrol_locked(&so2->so_rcv, m, control)) 879 control = NULL; 880 } else 881 sbappend_locked(&so2->so_rcv, m); 882 mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt; 883 unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt; 884 sbcc = so2->so_rcv.sb_cc; 885 sorwakeup_locked(so2); 886 887 SOCKBUF_LOCK(&so->so_snd); 888 newhiwat = so->so_snd.sb_hiwat - (sbcc - unp2->unp_cc); 889 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 890 newhiwat, RLIM_INFINITY); 891 so->so_snd.sb_mbmax -= mbcnt_delta; 892 SOCKBUF_UNLOCK(&so->so_snd); 893 unp2->unp_cc = sbcc; 894 UNP_PCB_UNLOCK(unp2); 895 m = NULL; 896 break; 897 898 default: 899 panic("uipc_send unknown socktype"); 900 } 901 902 /* 903 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown. 
904 */ 905 if (flags & PRUS_EOF) { 906 UNP_PCB_LOCK(unp); 907 socantsendmore(so); 908 unp_shutdown(unp); 909 UNP_PCB_UNLOCK(unp); 910 } 911 912 if ((nam != NULL) || (flags & PRUS_EOF)) 913 UNP_LINK_WUNLOCK(); 914 else 915 UNP_LINK_RUNLOCK(); 916 917 if (control != NULL && error != 0) 918 unp_dispose(control); 919 920 release: 921 if (control != NULL) 922 m_freem(control); 923 if (m != NULL) 924 m_freem(m); 925 return (error); 926 } 927 928 static int 929 uipc_sense(struct socket *so, struct stat *sb) 930 { 931 struct unpcb *unp, *unp2; 932 struct socket *so2; 933 934 unp = sotounpcb(so); 935 KASSERT(unp != NULL, ("uipc_sense: unp == NULL")); 936 937 sb->st_blksize = so->so_snd.sb_hiwat; 938 UNP_LINK_RLOCK(); 939 UNP_PCB_LOCK(unp); 940 unp2 = unp->unp_conn; 941 if (so->so_type == SOCK_STREAM && unp2 != NULL) { 942 so2 = unp2->unp_socket; 943 sb->st_blksize += so2->so_rcv.sb_cc; 944 } 945 sb->st_dev = NODEV; 946 if (unp->unp_ino == 0) 947 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino; 948 sb->st_ino = unp->unp_ino; 949 UNP_PCB_UNLOCK(unp); 950 UNP_LINK_RUNLOCK(); 951 return (0); 952 } 953 954 static int 955 uipc_shutdown(struct socket *so) 956 { 957 struct unpcb *unp; 958 959 unp = sotounpcb(so); 960 KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL")); 961 962 UNP_LINK_WLOCK(); 963 UNP_PCB_LOCK(unp); 964 socantsendmore(so); 965 unp_shutdown(unp); 966 UNP_PCB_UNLOCK(unp); 967 UNP_LINK_WUNLOCK(); 968 return (0); 969 } 970 971 static int 972 uipc_sockaddr(struct socket *so, struct sockaddr **nam) 973 { 974 struct unpcb *unp; 975 const struct sockaddr *sa; 976 977 unp = sotounpcb(so); 978 KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL")); 979 980 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 981 UNP_PCB_LOCK(unp); 982 if (unp->unp_addr != NULL) 983 sa = (struct sockaddr *) unp->unp_addr; 984 else 985 sa = &sun_noname; 986 bcopy(sa, *nam, sa->sa_len); 987 UNP_PCB_UNLOCK(unp); 988 return (0); 989 } 990 991 static struct pr_usrreqs 
uipc_usrreqs_dgram = { 992 .pru_abort = uipc_abort, 993 .pru_accept = uipc_accept, 994 .pru_attach = uipc_attach, 995 .pru_bind = uipc_bind, 996 .pru_connect = uipc_connect, 997 .pru_connect2 = uipc_connect2, 998 .pru_detach = uipc_detach, 999 .pru_disconnect = uipc_disconnect, 1000 .pru_listen = uipc_listen, 1001 .pru_peeraddr = uipc_peeraddr, 1002 .pru_rcvd = uipc_rcvd, 1003 .pru_send = uipc_send, 1004 .pru_sense = uipc_sense, 1005 .pru_shutdown = uipc_shutdown, 1006 .pru_sockaddr = uipc_sockaddr, 1007 .pru_soreceive = soreceive_dgram, 1008 .pru_close = uipc_close, 1009 }; 1010 1011 static struct pr_usrreqs uipc_usrreqs_stream = { 1012 .pru_abort = uipc_abort, 1013 .pru_accept = uipc_accept, 1014 .pru_attach = uipc_attach, 1015 .pru_bind = uipc_bind, 1016 .pru_connect = uipc_connect, 1017 .pru_connect2 = uipc_connect2, 1018 .pru_detach = uipc_detach, 1019 .pru_disconnect = uipc_disconnect, 1020 .pru_listen = uipc_listen, 1021 .pru_peeraddr = uipc_peeraddr, 1022 .pru_rcvd = uipc_rcvd, 1023 .pru_send = uipc_send, 1024 .pru_sense = uipc_sense, 1025 .pru_shutdown = uipc_shutdown, 1026 .pru_sockaddr = uipc_sockaddr, 1027 .pru_soreceive = soreceive_generic, 1028 .pru_close = uipc_close, 1029 }; 1030 1031 static int 1032 uipc_ctloutput(struct socket *so, struct sockopt *sopt) 1033 { 1034 struct unpcb *unp; 1035 struct xucred xu; 1036 int error, optval; 1037 1038 if (sopt->sopt_level != 0) 1039 return (EINVAL); 1040 1041 unp = sotounpcb(so); 1042 KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL")); 1043 error = 0; 1044 switch (sopt->sopt_dir) { 1045 case SOPT_GET: 1046 switch (sopt->sopt_name) { 1047 case LOCAL_PEERCRED: 1048 UNP_PCB_LOCK(unp); 1049 if (unp->unp_flags & UNP_HAVEPC) 1050 xu = unp->unp_peercred; 1051 else { 1052 if (so->so_type == SOCK_STREAM) 1053 error = ENOTCONN; 1054 else 1055 error = EINVAL; 1056 } 1057 UNP_PCB_UNLOCK(unp); 1058 if (error == 0) 1059 error = sooptcopyout(sopt, &xu, sizeof(xu)); 1060 break; 1061 1062 case LOCAL_CREDS: 1063 /* Unlocked 
read. */ 1064 optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0; 1065 error = sooptcopyout(sopt, &optval, sizeof(optval)); 1066 break; 1067 1068 case LOCAL_CONNWAIT: 1069 /* Unlocked read. */ 1070 optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0; 1071 error = sooptcopyout(sopt, &optval, sizeof(optval)); 1072 break; 1073 1074 default: 1075 error = EOPNOTSUPP; 1076 break; 1077 } 1078 break; 1079 1080 case SOPT_SET: 1081 switch (sopt->sopt_name) { 1082 case LOCAL_CREDS: 1083 case LOCAL_CONNWAIT: 1084 error = sooptcopyin(sopt, &optval, sizeof(optval), 1085 sizeof(optval)); 1086 if (error) 1087 break; 1088 1089 #define OPTSET(bit) do { \ 1090 UNP_PCB_LOCK(unp); \ 1091 if (optval) \ 1092 unp->unp_flags |= bit; \ 1093 else \ 1094 unp->unp_flags &= ~bit; \ 1095 UNP_PCB_UNLOCK(unp); \ 1096 } while (0) 1097 1098 switch (sopt->sopt_name) { 1099 case LOCAL_CREDS: 1100 OPTSET(UNP_WANTCRED); 1101 break; 1102 1103 case LOCAL_CONNWAIT: 1104 OPTSET(UNP_CONNWAIT); 1105 break; 1106 1107 default: 1108 break; 1109 } 1110 break; 1111 #undef OPTSET 1112 default: 1113 error = ENOPROTOOPT; 1114 break; 1115 } 1116 break; 1117 1118 default: 1119 error = EOPNOTSUPP; 1120 break; 1121 } 1122 return (error); 1123 } 1124 1125 static int 1126 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 1127 { 1128 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 1129 struct vnode *vp; 1130 struct socket *so2, *so3; 1131 struct unpcb *unp, *unp2, *unp3; 1132 int error, len, vfslocked; 1133 struct nameidata nd; 1134 char buf[SOCK_MAXADDRLEN]; 1135 struct sockaddr *sa; 1136 1137 UNP_LINK_WLOCK_ASSERT(); 1138 1139 unp = sotounpcb(so); 1140 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1141 1142 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 1143 if (len <= 0) 1144 return (EINVAL); 1145 bcopy(soun->sun_path, buf, len); 1146 buf[len] = 0; 1147 1148 UNP_PCB_LOCK(unp); 1149 if (unp->unp_flags & UNP_CONNECTING) { 1150 UNP_PCB_UNLOCK(unp); 1151 return (EALREADY); 1152 } 1153 
UNP_LINK_WUNLOCK(); 1154 unp->unp_flags |= UNP_CONNECTING; 1155 UNP_PCB_UNLOCK(unp); 1156 1157 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 1158 NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, 1159 td); 1160 error = namei(&nd); 1161 if (error) 1162 vp = NULL; 1163 else 1164 vp = nd.ni_vp; 1165 ASSERT_VOP_LOCKED(vp, "unp_connect"); 1166 vfslocked = NDHASGIANT(&nd); 1167 NDFREE(&nd, NDF_ONLY_PNBUF); 1168 if (error) 1169 goto bad; 1170 1171 if (vp->v_type != VSOCK) { 1172 error = ENOTSOCK; 1173 goto bad; 1174 } 1175 #ifdef MAC 1176 error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD); 1177 if (error) 1178 goto bad; 1179 #endif 1180 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 1181 if (error) 1182 goto bad; 1183 VFS_UNLOCK_GIANT(vfslocked); 1184 1185 unp = sotounpcb(so); 1186 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1187 1188 /* 1189 * Lock linkage lock for two reasons: make sure v_socket is stable, 1190 * and to protect simultaneous locking of multiple pcbs. 1191 */ 1192 UNP_LINK_WLOCK(); 1193 so2 = vp->v_socket; 1194 if (so2 == NULL) { 1195 error = ECONNREFUSED; 1196 goto bad2; 1197 } 1198 if (so->so_type != so2->so_type) { 1199 error = EPROTOTYPE; 1200 goto bad2; 1201 } 1202 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 1203 if (so2->so_options & SO_ACCEPTCONN) { 1204 so3 = sonewconn(so2, 0); 1205 } else 1206 so3 = NULL; 1207 if (so3 == NULL) { 1208 error = ECONNREFUSED; 1209 goto bad2; 1210 } 1211 unp = sotounpcb(so); 1212 unp2 = sotounpcb(so2); 1213 unp3 = sotounpcb(so3); 1214 UNP_PCB_LOCK(unp); 1215 UNP_PCB_LOCK(unp2); 1216 UNP_PCB_LOCK(unp3); 1217 if (unp2->unp_addr != NULL) { 1218 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 1219 unp3->unp_addr = (struct sockaddr_un *) sa; 1220 sa = NULL; 1221 } 1222 1223 /* 1224 * The connecter's (client's) credentials are copied from its 1225 * process structure at the time of connect() (which is now). 
1226 */ 1227 cru2x(td->td_ucred, &unp3->unp_peercred); 1228 unp3->unp_flags |= UNP_HAVEPC; 1229 1230 /* 1231 * The receiver's (server's) credentials are copied from the 1232 * unp_peercred member of socket on which the former called 1233 * listen(); uipc_listen() cached that process's credentials 1234 * at that time so we can use them now. 1235 */ 1236 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 1237 ("unp_connect: listener without cached peercred")); 1238 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 1239 sizeof(unp->unp_peercred)); 1240 unp->unp_flags |= UNP_HAVEPC; 1241 if (unp2->unp_flags & UNP_WANTCRED) 1242 unp3->unp_flags |= UNP_WANTCRED; 1243 UNP_PCB_UNLOCK(unp3); 1244 UNP_PCB_UNLOCK(unp2); 1245 UNP_PCB_UNLOCK(unp); 1246 #ifdef MAC 1247 mac_socketpeer_set_from_socket(so, so3); 1248 mac_socketpeer_set_from_socket(so3, so); 1249 #endif 1250 1251 so2 = so3; 1252 } 1253 unp = sotounpcb(so); 1254 KASSERT(unp != NULL, ("unp_connect: unp == NULL")); 1255 unp2 = sotounpcb(so2); 1256 KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL")); 1257 UNP_PCB_LOCK(unp); 1258 UNP_PCB_LOCK(unp2); 1259 error = unp_connect2(so, so2, PRU_CONNECT); 1260 UNP_PCB_UNLOCK(unp2); 1261 UNP_PCB_UNLOCK(unp); 1262 bad2: 1263 UNP_LINK_WUNLOCK(); 1264 if (vfslocked) 1265 /* 1266 * Giant has been previously acquired. This means filesystem 1267 * isn't MPSAFE. Do it once again. 
1268 */ 1269 mtx_lock(&Giant); 1270 bad: 1271 if (vp != NULL) 1272 vput(vp); 1273 VFS_UNLOCK_GIANT(vfslocked); 1274 free(sa, M_SONAME); 1275 UNP_LINK_WLOCK(); 1276 UNP_PCB_LOCK(unp); 1277 unp->unp_flags &= ~UNP_CONNECTING; 1278 UNP_PCB_UNLOCK(unp); 1279 return (error); 1280 } 1281 1282 static int 1283 unp_connect2(struct socket *so, struct socket *so2, int req) 1284 { 1285 struct unpcb *unp; 1286 struct unpcb *unp2; 1287 1288 unp = sotounpcb(so); 1289 KASSERT(unp != NULL, ("unp_connect2: unp == NULL")); 1290 unp2 = sotounpcb(so2); 1291 KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL")); 1292 1293 UNP_LINK_WLOCK_ASSERT(); 1294 UNP_PCB_LOCK_ASSERT(unp); 1295 UNP_PCB_LOCK_ASSERT(unp2); 1296 1297 if (so2->so_type != so->so_type) 1298 return (EPROTOTYPE); 1299 unp->unp_conn = unp2; 1300 1301 switch (so->so_type) { 1302 case SOCK_DGRAM: 1303 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 1304 soisconnected(so); 1305 break; 1306 1307 case SOCK_STREAM: 1308 unp2->unp_conn = unp; 1309 if (req == PRU_CONNECT && 1310 ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT)) 1311 soisconnecting(so); 1312 else 1313 soisconnected(so); 1314 soisconnected(so2); 1315 break; 1316 1317 default: 1318 panic("unp_connect2"); 1319 } 1320 return (0); 1321 } 1322 1323 static void 1324 unp_disconnect(struct unpcb *unp, struct unpcb *unp2) 1325 { 1326 struct socket *so; 1327 1328 KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL")); 1329 1330 UNP_LINK_WLOCK_ASSERT(); 1331 UNP_PCB_LOCK_ASSERT(unp); 1332 UNP_PCB_LOCK_ASSERT(unp2); 1333 1334 unp->unp_conn = NULL; 1335 switch (unp->unp_socket->so_type) { 1336 case SOCK_DGRAM: 1337 LIST_REMOVE(unp, unp_reflink); 1338 so = unp->unp_socket; 1339 SOCK_LOCK(so); 1340 so->so_state &= ~SS_ISCONNECTED; 1341 SOCK_UNLOCK(so); 1342 break; 1343 1344 case SOCK_STREAM: 1345 soisdisconnected(unp->unp_socket); 1346 unp2->unp_conn = NULL; 1347 soisdisconnected(unp2->unp_socket); 1348 break; 1349 } 1350 } 1351 1352 /* 1353 * unp_pcblist() walks the global 
list of struct unpcb's to generate a 1354 * pointer list, bumping the refcount on each unpcb. It then copies them out 1355 * sequentially, validating the generation number on each to see if it has 1356 * been detached. All of this is necessary because copyout() may sleep on 1357 * disk I/O. 1358 */ 1359 static int 1360 unp_pcblist(SYSCTL_HANDLER_ARGS) 1361 { 1362 int error, i, n; 1363 int freeunp; 1364 struct unpcb *unp, **unp_list; 1365 unp_gen_t gencnt; 1366 struct xunpgen *xug; 1367 struct unp_head *head; 1368 struct xunpcb *xu; 1369 1370 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1371 1372 /* 1373 * The process of preparing the PCB list is too time-consuming and 1374 * resource-intensive to repeat twice on every request. 1375 */ 1376 if (req->oldptr == NULL) { 1377 n = unp_count; 1378 req->oldidx = 2 * (sizeof *xug) 1379 + (n + n/8) * sizeof(struct xunpcb); 1380 return (0); 1381 } 1382 1383 if (req->newptr != NULL) 1384 return (EPERM); 1385 1386 /* 1387 * OK, now we're committed to doing something. 1388 */ 1389 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 1390 UNP_LIST_LOCK(); 1391 gencnt = unp_gencnt; 1392 n = unp_count; 1393 UNP_LIST_UNLOCK(); 1394 1395 xug->xug_len = sizeof *xug; 1396 xug->xug_count = n; 1397 xug->xug_gen = gencnt; 1398 xug->xug_sogen = so_gencnt; 1399 error = SYSCTL_OUT(req, xug, sizeof *xug); 1400 if (error) { 1401 free(xug, M_TEMP); 1402 return (error); 1403 } 1404 1405 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1406 1407 UNP_LIST_LOCK(); 1408 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1409 unp = LIST_NEXT(unp, unp_link)) { 1410 UNP_PCB_LOCK(unp); 1411 if (unp->unp_gencnt <= gencnt) { 1412 if (cr_cansee(req->td->td_ucred, 1413 unp->unp_socket->so_cred)) { 1414 UNP_PCB_UNLOCK(unp); 1415 continue; 1416 } 1417 unp_list[i++] = unp; 1418 unp->unp_refcount++; 1419 } 1420 UNP_PCB_UNLOCK(unp); 1421 } 1422 UNP_LIST_UNLOCK(); 1423 n = i; /* In case we lost some during malloc. 
*/ 1424 1425 error = 0; 1426 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO); 1427 for (i = 0; i < n; i++) { 1428 unp = unp_list[i]; 1429 UNP_PCB_LOCK(unp); 1430 unp->unp_refcount--; 1431 if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) { 1432 xu->xu_len = sizeof *xu; 1433 xu->xu_unpp = unp; 1434 /* 1435 * XXX - need more locking here to protect against 1436 * connect/disconnect races for SMP. 1437 */ 1438 if (unp->unp_addr != NULL) 1439 bcopy(unp->unp_addr, &xu->xu_addr, 1440 unp->unp_addr->sun_len); 1441 if (unp->unp_conn != NULL && 1442 unp->unp_conn->unp_addr != NULL) 1443 bcopy(unp->unp_conn->unp_addr, 1444 &xu->xu_caddr, 1445 unp->unp_conn->unp_addr->sun_len); 1446 bcopy(unp, &xu->xu_unp, sizeof *unp); 1447 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1448 UNP_PCB_UNLOCK(unp); 1449 error = SYSCTL_OUT(req, xu, sizeof *xu); 1450 } else { 1451 freeunp = (unp->unp_refcount == 0); 1452 UNP_PCB_UNLOCK(unp); 1453 if (freeunp) { 1454 UNP_PCB_LOCK_DESTROY(unp); 1455 uma_zfree(unp_zone, unp); 1456 } 1457 } 1458 } 1459 free(xu, M_TEMP); 1460 if (!error) { 1461 /* 1462 * Give the user an updated idea of our state. If the 1463 * generation differs from what we told her before, she knows 1464 * that something happened while we were processing this 1465 * request, and it might be necessary to retry. 
1466 */ 1467 xug->xug_gen = unp_gencnt; 1468 xug->xug_sogen = so_gencnt; 1469 xug->xug_count = unp_count; 1470 error = SYSCTL_OUT(req, xug, sizeof *xug); 1471 } 1472 free(unp_list, M_TEMP); 1473 free(xug, M_TEMP); 1474 return (error); 1475 } 1476 1477 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1478 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1479 "List of active local datagram sockets"); 1480 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1481 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1482 "List of active local stream sockets"); 1483 1484 static void 1485 unp_shutdown(struct unpcb *unp) 1486 { 1487 struct unpcb *unp2; 1488 struct socket *so; 1489 1490 UNP_LINK_WLOCK_ASSERT(); 1491 UNP_PCB_LOCK_ASSERT(unp); 1492 1493 unp2 = unp->unp_conn; 1494 if (unp->unp_socket->so_type == SOCK_STREAM && unp2 != NULL) { 1495 so = unp2->unp_socket; 1496 if (so != NULL) 1497 socantrcvmore(so); 1498 } 1499 } 1500 1501 static void 1502 unp_drop(struct unpcb *unp, int errno) 1503 { 1504 struct socket *so = unp->unp_socket; 1505 struct unpcb *unp2; 1506 1507 UNP_LINK_WLOCK_ASSERT(); 1508 UNP_PCB_LOCK_ASSERT(unp); 1509 1510 so->so_error = errno; 1511 unp2 = unp->unp_conn; 1512 if (unp2 == NULL) 1513 return; 1514 UNP_PCB_LOCK(unp2); 1515 unp_disconnect(unp, unp2); 1516 UNP_PCB_UNLOCK(unp2); 1517 } 1518 1519 static void 1520 unp_freerights(struct file **rp, int fdcount) 1521 { 1522 int i; 1523 struct file *fp; 1524 1525 for (i = 0; i < fdcount; i++) { 1526 fp = *rp; 1527 *rp++ = NULL; 1528 unp_discard(fp); 1529 } 1530 } 1531 1532 static int 1533 unp_externalize(struct mbuf *control, struct mbuf **controlp) 1534 { 1535 struct thread *td = curthread; /* XXX */ 1536 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1537 int i; 1538 int *fdp; 1539 struct file **rp; 1540 struct file *fp; 1541 void *data; 1542 socklen_t clen = control->m_len, datalen; 1543 int error, newfds; 1544 int f; 1545 u_int newlen; 1546 1547 
UNP_LINK_UNLOCK_ASSERT(); 1548 1549 error = 0; 1550 if (controlp != NULL) /* controlp == NULL => free control messages */ 1551 *controlp = NULL; 1552 while (cm != NULL) { 1553 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1554 error = EINVAL; 1555 break; 1556 } 1557 data = CMSG_DATA(cm); 1558 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1559 if (cm->cmsg_level == SOL_SOCKET 1560 && cm->cmsg_type == SCM_RIGHTS) { 1561 newfds = datalen / sizeof(struct file *); 1562 rp = data; 1563 1564 /* If we're not outputting the descriptors free them. */ 1565 if (error || controlp == NULL) { 1566 unp_freerights(rp, newfds); 1567 goto next; 1568 } 1569 FILEDESC_XLOCK(td->td_proc->p_fd); 1570 /* if the new FD's will not fit free them. */ 1571 if (!fdavail(td, newfds)) { 1572 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1573 error = EMSGSIZE; 1574 unp_freerights(rp, newfds); 1575 goto next; 1576 } 1577 1578 /* 1579 * Now change each pointer to an fd in the global 1580 * table to an integer that is the index to the local 1581 * fd table entry that we set up to point to the 1582 * global one we are transferring. 1583 */ 1584 newlen = newfds * sizeof(int); 1585 *controlp = sbcreatecontrol(NULL, newlen, 1586 SCM_RIGHTS, SOL_SOCKET); 1587 if (*controlp == NULL) { 1588 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1589 error = E2BIG; 1590 unp_freerights(rp, newfds); 1591 goto next; 1592 } 1593 1594 fdp = (int *) 1595 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1596 for (i = 0; i < newfds; i++) { 1597 if (fdalloc(td, 0, &f)) 1598 panic("unp_externalize fdalloc failed"); 1599 fp = *rp++; 1600 td->td_proc->p_fd->fd_ofiles[f] = fp; 1601 unp_externalize_fp(fp); 1602 *fdp++ = f; 1603 } 1604 FILEDESC_XUNLOCK(td->td_proc->p_fd); 1605 } else { 1606 /* We can just copy anything else across. 
*/ 1607 if (error || controlp == NULL) 1608 goto next; 1609 *controlp = sbcreatecontrol(NULL, datalen, 1610 cm->cmsg_type, cm->cmsg_level); 1611 if (*controlp == NULL) { 1612 error = ENOBUFS; 1613 goto next; 1614 } 1615 bcopy(data, 1616 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1617 datalen); 1618 } 1619 controlp = &(*controlp)->m_next; 1620 1621 next: 1622 if (CMSG_SPACE(datalen) < clen) { 1623 clen -= CMSG_SPACE(datalen); 1624 cm = (struct cmsghdr *) 1625 ((caddr_t)cm + CMSG_SPACE(datalen)); 1626 } else { 1627 clen = 0; 1628 cm = NULL; 1629 } 1630 } 1631 1632 m_freem(control); 1633 return (error); 1634 } 1635 1636 static void 1637 unp_zone_change(void *tag) 1638 { 1639 1640 uma_zone_set_max(unp_zone, maxsockets); 1641 } 1642 1643 static void 1644 unp_init(void) 1645 { 1646 1647 #ifdef VIMAGE 1648 if (!IS_DEFAULT_VNET(curvnet)) 1649 return; 1650 #endif 1651 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1652 NULL, NULL, UMA_ALIGN_PTR, 0); 1653 if (unp_zone == NULL) 1654 panic("unp_init"); 1655 uma_zone_set_max(unp_zone, maxsockets); 1656 EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change, 1657 NULL, EVENTHANDLER_PRI_ANY); 1658 LIST_INIT(&unp_dhead); 1659 LIST_INIT(&unp_shead); 1660 TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); 1661 UNP_LINK_LOCK_INIT(); 1662 UNP_LIST_LOCK_INIT(); 1663 } 1664 1665 static int 1666 unp_internalize(struct mbuf **controlp, struct thread *td) 1667 { 1668 struct mbuf *control = *controlp; 1669 struct proc *p = td->td_proc; 1670 struct filedesc *fdescp = p->p_fd; 1671 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1672 struct cmsgcred *cmcred; 1673 struct file **rp; 1674 struct file *fp; 1675 struct timeval *tv; 1676 int i, fd, *fdp; 1677 void *data; 1678 socklen_t clen = control->m_len, datalen; 1679 int error, oldfds; 1680 u_int newlen; 1681 1682 UNP_LINK_UNLOCK_ASSERT(); 1683 1684 error = 0; 1685 *controlp = NULL; 1686 while (cm != NULL) { 1687 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1688 
|| cm->cmsg_len > clen) { 1689 error = EINVAL; 1690 goto out; 1691 } 1692 data = CMSG_DATA(cm); 1693 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1694 1695 switch (cm->cmsg_type) { 1696 /* 1697 * Fill in credential information. 1698 */ 1699 case SCM_CREDS: 1700 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1701 SCM_CREDS, SOL_SOCKET); 1702 if (*controlp == NULL) { 1703 error = ENOBUFS; 1704 goto out; 1705 } 1706 cmcred = (struct cmsgcred *) 1707 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1708 cmcred->cmcred_pid = p->p_pid; 1709 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1710 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1711 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1712 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1713 CMGROUP_MAX); 1714 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1715 cmcred->cmcred_groups[i] = 1716 td->td_ucred->cr_groups[i]; 1717 break; 1718 1719 case SCM_RIGHTS: 1720 oldfds = datalen / sizeof (int); 1721 /* 1722 * Check that all the FDs passed in refer to legal 1723 * files. If not, reject the entire operation. 1724 */ 1725 fdp = data; 1726 FILEDESC_SLOCK(fdescp); 1727 for (i = 0; i < oldfds; i++) { 1728 fd = *fdp++; 1729 if ((unsigned)fd >= fdescp->fd_nfiles || 1730 fdescp->fd_ofiles[fd] == NULL) { 1731 FILEDESC_SUNLOCK(fdescp); 1732 error = EBADF; 1733 goto out; 1734 } 1735 fp = fdescp->fd_ofiles[fd]; 1736 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1737 FILEDESC_SUNLOCK(fdescp); 1738 error = EOPNOTSUPP; 1739 goto out; 1740 } 1741 1742 } 1743 1744 /* 1745 * Now replace the integer FDs with pointers to the 1746 * associated global file table entry.. 
1747 */ 1748 newlen = oldfds * sizeof(struct file *); 1749 *controlp = sbcreatecontrol(NULL, newlen, 1750 SCM_RIGHTS, SOL_SOCKET); 1751 if (*controlp == NULL) { 1752 FILEDESC_SUNLOCK(fdescp); 1753 error = E2BIG; 1754 goto out; 1755 } 1756 fdp = data; 1757 rp = (struct file **) 1758 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1759 for (i = 0; i < oldfds; i++) { 1760 fp = fdescp->fd_ofiles[*fdp++]; 1761 *rp++ = fp; 1762 unp_internalize_fp(fp); 1763 } 1764 FILEDESC_SUNLOCK(fdescp); 1765 break; 1766 1767 case SCM_TIMESTAMP: 1768 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1769 SCM_TIMESTAMP, SOL_SOCKET); 1770 if (*controlp == NULL) { 1771 error = ENOBUFS; 1772 goto out; 1773 } 1774 tv = (struct timeval *) 1775 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1776 microtime(tv); 1777 break; 1778 1779 default: 1780 error = EINVAL; 1781 goto out; 1782 } 1783 1784 controlp = &(*controlp)->m_next; 1785 if (CMSG_SPACE(datalen) < clen) { 1786 clen -= CMSG_SPACE(datalen); 1787 cm = (struct cmsghdr *) 1788 ((caddr_t)cm + CMSG_SPACE(datalen)); 1789 } else { 1790 clen = 0; 1791 cm = NULL; 1792 } 1793 } 1794 1795 out: 1796 m_freem(control); 1797 return (error); 1798 } 1799 1800 static struct mbuf * 1801 unp_addsockcred(struct thread *td, struct mbuf *control) 1802 { 1803 struct mbuf *m, *n, *n_prev; 1804 struct sockcred *sc; 1805 const struct cmsghdr *cm; 1806 int ngroups; 1807 int i; 1808 1809 ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); 1810 m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); 1811 if (m == NULL) 1812 return (control); 1813 1814 sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); 1815 sc->sc_uid = td->td_ucred->cr_ruid; 1816 sc->sc_euid = td->td_ucred->cr_uid; 1817 sc->sc_gid = td->td_ucred->cr_rgid; 1818 sc->sc_egid = td->td_ucred->cr_gid; 1819 sc->sc_ngroups = ngroups; 1820 for (i = 0; i < sc->sc_ngroups; i++) 1821 sc->sc_groups[i] = td->td_ucred->cr_groups[i]; 1822 1823 /* 1824 * Unlink SCM_CREDS control messages 
(struct cmsgcred), since just 1825 * created SCM_CREDS control message (struct sockcred) has another 1826 * format. 1827 */ 1828 if (control != NULL) 1829 for (n = control, n_prev = NULL; n != NULL;) { 1830 cm = mtod(n, struct cmsghdr *); 1831 if (cm->cmsg_level == SOL_SOCKET && 1832 cm->cmsg_type == SCM_CREDS) { 1833 if (n_prev == NULL) 1834 control = n->m_next; 1835 else 1836 n_prev->m_next = n->m_next; 1837 n = m_free(n); 1838 } else { 1839 n_prev = n; 1840 n = n->m_next; 1841 } 1842 } 1843 1844 /* Prepend it to the head. */ 1845 m->m_next = control; 1846 return (m); 1847 } 1848 1849 static struct unpcb * 1850 fptounp(struct file *fp) 1851 { 1852 struct socket *so; 1853 1854 if (fp->f_type != DTYPE_SOCKET) 1855 return (NULL); 1856 if ((so = fp->f_data) == NULL) 1857 return (NULL); 1858 if (so->so_proto->pr_domain != &localdomain) 1859 return (NULL); 1860 return sotounpcb(so); 1861 } 1862 1863 static void 1864 unp_discard(struct file *fp) 1865 { 1866 1867 unp_externalize_fp(fp); 1868 (void) closef(fp, (struct thread *)NULL); 1869 } 1870 1871 static void 1872 unp_internalize_fp(struct file *fp) 1873 { 1874 struct unpcb *unp; 1875 1876 UNP_LINK_WLOCK(); 1877 if ((unp = fptounp(fp)) != NULL) { 1878 unp->unp_file = fp; 1879 unp->unp_msgcount++; 1880 } 1881 fhold(fp); 1882 unp_rights++; 1883 UNP_LINK_WUNLOCK(); 1884 } 1885 1886 static void 1887 unp_externalize_fp(struct file *fp) 1888 { 1889 struct unpcb *unp; 1890 1891 UNP_LINK_WLOCK(); 1892 if ((unp = fptounp(fp)) != NULL) 1893 unp->unp_msgcount--; 1894 unp_rights--; 1895 UNP_LINK_WUNLOCK(); 1896 } 1897 1898 /* 1899 * unp_defer indicates whether additional work has been defered for a future 1900 * pass through unp_gc(). It is thread local and does not require explicit 1901 * synchronization. 
1902 */ 1903 static int unp_marked; 1904 static int unp_unreachable; 1905 1906 static void 1907 unp_accessable(struct file *fp) 1908 { 1909 struct unpcb *unp; 1910 1911 if ((unp = fptounp(fp)) == NULL) 1912 return; 1913 if (unp->unp_gcflag & UNPGC_REF) 1914 return; 1915 unp->unp_gcflag &= ~UNPGC_DEAD; 1916 unp->unp_gcflag |= UNPGC_REF; 1917 unp_marked++; 1918 } 1919 1920 static void 1921 unp_gc_process(struct unpcb *unp) 1922 { 1923 struct socket *soa; 1924 struct socket *so; 1925 struct file *fp; 1926 1927 /* Already processed. */ 1928 if (unp->unp_gcflag & UNPGC_SCANNED) 1929 return; 1930 fp = unp->unp_file; 1931 1932 /* 1933 * Check for a socket potentially in a cycle. It must be in a 1934 * queue as indicated by msgcount, and this must equal the file 1935 * reference count. Note that when msgcount is 0 the file is NULL. 1936 */ 1937 if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && 1938 unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { 1939 unp->unp_gcflag |= UNPGC_DEAD; 1940 unp_unreachable++; 1941 return; 1942 } 1943 1944 /* 1945 * Mark all sockets we reference with RIGHTS. 1946 */ 1947 so = unp->unp_socket; 1948 SOCKBUF_LOCK(&so->so_rcv); 1949 unp_scan(so->so_rcv.sb_mb, unp_accessable); 1950 SOCKBUF_UNLOCK(&so->so_rcv); 1951 1952 /* 1953 * Mark all sockets in our accept queue. 
1954 */ 1955 ACCEPT_LOCK(); 1956 TAILQ_FOREACH(soa, &so->so_comp, so_list) { 1957 SOCKBUF_LOCK(&soa->so_rcv); 1958 unp_scan(soa->so_rcv.sb_mb, unp_accessable); 1959 SOCKBUF_UNLOCK(&soa->so_rcv); 1960 } 1961 ACCEPT_UNLOCK(); 1962 unp->unp_gcflag |= UNPGC_SCANNED; 1963 } 1964 1965 static int unp_recycled; 1966 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 1967 "Number of unreachable sockets claimed by the garbage collector."); 1968 1969 static int unp_taskcount; 1970 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 1971 "Number of times the garbage collector has run."); 1972 1973 static void 1974 unp_gc(__unused void *arg, int pending) 1975 { 1976 struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL }; 1977 struct unp_head **head; 1978 struct file **unref; 1979 struct unpcb *unp; 1980 int i; 1981 1982 unp_taskcount++; 1983 UNP_LIST_LOCK(); 1984 /* 1985 * First clear all gc flags from previous runs. 1986 */ 1987 for (head = heads; *head != NULL; head++) 1988 LIST_FOREACH(unp, *head, unp_link) 1989 unp->unp_gcflag = 0; 1990 1991 /* 1992 * Scan marking all reachable sockets with UNPGC_REF. Once a socket 1993 * is reachable all of the sockets it references are reachable. 1994 * Stop the scan once we do a complete loop without discovering 1995 * a new reachable socket. 1996 */ 1997 do { 1998 unp_unreachable = 0; 1999 unp_marked = 0; 2000 for (head = heads; *head != NULL; head++) 2001 LIST_FOREACH(unp, *head, unp_link) 2002 unp_gc_process(unp); 2003 } while (unp_marked); 2004 UNP_LIST_UNLOCK(); 2005 if (unp_unreachable == 0) 2006 return; 2007 2008 /* 2009 * Allocate space for a local list of dead unpcbs. 2010 */ 2011 unref = malloc(unp_unreachable * sizeof(struct file *), 2012 M_TEMP, M_WAITOK); 2013 2014 /* 2015 * Iterate looking for sockets which have been specifically marked 2016 * as as unreachable and store them locally. 
2017 */ 2018 UNP_LIST_LOCK(); 2019 for (i = 0, head = heads; *head != NULL; head++) 2020 LIST_FOREACH(unp, *head, unp_link) 2021 if (unp->unp_gcflag & UNPGC_DEAD) { 2022 unref[i++] = unp->unp_file; 2023 fhold(unp->unp_file); 2024 KASSERT(unp->unp_file != NULL, 2025 ("unp_gc: Invalid unpcb.")); 2026 KASSERT(i <= unp_unreachable, 2027 ("unp_gc: incorrect unreachable count.")); 2028 } 2029 UNP_LIST_UNLOCK(); 2030 2031 /* 2032 * Now flush all sockets, free'ing rights. This will free the 2033 * struct files associated with these sockets but leave each socket 2034 * with one remaining ref. 2035 */ 2036 for (i = 0; i < unp_unreachable; i++) 2037 sorflush(unref[i]->f_data); 2038 2039 /* 2040 * And finally release the sockets so they can be reclaimed. 2041 */ 2042 for (i = 0; i < unp_unreachable; i++) 2043 fdrop(unref[i], NULL); 2044 unp_recycled += unp_unreachable; 2045 free(unref, M_TEMP); 2046 } 2047 2048 static void 2049 unp_dispose(struct mbuf *m) 2050 { 2051 2052 if (m) 2053 unp_scan(m, unp_discard); 2054 } 2055 2056 static void 2057 unp_scan(struct mbuf *m0, void (*op)(struct file *)) 2058 { 2059 struct mbuf *m; 2060 struct file **rp; 2061 struct cmsghdr *cm; 2062 void *data; 2063 int i; 2064 socklen_t clen, datalen; 2065 int qfds; 2066 2067 while (m0 != NULL) { 2068 for (m = m0; m; m = m->m_next) { 2069 if (m->m_type != MT_CONTROL) 2070 continue; 2071 2072 cm = mtod(m, struct cmsghdr *); 2073 clen = m->m_len; 2074 2075 while (cm != NULL) { 2076 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 2077 break; 2078 2079 data = CMSG_DATA(cm); 2080 datalen = (caddr_t)cm + cm->cmsg_len 2081 - (caddr_t)data; 2082 2083 if (cm->cmsg_level == SOL_SOCKET && 2084 cm->cmsg_type == SCM_RIGHTS) { 2085 qfds = datalen / sizeof (struct file *); 2086 rp = data; 2087 for (i = 0; i < qfds; i++) 2088 (*op)(*rp++); 2089 } 2090 2091 if (CMSG_SPACE(datalen) < clen) { 2092 clen -= CMSG_SPACE(datalen); 2093 cm = (struct cmsghdr *) 2094 ((caddr_t)cm + CMSG_SPACE(datalen)); 2095 } else { 2096 clen = 
0; 2097 cm = NULL; 2098 } 2099 } 2100 } 2101 m0 = m0->m_act; 2102 } 2103 } 2104 2105 #ifdef DDB 2106 static void 2107 db_print_indent(int indent) 2108 { 2109 int i; 2110 2111 for (i = 0; i < indent; i++) 2112 db_printf(" "); 2113 } 2114 2115 static void 2116 db_print_unpflags(int unp_flags) 2117 { 2118 int comma; 2119 2120 comma = 0; 2121 if (unp_flags & UNP_HAVEPC) { 2122 db_printf("%sUNP_HAVEPC", comma ? ", " : ""); 2123 comma = 1; 2124 } 2125 if (unp_flags & UNP_HAVEPCCACHED) { 2126 db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : ""); 2127 comma = 1; 2128 } 2129 if (unp_flags & UNP_WANTCRED) { 2130 db_printf("%sUNP_WANTCRED", comma ? ", " : ""); 2131 comma = 1; 2132 } 2133 if (unp_flags & UNP_CONNWAIT) { 2134 db_printf("%sUNP_CONNWAIT", comma ? ", " : ""); 2135 comma = 1; 2136 } 2137 if (unp_flags & UNP_CONNECTING) { 2138 db_printf("%sUNP_CONNECTING", comma ? ", " : ""); 2139 comma = 1; 2140 } 2141 if (unp_flags & UNP_BINDING) { 2142 db_printf("%sUNP_BINDING", comma ? ", " : ""); 2143 comma = 1; 2144 } 2145 } 2146 2147 static void 2148 db_print_xucred(int indent, struct xucred *xu) 2149 { 2150 int comma, i; 2151 2152 db_print_indent(indent); 2153 db_printf("cr_version: %u cr_uid: %u cr_ngroups: %d\n", 2154 xu->cr_version, xu->cr_uid, xu->cr_ngroups); 2155 db_print_indent(indent); 2156 db_printf("cr_groups: "); 2157 comma = 0; 2158 for (i = 0; i < xu->cr_ngroups; i++) { 2159 db_printf("%s%u", comma ? 
", " : "", xu->cr_groups[i]); 2160 comma = 1; 2161 } 2162 db_printf("\n"); 2163 } 2164 2165 static void 2166 db_print_unprefs(int indent, struct unp_head *uh) 2167 { 2168 struct unpcb *unp; 2169 int counter; 2170 2171 counter = 0; 2172 LIST_FOREACH(unp, uh, unp_reflink) { 2173 if (counter % 4 == 0) 2174 db_print_indent(indent); 2175 db_printf("%p ", unp); 2176 if (counter % 4 == 3) 2177 db_printf("\n"); 2178 counter++; 2179 } 2180 if (counter != 0 && counter % 4 != 0) 2181 db_printf("\n"); 2182 } 2183 2184 DB_SHOW_COMMAND(unpcb, db_show_unpcb) 2185 { 2186 struct unpcb *unp; 2187 2188 if (!have_addr) { 2189 db_printf("usage: show unpcb <addr>\n"); 2190 return; 2191 } 2192 unp = (struct unpcb *)addr; 2193 2194 db_printf("unp_socket: %p unp_vnode: %p\n", unp->unp_socket, 2195 unp->unp_vnode); 2196 2197 db_printf("unp_ino: %d unp_conn: %p\n", unp->unp_ino, 2198 unp->unp_conn); 2199 2200 db_printf("unp_refs:\n"); 2201 db_print_unprefs(2, &unp->unp_refs); 2202 2203 /* XXXRW: Would be nice to print the full address, if any. */ 2204 db_printf("unp_addr: %p\n", unp->unp_addr); 2205 2206 db_printf("unp_cc: %d unp_mbcnt: %d unp_gencnt: %llu\n", 2207 unp->unp_cc, unp->unp_mbcnt, 2208 (unsigned long long)unp->unp_gencnt); 2209 2210 db_printf("unp_flags: %x (", unp->unp_flags); 2211 db_print_unpflags(unp->unp_flags); 2212 db_printf(")\n"); 2213 2214 db_printf("unp_peercred:\n"); 2215 db_print_xucred(2, &unp->unp_peercred); 2216 2217 db_printf("unp_refcount: %u\n", unp->unp_refcount); 2218 } 2219 #endif 2220