/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2004 The FreeBSD Foundation
 * Copyright (c) 2004-2008 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
 */

/*
 * Comments on the socket life cycle:
 *
 * soalloc() sets up socket layer state for a socket, called only by
 * socreate() and sonewconn().  Socket layer private.
 *
 * sodealloc() tears down socket layer state for a socket, called only by
 * sofree() and sonewconn().  Socket layer private.
 *
 * pru_attach() associates protocol layer state with an allocated socket;
 * called only once, may fail, aborting socket allocation.  This is called
 * from socreate() and sonewconn().  Socket layer private.
 *
 * pru_detach() disassociates protocol layer state from an attached socket,
 * and will be called exactly once for sockets in which pru_attach() has
 * been successfully called.  If pru_attach() returned an error,
 * pru_detach() will not be called.  Socket layer private.
 *
 * pru_abort() and pru_close() notify the protocol layer that the last
 * consumer of a socket is starting to tear down the socket, and that the
 * protocol should terminate the connection.  Historically, pru_abort() also
 * detached protocol state from the socket state, but this is no longer the
 * case.
 *
 * socreate() creates a socket and attaches protocol state.  This is a public
 * interface that may be used by socket layer consumers to create new
 * sockets.
 *
 * sonewconn() creates a socket and attaches protocol state.  This is a
 * public interface that may be used by protocols to create new sockets when
 * a new connection is received and will be available for accept() on a
 * listen socket.
 *
 * soclose() destroys a socket after possibly waiting for it to disconnect.
 * This is a public interface that socket consumers should use to close and
 * release a socket when done with it.
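 *
 * As a rough illustration only, a typical in-kernel consumer sequence for
 * the public interfaces above is:
 *
 *	error = socreate(...);		allocate socket, attach protocol state
 *	... sosend()/soreceive() ...	use the socket
 *	error = soclose(so);		release the consumer reference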
71 * 72 * soabort() destroys a socket without waiting for it to disconnect (used 73 * only for incoming connections that are already partially or fully 74 * connected). This is used internally by the socket layer when clearing 75 * listen socket queues (due to overflow or close on the listen socket), but 76 * is also a public interface protocols may use to abort connections in 77 * their incomplete listen queues should they no longer be required. Sockets 78 * placed in completed connection listen queues should not be aborted for 79 * reasons described in the comment above the soclose() implementation. This 80 * is not a general purpose close routine, and except in the specific 81 * circumstances described here, should not be used. 82 * 83 * sofree() will free a socket and its protocol state if all references on 84 * the socket have been released, and is the public interface to attempt to 85 * free a socket when a reference is removed. This is a socket layer private 86 * interface. 87 * 88 * NOTE: In addition to socreate() and soclose(), which provide a single 89 * socket reference to the consumer to be managed as required, there are two 90 * calls to explicitly manage socket references, soref(), and sorele(). 91 * Currently, these are generally required only when transitioning a socket 92 * from a listen queue to a file descriptor, in order to prevent garbage 93 * collection of the socket at an untimely moment. For a number of reasons, 94 * these interfaces are not preferred, and should be avoided. 95 */ 96 97 #include <sys/cdefs.h> 98 __FBSDID("$FreeBSD$"); 99 100 #include "opt_inet.h" 101 #include "opt_inet6.h" 102 #include "opt_mac.h" 103 #include "opt_zero.h" 104 #include "opt_compat.h" 105 106 #include <sys/param.h> 107 #include <sys/systm.h> 108 #include <sys/fcntl.h> 109 #include <sys/limits.h> 110 #include <sys/lock.h> 111 #include <sys/mac.h> 112 #include <sys/malloc.h> 113 #include <sys/mbuf.h> 114 #include <sys/mutex.h> 115 #include <sys/domain.h> 116 #include <sys/file.h> /* for struct knote */ 117 #include <sys/kernel.h> 118 #include <sys/event.h> 119 #include <sys/eventhandler.h> 120 #include <sys/poll.h> 121 #include <sys/proc.h> 122 #include <sys/protosw.h> 123 #include <sys/socket.h> 124 #include <sys/socketvar.h> 125 #include <sys/resourcevar.h> 126 #include <net/route.h> 127 #include <sys/signalvar.h> 128 #include <sys/stat.h> 129 #include <sys/sx.h> 130 #include <sys/sysctl.h> 131 #include <sys/uio.h> 132 #include <sys/jail.h> 133 134 #include <security/mac/mac_framework.h> 135 136 #include <vm/uma.h> 137 138 #ifdef COMPAT_IA32 139 #include <sys/mount.h> 140 #include <sys/sysent.h> 141 #include <compat/freebsd32/freebsd32.h> 142 #endif 143 144 static int soreceive_rcvoob(struct socket *so, struct uio *uio, 145 int flags); 146 147 static void filt_sordetach(struct knote *kn); 148 static int filt_soread(struct knote *kn, long hint); 149 static void filt_sowdetach(struct knote *kn); 150 static int filt_sowrite(struct knote *kn, long hint); 151 static int filt_solisten(struct knote *kn, long hint); 152 153 static struct filterops solisten_filtops = 154 { 1, NULL, filt_sordetach, filt_solisten }; 155 static struct filterops soread_filtops = 156 { 1, NULL, filt_sordetach, filt_soread }; 157 static struct filterops sowrite_filtops = 158 { 1, NULL, filt_sowdetach, filt_sowrite }; 159 160 uma_zone_t socket_zone; 161 so_gen_t so_gencnt; /* generation count for sockets */ 162 163 int maxsockets; 164 165 MALLOC_DEFINE(M_SONAME, "soname", "socket name"); 166 MALLOC_DEFINE(M_PCB, 
"pcb", "protocol control block"); 167 168 static int somaxconn = SOMAXCONN; 169 static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS); 170 /* XXX: we dont have SYSCTL_USHORT */ 171 SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW, 172 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection " 173 "queue size"); 174 static int numopensockets; 175 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, 176 &numopensockets, 0, "Number of open sockets"); 177 #ifdef ZERO_COPY_SOCKETS 178 /* These aren't static because they're used in other files. */ 179 int so_zero_copy_send = 1; 180 int so_zero_copy_receive = 1; 181 SYSCTL_NODE(_kern_ipc, OID_AUTO, zero_copy, CTLFLAG_RD, 0, 182 "Zero copy controls"); 183 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, receive, CTLFLAG_RW, 184 &so_zero_copy_receive, 0, "Enable zero copy receive"); 185 SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO, send, CTLFLAG_RW, 186 &so_zero_copy_send, 0, "Enable zero copy send"); 187 #endif /* ZERO_COPY_SOCKETS */ 188 189 /* 190 * accept_mtx locks down per-socket fields relating to accept queues. See 191 * socketvar.h for an annotation of the protected fields of struct socket. 192 */ 193 struct mtx accept_mtx; 194 MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); 195 196 /* 197 * so_global_mtx protects so_gencnt, numopensockets, and the per-socket 198 * so_gencnt field. 199 */ 200 static struct mtx so_global_mtx; 201 MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); 202 203 /* 204 * General IPC sysctl name space, used by sockets and a variety of other IPC 205 * types. 206 */ 207 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); 208 209 /* 210 * Sysctl to get and set the maximum global sockets limit. Notify protocols 211 * of the change so that they can update their dependent limits as required. 212 */ 213 static int 214 sysctl_maxsockets(SYSCTL_HANDLER_ARGS) 215 { 216 int error, newmaxsockets; 217 218 newmaxsockets = maxsockets; 219 error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); 220 if (error == 0 && req->newptr) { 221 if (newmaxsockets > maxsockets) { 222 maxsockets = newmaxsockets; 223 if (maxsockets > ((maxfiles / 4) * 3)) { 224 maxfiles = (maxsockets * 5) / 4; 225 maxfilesperproc = (maxfiles * 9) / 10; 226 } 227 EVENTHANDLER_INVOKE(maxsockets_change); 228 } else 229 error = EINVAL; 230 } 231 return (error); 232 } 233 234 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, 235 &maxsockets, 0, sysctl_maxsockets, "IU", 236 "Maximum number of sockets avaliable"); 237 238 /* 239 * Initialise maxsockets. This SYSINIT must be run after 240 * tunable_mbinit(). 241 */ 242 static void 243 init_maxsockets(void *ignored) 244 { 245 246 TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); 247 maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); 248 } 249 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); 250 251 /* 252 * Socket operation routines. These routines are called by the routines in 253 * sys_socket.c or from a system process, and implement the semantics of 254 * socket operations by switching out to the protocol specific routines. 255 */ 256 257 /* 258 * Get a socket structure from our zone, and initialize it. Note that it 259 * would probably be better to allocate socket and PCB at the same time, but 260 * I'm not convinced that all the protocols can be easily modified to do 261 * this. 262 * 263 * soalloc() returns a socket with a ref count of 0. 
 */
static struct socket *
soalloc(void)
{
	struct socket *so;

	so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
	if (so == NULL)
		return (NULL);
#ifdef MAC
	if (mac_socket_init(so, M_NOWAIT) != 0) {
		uma_zfree(socket_zone, so);
		return (NULL);
	}
#endif
	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
	sx_init(&so->so_snd.sb_sx, "so_snd_sx");
	sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
	TAILQ_INIT(&so->so_aiojobq);
	mtx_lock(&so_global_mtx);
	so->so_gencnt = ++so_gencnt;
	++numopensockets;
	mtx_unlock(&so_global_mtx);
	return (so);
}

/*
 * Free the storage associated with a socket at the socket layer, tear down
 * locks, labels, etc.  All protocol state is assumed already to have been
 * torn down (and possibly never set up) by the caller.
 */
static void
sodealloc(struct socket *so)
{

	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));

	mtx_lock(&so_global_mtx);
	so->so_gencnt = ++so_gencnt;
	--numopensockets;	/* Could be below, but faster here. */
	mtx_unlock(&so_global_mtx);
	if (so->so_rcv.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
	if (so->so_snd.sb_hiwat)
		(void)chgsbsize(so->so_cred->cr_uidinfo,
		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
#ifdef INET
	/* remove accept filter if one is present. */
	if (so->so_accf != NULL)
		do_setopt_accept_filter(so, NULL);
#endif
#ifdef MAC
	mac_socket_destroy(so);
#endif
	crfree(so->so_cred);
	sx_destroy(&so->so_snd.sb_sx);
	sx_destroy(&so->so_rcv.sb_sx);
	SOCKBUF_LOCK_DESTROY(&so->so_snd);
	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
	uma_zfree(socket_zone, so);
}
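
/*
 * Illustrative sketch of a consumer of the public socreate()/soclose()
 * interface described in the life cycle comment above.  The function name
 * and the #ifdef guard below are examples only and are not referenced
 * anywhere else; intermediate send/receive calls and their error handling
 * are omitted.
 */
#ifdef SOCKET_LIFECYCLE_EXAMPLE
static int
example_socket_lifecycle(struct thread *td)
{
	struct socket *so;
	int error;

	/* Create a datagram socket; proto 0 selects the type's default. */
	error = socreate(PF_INET, &so, SOCK_DGRAM, 0, td->td_ucred, td);
	if (error != 0)
		return (error);

	/* ... sosend()/soreceive() against 'so' would go here ... */

	/* soclose() drops the single reference returned by socreate(). */
	return (soclose(so));
}
#endif
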
/*
 * socreate returns a socket with a ref count of 1.  The socket should be
 * closed with soclose().
 */
int
socreate(int dom, struct socket **aso, int type, int proto,
    struct ucred *cred, struct thread *td)
{
	struct protosw *prp;
	struct socket *so;
	int error;

	if (proto)
		prp = pffindproto(dom, proto, type);
	else
		prp = pffindtype(dom, type);

	if (prp == NULL || prp->pr_usrreqs->pru_attach == NULL ||
	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
		return (EPROTONOSUPPORT);

	if (jailed(cred) && jail_socket_unixiproute_only &&
	    prp->pr_domain->dom_family != PF_LOCAL &&
	    prp->pr_domain->dom_family != PF_INET &&
#ifdef INET6
	    prp->pr_domain->dom_family != PF_INET6 &&
#endif
	    prp->pr_domain->dom_family != PF_ROUTE) {
		return (EPROTONOSUPPORT);
	}

	if (prp->pr_type != type)
		return (EPROTOTYPE);
	so = soalloc();
	if (so == NULL)
		return (ENOBUFS);

	TAILQ_INIT(&so->so_incomp);
	TAILQ_INIT(&so->so_comp);
	so->so_type = type;
	so->so_cred = crhold(cred);
	if ((prp->pr_domain->dom_family == PF_INET) ||
	    (prp->pr_domain->dom_family == PF_ROUTE))
		so->so_fibnum = td->td_proc->p_fibnum;
	else
		so->so_fibnum = 0;
	so->so_proto = prp;
#ifdef MAC
	mac_socket_create(cred, so);
#endif
	knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
	    NULL, NULL, NULL);
	knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
	    NULL, NULL, NULL);
	so->so_count = 1;
	/*
	 * Auto-sizing of socket buffers is managed by the protocols and
	 * the appropriate flags must be set in the pru_attach function.
	 */
	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
	if (error) {
		KASSERT(so->so_count == 1, ("socreate: so_count %d",
		    so->so_count));
		so->so_count = 0;
		sodealloc(so);
		return (error);
	}
	*aso = so;
	return (0);
}

#ifdef REGRESSION
static int regression_sonewconn_earlytest = 1;
SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
    &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
#endif

/*
 * When an attempt at a new connection is noted on a socket which accepts
 * connections, sonewconn is called.  If the connection is possible (subject
 * to space constraints, etc.) then we allocate a new structure, properly
 * linked into the data structure of the original socket, and return this.
 * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
 *
 * Note: the ref count on the socket is 0 on return.
414 */ 415 struct socket * 416 sonewconn(struct socket *head, int connstatus) 417 { 418 struct socket *so; 419 int over; 420 421 ACCEPT_LOCK(); 422 over = (head->so_qlen > 3 * head->so_qlimit / 2); 423 ACCEPT_UNLOCK(); 424 #ifdef REGRESSION 425 if (regression_sonewconn_earlytest && over) 426 #else 427 if (over) 428 #endif 429 return (NULL); 430 so = soalloc(); 431 if (so == NULL) 432 return (NULL); 433 if ((head->so_options & SO_ACCEPTFILTER) != 0) 434 connstatus = 0; 435 so->so_head = head; 436 so->so_type = head->so_type; 437 so->so_options = head->so_options &~ SO_ACCEPTCONN; 438 so->so_linger = head->so_linger; 439 so->so_state = head->so_state | SS_NOFDREF; 440 so->so_proto = head->so_proto; 441 so->so_cred = crhold(head->so_cred); 442 #ifdef MAC 443 SOCK_LOCK(head); 444 mac_socket_newconn(head, so); 445 SOCK_UNLOCK(head); 446 #endif 447 knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv), 448 NULL, NULL, NULL); 449 knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd), 450 NULL, NULL, NULL); 451 if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || 452 (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { 453 sodealloc(so); 454 return (NULL); 455 } 456 so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; 457 so->so_snd.sb_lowat = head->so_snd.sb_lowat; 458 so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; 459 so->so_snd.sb_timeo = head->so_snd.sb_timeo; 460 so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; 461 so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; 462 so->so_state |= connstatus; 463 ACCEPT_LOCK(); 464 if (connstatus) { 465 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); 466 so->so_qstate |= SQ_COMP; 467 head->so_qlen++; 468 } else { 469 /* 470 * Keep removing sockets from the head until there's room for 471 * us to insert on the tail. In pre-locking revisions, this 472 * was a simple if(), but as we could be racing with other 473 * threads and soabort() requires dropping locks, we must 474 * loop waiting for the condition to be true. 475 */ 476 while (head->so_incqlen > head->so_qlimit) { 477 struct socket *sp; 478 sp = TAILQ_FIRST(&head->so_incomp); 479 TAILQ_REMOVE(&head->so_incomp, sp, so_list); 480 head->so_incqlen--; 481 sp->so_qstate &= ~SQ_INCOMP; 482 sp->so_head = NULL; 483 ACCEPT_UNLOCK(); 484 soabort(sp); 485 ACCEPT_LOCK(); 486 } 487 TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); 488 so->so_qstate |= SQ_INCOMP; 489 head->so_incqlen++; 490 } 491 ACCEPT_UNLOCK(); 492 if (connstatus) { 493 sorwakeup(head); 494 wakeup_one(&head->so_timeo); 495 } 496 return (so); 497 } 498 499 int 500 sobind(struct socket *so, struct sockaddr *nam, struct thread *td) 501 { 502 503 return ((*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td)); 504 } 505 506 /* 507 * solisten() transitions a socket from a non-listening state to a listening 508 * state, but can also be used to update the listen queue depth on an 509 * existing listen socket. The protocol will call back into the sockets 510 * layer using solisten_proto_check() and solisten_proto() to check and set 511 * socket-layer listen state. Call backs are used so that the protocol can 512 * acquire both protocol and socket layer locks in whatever order is required 513 * by the protocol. 514 * 515 * Protocol implementors are advised to hold the socket lock across the 516 * socket-layer test and set to avoid races at the socket layer. 
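 *
 * As a sketch (protocol-level locking omitted), a pru_listen implementation
 * following the pattern described above looks roughly like:
 *
 *	SOCK_LOCK(so);
 *	error = solisten_proto_check(so);
 *	if (error == 0)
 *		solisten_proto(so, backlog);
 *	SOCK_UNLOCK(so);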
517 */ 518 int 519 solisten(struct socket *so, int backlog, struct thread *td) 520 { 521 522 return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td)); 523 } 524 525 int 526 solisten_proto_check(struct socket *so) 527 { 528 529 SOCK_LOCK_ASSERT(so); 530 531 if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | 532 SS_ISDISCONNECTING)) 533 return (EINVAL); 534 return (0); 535 } 536 537 void 538 solisten_proto(struct socket *so, int backlog) 539 { 540 541 SOCK_LOCK_ASSERT(so); 542 543 if (backlog < 0 || backlog > somaxconn) 544 backlog = somaxconn; 545 so->so_qlimit = backlog; 546 so->so_options |= SO_ACCEPTCONN; 547 } 548 549 /* 550 * Attempt to free a socket. This should really be sotryfree(). 551 * 552 * sofree() will succeed if: 553 * 554 * - There are no outstanding file descriptor references or related consumers 555 * (so_count == 0). 556 * 557 * - The socket has been closed by user space, if ever open (SS_NOFDREF). 558 * 559 * - The protocol does not have an outstanding strong reference on the socket 560 * (SS_PROTOREF). 561 * 562 * - The socket is not in a completed connection queue, so a process has been 563 * notified that it is present. If it is removed, the user process may 564 * block in accept() despite select() saying the socket was ready. 565 * 566 * Otherwise, it will quietly abort so that a future call to sofree(), when 567 * conditions are right, can succeed. 568 */ 569 void 570 sofree(struct socket *so) 571 { 572 struct protosw *pr = so->so_proto; 573 struct socket *head; 574 575 ACCEPT_LOCK_ASSERT(); 576 SOCK_LOCK_ASSERT(so); 577 578 if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || 579 (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) { 580 SOCK_UNLOCK(so); 581 ACCEPT_UNLOCK(); 582 return; 583 } 584 585 head = so->so_head; 586 if (head != NULL) { 587 KASSERT((so->so_qstate & SQ_COMP) != 0 || 588 (so->so_qstate & SQ_INCOMP) != 0, 589 ("sofree: so_head != NULL, but neither SQ_COMP nor " 590 "SQ_INCOMP")); 591 KASSERT((so->so_qstate & SQ_COMP) == 0 || 592 (so->so_qstate & SQ_INCOMP) == 0, 593 ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP")); 594 TAILQ_REMOVE(&head->so_incomp, so, so_list); 595 head->so_incqlen--; 596 so->so_qstate &= ~SQ_INCOMP; 597 so->so_head = NULL; 598 } 599 KASSERT((so->so_qstate & SQ_COMP) == 0 && 600 (so->so_qstate & SQ_INCOMP) == 0, 601 ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)", 602 so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); 603 if (so->so_options & SO_ACCEPTCONN) { 604 KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); 605 KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated")); 606 } 607 SOCK_UNLOCK(so); 608 ACCEPT_UNLOCK(); 609 610 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) 611 (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); 612 if (pr->pr_usrreqs->pru_detach != NULL) 613 (*pr->pr_usrreqs->pru_detach)(so); 614 615 /* 616 * From this point on, we assume that no other references to this 617 * socket exist anywhere else in the stack. Therefore, no locks need 618 * to be acquired or held. 619 * 620 * We used to do a lot of socket buffer and socket locking here, as 621 * well as invoke sorflush() and perform wakeups. The direct call to 622 * dom_dispose() and sbrelease_internal() are an inlining of what was 623 * necessary from sorflush(). 624 * 625 * Notice that the socket buffer and kqueue state are torn down 626 * before calling pru_detach. 
This means that protocols shold not 627 * assume they can perform socket wakeups, etc, in their detach code. 628 */ 629 sbdestroy(&so->so_snd, so); 630 sbdestroy(&so->so_rcv, so); 631 knlist_destroy(&so->so_rcv.sb_sel.si_note); 632 knlist_destroy(&so->so_snd.sb_sel.si_note); 633 sodealloc(so); 634 } 635 636 /* 637 * Close a socket on last file table reference removal. Initiate disconnect 638 * if connected. Free socket when disconnect complete. 639 * 640 * This function will sorele() the socket. Note that soclose() may be called 641 * prior to the ref count reaching zero. The actual socket structure will 642 * not be freed until the ref count reaches zero. 643 */ 644 int 645 soclose(struct socket *so) 646 { 647 int error = 0; 648 649 KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); 650 651 funsetown(&so->so_sigio); 652 if (so->so_state & SS_ISCONNECTED) { 653 if ((so->so_state & SS_ISDISCONNECTING) == 0) { 654 error = sodisconnect(so); 655 if (error) 656 goto drop; 657 } 658 if (so->so_options & SO_LINGER) { 659 if ((so->so_state & SS_ISDISCONNECTING) && 660 (so->so_state & SS_NBIO)) 661 goto drop; 662 while (so->so_state & SS_ISCONNECTED) { 663 error = tsleep(&so->so_timeo, 664 PSOCK | PCATCH, "soclos", so->so_linger * hz); 665 if (error) 666 break; 667 } 668 } 669 } 670 671 drop: 672 if (so->so_proto->pr_usrreqs->pru_close != NULL) 673 (*so->so_proto->pr_usrreqs->pru_close)(so); 674 if (so->so_options & SO_ACCEPTCONN) { 675 struct socket *sp; 676 ACCEPT_LOCK(); 677 while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { 678 TAILQ_REMOVE(&so->so_incomp, sp, so_list); 679 so->so_incqlen--; 680 sp->so_qstate &= ~SQ_INCOMP; 681 sp->so_head = NULL; 682 ACCEPT_UNLOCK(); 683 soabort(sp); 684 ACCEPT_LOCK(); 685 } 686 while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { 687 TAILQ_REMOVE(&so->so_comp, sp, so_list); 688 so->so_qlen--; 689 sp->so_qstate &= ~SQ_COMP; 690 sp->so_head = NULL; 691 ACCEPT_UNLOCK(); 692 soabort(sp); 693 ACCEPT_LOCK(); 694 } 695 ACCEPT_UNLOCK(); 696 } 697 ACCEPT_LOCK(); 698 SOCK_LOCK(so); 699 KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); 700 so->so_state |= SS_NOFDREF; 701 sorele(so); 702 return (error); 703 } 704 705 /* 706 * soabort() is used to abruptly tear down a connection, such as when a 707 * resource limit is reached (listen queue depth exceeded), or if a listen 708 * socket is closed while there are sockets waiting to be accepted. 709 * 710 * This interface is tricky, because it is called on an unreferenced socket, 711 * and must be called only by a thread that has actually removed the socket 712 * from the listen queue it was on, or races with other threads are risked. 713 * 714 * This interface will call into the protocol code, so must not be called 715 * with any socket locks held. Protocols do call it while holding their own 716 * recursible protocol mutexes, but this is something that should be subject 717 * to review in the future. 718 */ 719 void 720 soabort(struct socket *so) 721 { 722 723 /* 724 * In as much as is possible, assert that no references to this 725 * socket are held. This is not quite the same as asserting that the 726 * current thread is responsible for arranging for no references, but 727 * is as close as we can get for now. 
728 */ 729 KASSERT(so->so_count == 0, ("soabort: so_count")); 730 KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); 731 KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); 732 KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); 733 KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); 734 735 if (so->so_proto->pr_usrreqs->pru_abort != NULL) 736 (*so->so_proto->pr_usrreqs->pru_abort)(so); 737 ACCEPT_LOCK(); 738 SOCK_LOCK(so); 739 sofree(so); 740 } 741 742 int 743 soaccept(struct socket *so, struct sockaddr **nam) 744 { 745 int error; 746 747 SOCK_LOCK(so); 748 KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); 749 so->so_state &= ~SS_NOFDREF; 750 SOCK_UNLOCK(so); 751 error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); 752 return (error); 753 } 754 755 int 756 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) 757 { 758 int error; 759 760 if (so->so_options & SO_ACCEPTCONN) 761 return (EOPNOTSUPP); 762 /* 763 * If protocol is connection-based, can only connect once. 764 * Otherwise, if connected, try to disconnect first. This allows 765 * user to disconnect by connecting to, e.g., a null address. 766 */ 767 if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && 768 ((so->so_proto->pr_flags & PR_CONNREQUIRED) || 769 (error = sodisconnect(so)))) { 770 error = EISCONN; 771 } else { 772 /* 773 * Prevent accumulated error from previous connection from 774 * biting us. 775 */ 776 so->so_error = 0; 777 error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); 778 } 779 780 return (error); 781 } 782 783 int 784 soconnect2(struct socket *so1, struct socket *so2) 785 { 786 787 return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2)); 788 } 789 790 int 791 sodisconnect(struct socket *so) 792 { 793 int error; 794 795 if ((so->so_state & SS_ISCONNECTED) == 0) 796 return (ENOTCONN); 797 if (so->so_state & SS_ISDISCONNECTING) 798 return (EALREADY); 799 error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); 800 return (error); 801 } 802 803 #ifdef ZERO_COPY_SOCKETS 804 struct so_zerocopy_stats{ 805 int size_ok; 806 int align_ok; 807 int found_ifp; 808 }; 809 struct so_zerocopy_stats so_zerocp_stats = {0,0,0}; 810 #include <netinet/in.h> 811 #include <net/route.h> 812 #include <netinet/in_pcb.h> 813 #include <vm/vm.h> 814 #include <vm/vm_page.h> 815 #include <vm/vm_object.h> 816 817 /* 818 * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise 819 * sosend_dgram() and sosend_generic() use m_uiotombuf(). 820 * 821 * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or 822 * all of the data referenced by the uio. If desired, it uses zero-copy. 823 * *space will be updated to reflect data copied in. 824 * 825 * NB: If atomic I/O is requested, the caller must already have checked that 826 * space can hold resid bytes. 827 * 828 * NB: In the event of an error, the caller may need to free the partial 829 * chain pointed to by *mpp. The contents of both *uio and *space may be 830 * modified even in the case of an error. 
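 *
 * As a rough sketch of the calling convention (socket buffer locking and
 * the various pre-checks omitted), the senders below use it as:
 *
 *	space = sbspace(&so->so_snd);
 *	error = sosend_copyin(uio, &top, atomic, &space, flags);
 *	if (error != 0 && top != NULL)
 *		m_freem(top);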
831 */ 832 static int 833 sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, 834 int flags) 835 { 836 struct mbuf *m, **mp, *top; 837 long len, resid; 838 int error; 839 #ifdef ZERO_COPY_SOCKETS 840 int cow_send; 841 #endif 842 843 *retmp = top = NULL; 844 mp = ⊤ 845 len = 0; 846 resid = uio->uio_resid; 847 error = 0; 848 do { 849 #ifdef ZERO_COPY_SOCKETS 850 cow_send = 0; 851 #endif /* ZERO_COPY_SOCKETS */ 852 if (resid >= MINCLSIZE) { 853 #ifdef ZERO_COPY_SOCKETS 854 if (top == NULL) { 855 m = m_gethdr(M_WAITOK, MT_DATA); 856 m->m_pkthdr.len = 0; 857 m->m_pkthdr.rcvif = NULL; 858 } else 859 m = m_get(M_WAITOK, MT_DATA); 860 if (so_zero_copy_send && 861 resid>=PAGE_SIZE && 862 *space>=PAGE_SIZE && 863 uio->uio_iov->iov_len>=PAGE_SIZE) { 864 so_zerocp_stats.size_ok++; 865 so_zerocp_stats.align_ok++; 866 cow_send = socow_setup(m, uio); 867 len = cow_send; 868 } 869 if (!cow_send) { 870 m_clget(m, M_WAITOK); 871 len = min(min(MCLBYTES, resid), *space); 872 } 873 #else /* ZERO_COPY_SOCKETS */ 874 if (top == NULL) { 875 m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); 876 m->m_pkthdr.len = 0; 877 m->m_pkthdr.rcvif = NULL; 878 } else 879 m = m_getcl(M_WAIT, MT_DATA, 0); 880 len = min(min(MCLBYTES, resid), *space); 881 #endif /* ZERO_COPY_SOCKETS */ 882 } else { 883 if (top == NULL) { 884 m = m_gethdr(M_WAIT, MT_DATA); 885 m->m_pkthdr.len = 0; 886 m->m_pkthdr.rcvif = NULL; 887 888 len = min(min(MHLEN, resid), *space); 889 /* 890 * For datagram protocols, leave room 891 * for protocol headers in first mbuf. 892 */ 893 if (atomic && m && len < MHLEN) 894 MH_ALIGN(m, len); 895 } else { 896 m = m_get(M_WAIT, MT_DATA); 897 len = min(min(MLEN, resid), *space); 898 } 899 } 900 if (m == NULL) { 901 error = ENOBUFS; 902 goto out; 903 } 904 905 *space -= len; 906 #ifdef ZERO_COPY_SOCKETS 907 if (cow_send) 908 error = 0; 909 else 910 #endif /* ZERO_COPY_SOCKETS */ 911 error = uiomove(mtod(m, void *), (int)len, uio); 912 resid = uio->uio_resid; 913 m->m_len = len; 914 *mp = m; 915 top->m_pkthdr.len += len; 916 if (error) 917 goto out; 918 mp = &m->m_next; 919 if (resid <= 0) { 920 if (flags & MSG_EOR) 921 top->m_flags |= M_EOR; 922 break; 923 } 924 } while (*space > 0 && atomic); 925 out: 926 *retmp = top; 927 return (error); 928 } 929 #endif /*ZERO_COPY_SOCKETS*/ 930 931 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) 932 933 int 934 sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, 935 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 936 { 937 long space, resid; 938 int clen = 0, error, dontroute; 939 #ifdef ZERO_COPY_SOCKETS 940 int atomic = sosendallatonce(so) || top; 941 #endif 942 943 KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM")); 944 KASSERT(so->so_proto->pr_flags & PR_ATOMIC, 945 ("sodgram_send: !PR_ATOMIC")); 946 947 if (uio != NULL) 948 resid = uio->uio_resid; 949 else 950 resid = top->m_pkthdr.len; 951 /* 952 * In theory resid should be unsigned. However, space must be 953 * signed, as it might be less than 0 if we over-committed, and we 954 * must use a signed comparison of space and resid. On the other 955 * hand, a negative resid causes us to loop sending 0-length 956 * segments to the protocol. 957 * 958 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 959 * type sockets since that's an error. 
960 */ 961 if (resid < 0) { 962 error = EINVAL; 963 goto out; 964 } 965 966 dontroute = 967 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; 968 if (td != NULL) 969 td->td_ru.ru_msgsnd++; 970 if (control != NULL) 971 clen = control->m_len; 972 973 SOCKBUF_LOCK(&so->so_snd); 974 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 975 SOCKBUF_UNLOCK(&so->so_snd); 976 error = EPIPE; 977 goto out; 978 } 979 if (so->so_error) { 980 error = so->so_error; 981 so->so_error = 0; 982 SOCKBUF_UNLOCK(&so->so_snd); 983 goto out; 984 } 985 if ((so->so_state & SS_ISCONNECTED) == 0) { 986 /* 987 * `sendto' and `sendmsg' is allowed on a connection-based 988 * socket if it supports implied connect. Return ENOTCONN if 989 * not connected and no address is supplied. 990 */ 991 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 992 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 993 if ((so->so_state & SS_ISCONFIRMING) == 0 && 994 !(resid == 0 && clen != 0)) { 995 SOCKBUF_UNLOCK(&so->so_snd); 996 error = ENOTCONN; 997 goto out; 998 } 999 } else if (addr == NULL) { 1000 if (so->so_proto->pr_flags & PR_CONNREQUIRED) 1001 error = ENOTCONN; 1002 else 1003 error = EDESTADDRREQ; 1004 SOCKBUF_UNLOCK(&so->so_snd); 1005 goto out; 1006 } 1007 } 1008 1009 /* 1010 * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a 1011 * problem and need fixing. 1012 */ 1013 space = sbspace(&so->so_snd); 1014 if (flags & MSG_OOB) 1015 space += 1024; 1016 space -= clen; 1017 SOCKBUF_UNLOCK(&so->so_snd); 1018 if (resid > space) { 1019 error = EMSGSIZE; 1020 goto out; 1021 } 1022 if (uio == NULL) { 1023 resid = 0; 1024 if (flags & MSG_EOR) 1025 top->m_flags |= M_EOR; 1026 } else { 1027 #ifdef ZERO_COPY_SOCKETS 1028 error = sosend_copyin(uio, &top, atomic, &space, flags); 1029 if (error) 1030 goto out; 1031 #else 1032 /* 1033 * Copy the data from userland into a mbuf chain. 1034 * If no data is to be copied in, a single empty mbuf 1035 * is returned. 1036 */ 1037 top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, 1038 (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); 1039 if (top == NULL) { 1040 error = EFAULT; /* only possible error */ 1041 goto out; 1042 } 1043 space -= resid - uio->uio_resid; 1044 #endif 1045 resid = uio->uio_resid; 1046 } 1047 KASSERT(resid == 0, ("sosend_dgram: resid != 0")); 1048 /* 1049 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock 1050 * than with. 1051 */ 1052 if (dontroute) { 1053 SOCK_LOCK(so); 1054 so->so_options |= SO_DONTROUTE; 1055 SOCK_UNLOCK(so); 1056 } 1057 /* 1058 * XXX all the SBS_CANTSENDMORE checks previously done could be out 1059 * of date. We could have recieved a reset packet in an interrupt or 1060 * maybe we slept while doing page faults in uiomove() etc. We could 1061 * probably recheck again inside the locking protection here, but 1062 * there are probably other places that this also happens. We must 1063 * rethink this. 1064 */ 1065 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 1066 (flags & MSG_OOB) ? PRUS_OOB : 1067 /* 1068 * If the user set MSG_EOF, the protocol understands this flag and 1069 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. 1070 */ 1071 ((flags & MSG_EOF) && 1072 (so->so_proto->pr_flags & PR_IMPLOPCL) && 1073 (resid <= 0)) ? 1074 PRUS_EOF : 1075 /* If there is more to send set PRUS_MORETOCOME */ 1076 (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, 1077 top, addr, control, td); 1078 if (dontroute) { 1079 SOCK_LOCK(so); 1080 so->so_options &= ~SO_DONTROUTE; 1081 SOCK_UNLOCK(so); 1082 } 1083 clen = 0; 1084 control = NULL; 1085 top = NULL; 1086 out: 1087 if (top != NULL) 1088 m_freem(top); 1089 if (control != NULL) 1090 m_freem(control); 1091 return (error); 1092 } 1093 1094 /* 1095 * Send on a socket. If send must go all at once and message is larger than 1096 * send buffering, then hard error. Lock against other senders. If must go 1097 * all at once and not enough room now, then inform user that this would 1098 * block and do nothing. Otherwise, if nonblocking, send as much as 1099 * possible. The data to be sent is described by "uio" if nonzero, otherwise 1100 * by the mbuf chain "top" (which must be null if uio is not). Data provided 1101 * in mbuf chain must be small enough to send all at once. 1102 * 1103 * Returns nonzero on error, timeout or signal; callers must check for short 1104 * counts if EINTR/ERESTART are returned. Data and control buffers are freed 1105 * on return. 1106 */ 1107 int 1108 sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, 1109 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 1110 { 1111 long space, resid; 1112 int clen = 0, error, dontroute; 1113 int atomic = sosendallatonce(so) || top; 1114 1115 if (uio != NULL) 1116 resid = uio->uio_resid; 1117 else 1118 resid = top->m_pkthdr.len; 1119 /* 1120 * In theory resid should be unsigned. However, space must be 1121 * signed, as it might be less than 0 if we over-committed, and we 1122 * must use a signed comparison of space and resid. On the other 1123 * hand, a negative resid causes us to loop sending 0-length 1124 * segments to the protocol. 1125 * 1126 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM 1127 * type sockets since that's an error. 1128 */ 1129 if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { 1130 error = EINVAL; 1131 goto out; 1132 } 1133 1134 dontroute = 1135 (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && 1136 (so->so_proto->pr_flags & PR_ATOMIC); 1137 if (td != NULL) 1138 td->td_ru.ru_msgsnd++; 1139 if (control != NULL) 1140 clen = control->m_len; 1141 1142 error = sblock(&so->so_snd, SBLOCKWAIT(flags)); 1143 if (error) 1144 goto out; 1145 1146 restart: 1147 do { 1148 SOCKBUF_LOCK(&so->so_snd); 1149 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1150 SOCKBUF_UNLOCK(&so->so_snd); 1151 error = EPIPE; 1152 goto release; 1153 } 1154 if (so->so_error) { 1155 error = so->so_error; 1156 so->so_error = 0; 1157 SOCKBUF_UNLOCK(&so->so_snd); 1158 goto release; 1159 } 1160 if ((so->so_state & SS_ISCONNECTED) == 0) { 1161 /* 1162 * `sendto' and `sendmsg' is allowed on a connection- 1163 * based socket if it supports implied connect. 1164 * Return ENOTCONN if not connected and no address is 1165 * supplied. 
1166 */ 1167 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && 1168 (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { 1169 if ((so->so_state & SS_ISCONFIRMING) == 0 && 1170 !(resid == 0 && clen != 0)) { 1171 SOCKBUF_UNLOCK(&so->so_snd); 1172 error = ENOTCONN; 1173 goto release; 1174 } 1175 } else if (addr == NULL) { 1176 SOCKBUF_UNLOCK(&so->so_snd); 1177 if (so->so_proto->pr_flags & PR_CONNREQUIRED) 1178 error = ENOTCONN; 1179 else 1180 error = EDESTADDRREQ; 1181 goto release; 1182 } 1183 } 1184 space = sbspace(&so->so_snd); 1185 if (flags & MSG_OOB) 1186 space += 1024; 1187 if ((atomic && resid > so->so_snd.sb_hiwat) || 1188 clen > so->so_snd.sb_hiwat) { 1189 SOCKBUF_UNLOCK(&so->so_snd); 1190 error = EMSGSIZE; 1191 goto release; 1192 } 1193 if (space < resid + clen && 1194 (atomic || space < so->so_snd.sb_lowat || space < clen)) { 1195 if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) { 1196 SOCKBUF_UNLOCK(&so->so_snd); 1197 error = EWOULDBLOCK; 1198 goto release; 1199 } 1200 error = sbwait(&so->so_snd); 1201 SOCKBUF_UNLOCK(&so->so_snd); 1202 if (error) 1203 goto release; 1204 goto restart; 1205 } 1206 SOCKBUF_UNLOCK(&so->so_snd); 1207 space -= clen; 1208 do { 1209 if (uio == NULL) { 1210 resid = 0; 1211 if (flags & MSG_EOR) 1212 top->m_flags |= M_EOR; 1213 } else { 1214 #ifdef ZERO_COPY_SOCKETS 1215 error = sosend_copyin(uio, &top, atomic, 1216 &space, flags); 1217 if (error != 0) 1218 goto release; 1219 #else 1220 /* 1221 * Copy the data from userland into a mbuf 1222 * chain. If no data is to be copied in, 1223 * a single empty mbuf is returned. 1224 */ 1225 top = m_uiotombuf(uio, M_WAITOK, space, 1226 (atomic ? max_hdr : 0), 1227 (atomic ? M_PKTHDR : 0) | 1228 ((flags & MSG_EOR) ? M_EOR : 0)); 1229 if (top == NULL) { 1230 error = EFAULT; /* only possible error */ 1231 goto release; 1232 } 1233 space -= resid - uio->uio_resid; 1234 #endif 1235 resid = uio->uio_resid; 1236 } 1237 if (dontroute) { 1238 SOCK_LOCK(so); 1239 so->so_options |= SO_DONTROUTE; 1240 SOCK_UNLOCK(so); 1241 } 1242 /* 1243 * XXX all the SBS_CANTSENDMORE checks previously 1244 * done could be out of date. We could have recieved 1245 * a reset packet in an interrupt or maybe we slept 1246 * while doing page faults in uiomove() etc. We 1247 * could probably recheck again inside the locking 1248 * protection here, but there are probably other 1249 * places that this also happens. We must rethink 1250 * this. 1251 */ 1252 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 1253 (flags & MSG_OOB) ? PRUS_OOB : 1254 /* 1255 * If the user set MSG_EOF, the protocol understands 1256 * this flag and nothing left to send then use 1257 * PRU_SEND_EOF instead of PRU_SEND. 1258 */ 1259 ((flags & MSG_EOF) && 1260 (so->so_proto->pr_flags & PR_IMPLOPCL) && 1261 (resid <= 0)) ? 1262 PRUS_EOF : 1263 /* If there is more to send set PRUS_MORETOCOME. */ 1264 (resid > 0 && space > 0) ? 
PRUS_MORETOCOME : 0, 1265 top, addr, control, td); 1266 if (dontroute) { 1267 SOCK_LOCK(so); 1268 so->so_options &= ~SO_DONTROUTE; 1269 SOCK_UNLOCK(so); 1270 } 1271 clen = 0; 1272 control = NULL; 1273 top = NULL; 1274 if (error) 1275 goto release; 1276 } while (resid && space > 0); 1277 } while (resid); 1278 1279 release: 1280 sbunlock(&so->so_snd); 1281 out: 1282 if (top != NULL) 1283 m_freem(top); 1284 if (control != NULL) 1285 m_freem(control); 1286 return (error); 1287 } 1288 1289 int 1290 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, 1291 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 1292 { 1293 1294 return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top, 1295 control, flags, td)); 1296 } 1297 1298 /* 1299 * The part of soreceive() that implements reading non-inline out-of-band 1300 * data from a socket. For more complete comments, see soreceive(), from 1301 * which this code originated. 1302 * 1303 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is 1304 * unable to return an mbuf chain to the caller. 1305 */ 1306 static int 1307 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) 1308 { 1309 struct protosw *pr = so->so_proto; 1310 struct mbuf *m; 1311 int error; 1312 1313 KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); 1314 1315 m = m_get(M_WAIT, MT_DATA); 1316 error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); 1317 if (error) 1318 goto bad; 1319 do { 1320 #ifdef ZERO_COPY_SOCKETS 1321 if (so_zero_copy_receive) { 1322 int disposable; 1323 1324 if ((m->m_flags & M_EXT) 1325 && (m->m_ext.ext_type == EXT_DISPOSABLE)) 1326 disposable = 1; 1327 else 1328 disposable = 0; 1329 1330 error = uiomoveco(mtod(m, void *), 1331 min(uio->uio_resid, m->m_len), 1332 uio, disposable); 1333 } else 1334 #endif /* ZERO_COPY_SOCKETS */ 1335 error = uiomove(mtod(m, void *), 1336 (int) min(uio->uio_resid, m->m_len), uio); 1337 m = m_free(m); 1338 } while (uio->uio_resid && error == 0 && m); 1339 bad: 1340 if (m != NULL) 1341 m_freem(m); 1342 return (error); 1343 } 1344 1345 /* 1346 * Following replacement or removal of the first mbuf on the first mbuf chain 1347 * of a socket buffer, push necessary state changes back into the socket 1348 * buffer so that other consumers see the values consistently. 'nextrecord' 1349 * is the callers locally stored value of the original value of 1350 * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. 1351 * NOTE: 'nextrecord' may be NULL. 1352 */ 1353 static __inline void 1354 sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) 1355 { 1356 1357 SOCKBUF_LOCK_ASSERT(sb); 1358 /* 1359 * First, update for the new value of nextrecord. If necessary, make 1360 * it the first record. 1361 */ 1362 if (sb->sb_mb != NULL) 1363 sb->sb_mb->m_nextpkt = nextrecord; 1364 else 1365 sb->sb_mb = nextrecord; 1366 1367 /* 1368 * Now update any dependent socket buffer fields to reflect the new 1369 * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the 1370 * addition of a second clause that takes care of the case where 1371 * sb_mb has been updated, but remains the last record. 1372 */ 1373 if (sb->sb_mb == NULL) { 1374 sb->sb_mbtail = NULL; 1375 sb->sb_lastrecord = NULL; 1376 } else if (sb->sb_mb->m_nextpkt == NULL) 1377 sb->sb_lastrecord = sb->sb_mb; 1378 } 1379 1380 1381 /* 1382 * Implement receive operations on a socket. We depend on the way that 1383 * records are added to the sockbuf by sbappend. 
In particular, each record 1384 * (mbufs linked through m_next) must begin with an address if the protocol 1385 * so specifies, followed by an optional mbuf or mbufs containing ancillary 1386 * data, and then zero or more mbufs of data. In order to allow parallelism 1387 * between network receive and copying to user space, as well as avoid 1388 * sleeping with a mutex held, we release the socket buffer mutex during the 1389 * user space copy. Although the sockbuf is locked, new data may still be 1390 * appended, and thus we must maintain consistency of the sockbuf during that 1391 * time. 1392 * 1393 * The caller may receive the data as a single mbuf chain by supplying an 1394 * mbuf **mp0 for use in returning the chain. The uio is then used only for 1395 * the count in uio_resid. 1396 */ 1397 int 1398 soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, 1399 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 1400 { 1401 struct mbuf *m, **mp; 1402 int flags, len, error, offset; 1403 struct protosw *pr = so->so_proto; 1404 struct mbuf *nextrecord; 1405 int moff, type = 0; 1406 int orig_resid = uio->uio_resid; 1407 1408 mp = mp0; 1409 if (psa != NULL) 1410 *psa = NULL; 1411 if (controlp != NULL) 1412 *controlp = NULL; 1413 if (flagsp != NULL) 1414 flags = *flagsp &~ MSG_EOR; 1415 else 1416 flags = 0; 1417 if (flags & MSG_OOB) 1418 return (soreceive_rcvoob(so, uio, flags)); 1419 if (mp != NULL) 1420 *mp = NULL; 1421 if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) 1422 && uio->uio_resid) 1423 (*pr->pr_usrreqs->pru_rcvd)(so, 0); 1424 1425 error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); 1426 if (error) 1427 return (error); 1428 1429 restart: 1430 SOCKBUF_LOCK(&so->so_rcv); 1431 m = so->so_rcv.sb_mb; 1432 /* 1433 * If we have less data than requested, block awaiting more (subject 1434 * to any timeout) if: 1435 * 1. the current count is less than the low water mark, or 1436 * 2. MSG_WAITALL is set, and it is possible to do the entire 1437 * receive operation at once if we block (resid <= hiwat). 1438 * 3. MSG_DONTWAIT is not set 1439 * If MSG_WAITALL is set but resid is larger than the receive buffer, 1440 * we have to do the receive in sections, and thus risk returning a 1441 * short count if a timeout or signal occurs after we start. 
1442 */ 1443 if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && 1444 so->so_rcv.sb_cc < uio->uio_resid) && 1445 (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || 1446 ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && 1447 m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { 1448 KASSERT(m != NULL || !so->so_rcv.sb_cc, 1449 ("receive: m == %p so->so_rcv.sb_cc == %u", 1450 m, so->so_rcv.sb_cc)); 1451 if (so->so_error) { 1452 if (m != NULL) 1453 goto dontblock; 1454 error = so->so_error; 1455 if ((flags & MSG_PEEK) == 0) 1456 so->so_error = 0; 1457 SOCKBUF_UNLOCK(&so->so_rcv); 1458 goto release; 1459 } 1460 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1461 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { 1462 if (m == NULL) { 1463 SOCKBUF_UNLOCK(&so->so_rcv); 1464 goto release; 1465 } else 1466 goto dontblock; 1467 } 1468 for (; m != NULL; m = m->m_next) 1469 if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { 1470 m = so->so_rcv.sb_mb; 1471 goto dontblock; 1472 } 1473 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && 1474 (so->so_proto->pr_flags & PR_CONNREQUIRED)) { 1475 SOCKBUF_UNLOCK(&so->so_rcv); 1476 error = ENOTCONN; 1477 goto release; 1478 } 1479 if (uio->uio_resid == 0) { 1480 SOCKBUF_UNLOCK(&so->so_rcv); 1481 goto release; 1482 } 1483 if ((so->so_state & SS_NBIO) || 1484 (flags & (MSG_DONTWAIT|MSG_NBIO))) { 1485 SOCKBUF_UNLOCK(&so->so_rcv); 1486 error = EWOULDBLOCK; 1487 goto release; 1488 } 1489 SBLASTRECORDCHK(&so->so_rcv); 1490 SBLASTMBUFCHK(&so->so_rcv); 1491 error = sbwait(&so->so_rcv); 1492 SOCKBUF_UNLOCK(&so->so_rcv); 1493 if (error) 1494 goto release; 1495 goto restart; 1496 } 1497 dontblock: 1498 /* 1499 * From this point onward, we maintain 'nextrecord' as a cache of the 1500 * pointer to the next record in the socket buffer. We must keep the 1501 * various socket buffer pointers and local stack versions of the 1502 * pointers in sync, pushing out modifications before dropping the 1503 * socket buffer mutex, and re-reading them when picking it up. 1504 * 1505 * Otherwise, we will race with the network stack appending new data 1506 * or records onto the socket buffer by using inconsistent/stale 1507 * versions of the field, possibly resulting in socket buffer 1508 * corruption. 1509 * 1510 * By holding the high-level sblock(), we prevent simultaneous 1511 * readers from pulling off the front of the socket buffer. 1512 */ 1513 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1514 if (uio->uio_td) 1515 uio->uio_td->td_ru.ru_msgrcv++; 1516 KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb")); 1517 SBLASTRECORDCHK(&so->so_rcv); 1518 SBLASTMBUFCHK(&so->so_rcv); 1519 nextrecord = m->m_nextpkt; 1520 if (pr->pr_flags & PR_ADDR) { 1521 KASSERT(m->m_type == MT_SONAME, 1522 ("m->m_type == %d", m->m_type)); 1523 orig_resid = 0; 1524 if (psa != NULL) 1525 *psa = sodupsockaddr(mtod(m, struct sockaddr *), 1526 M_NOWAIT); 1527 if (flags & MSG_PEEK) { 1528 m = m->m_next; 1529 } else { 1530 sbfree(&so->so_rcv, m); 1531 so->so_rcv.sb_mb = m_free(m); 1532 m = so->so_rcv.sb_mb; 1533 sockbuf_pushsync(&so->so_rcv, nextrecord); 1534 } 1535 } 1536 1537 /* 1538 * Process one or more MT_CONTROL mbufs present before any data mbufs 1539 * in the first mbuf chain on the socket buffer. If MSG_PEEK, we 1540 * just copy the data; if !MSG_PEEK, we call into the protocol to 1541 * perform externalization (or freeing if controlp == NULL). 
1542 */ 1543 if (m != NULL && m->m_type == MT_CONTROL) { 1544 struct mbuf *cm = NULL, *cmn; 1545 struct mbuf **cme = &cm; 1546 1547 do { 1548 if (flags & MSG_PEEK) { 1549 if (controlp != NULL) { 1550 *controlp = m_copy(m, 0, m->m_len); 1551 controlp = &(*controlp)->m_next; 1552 } 1553 m = m->m_next; 1554 } else { 1555 sbfree(&so->so_rcv, m); 1556 so->so_rcv.sb_mb = m->m_next; 1557 m->m_next = NULL; 1558 *cme = m; 1559 cme = &(*cme)->m_next; 1560 m = so->so_rcv.sb_mb; 1561 } 1562 } while (m != NULL && m->m_type == MT_CONTROL); 1563 if ((flags & MSG_PEEK) == 0) 1564 sockbuf_pushsync(&so->so_rcv, nextrecord); 1565 while (cm != NULL) { 1566 cmn = cm->m_next; 1567 cm->m_next = NULL; 1568 if (pr->pr_domain->dom_externalize != NULL) { 1569 SOCKBUF_UNLOCK(&so->so_rcv); 1570 error = (*pr->pr_domain->dom_externalize) 1571 (cm, controlp); 1572 SOCKBUF_LOCK(&so->so_rcv); 1573 } else if (controlp != NULL) 1574 *controlp = cm; 1575 else 1576 m_freem(cm); 1577 if (controlp != NULL) { 1578 orig_resid = 0; 1579 while (*controlp != NULL) 1580 controlp = &(*controlp)->m_next; 1581 } 1582 cm = cmn; 1583 } 1584 if (m != NULL) 1585 nextrecord = so->so_rcv.sb_mb->m_nextpkt; 1586 else 1587 nextrecord = so->so_rcv.sb_mb; 1588 orig_resid = 0; 1589 } 1590 if (m != NULL) { 1591 if ((flags & MSG_PEEK) == 0) { 1592 KASSERT(m->m_nextpkt == nextrecord, 1593 ("soreceive: post-control, nextrecord !sync")); 1594 if (nextrecord == NULL) { 1595 KASSERT(so->so_rcv.sb_mb == m, 1596 ("soreceive: post-control, sb_mb!=m")); 1597 KASSERT(so->so_rcv.sb_lastrecord == m, 1598 ("soreceive: post-control, lastrecord!=m")); 1599 } 1600 } 1601 type = m->m_type; 1602 if (type == MT_OOBDATA) 1603 flags |= MSG_OOB; 1604 } else { 1605 if ((flags & MSG_PEEK) == 0) { 1606 KASSERT(so->so_rcv.sb_mb == nextrecord, 1607 ("soreceive: sb_mb != nextrecord")); 1608 if (so->so_rcv.sb_mb == NULL) { 1609 KASSERT(so->so_rcv.sb_lastrecord == NULL, 1610 ("soreceive: sb_lastercord != NULL")); 1611 } 1612 } 1613 } 1614 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1615 SBLASTRECORDCHK(&so->so_rcv); 1616 SBLASTMBUFCHK(&so->so_rcv); 1617 1618 /* 1619 * Now continue to read any data mbufs off of the head of the socket 1620 * buffer until the read request is satisfied. Note that 'type' is 1621 * used to store the type of any mbuf reads that have happened so far 1622 * such that soreceive() can stop reading if the type changes, which 1623 * causes soreceive() to return only one of regular data and inline 1624 * out-of-band data in a single socket receive operation. 1625 */ 1626 moff = 0; 1627 offset = 0; 1628 while (m != NULL && uio->uio_resid > 0 && error == 0) { 1629 /* 1630 * If the type of mbuf has changed since the last mbuf 1631 * examined ('type'), end the receive operation. 1632 */ 1633 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1634 if (m->m_type == MT_OOBDATA) { 1635 if (type != MT_OOBDATA) 1636 break; 1637 } else if (type == MT_OOBDATA) 1638 break; 1639 else 1640 KASSERT(m->m_type == MT_DATA, 1641 ("m->m_type == %d", m->m_type)); 1642 so->so_rcv.sb_state &= ~SBS_RCVATMARK; 1643 len = uio->uio_resid; 1644 if (so->so_oobmark && len > so->so_oobmark - offset) 1645 len = so->so_oobmark - offset; 1646 if (len > m->m_len - moff) 1647 len = m->m_len - moff; 1648 /* 1649 * If mp is set, just pass back the mbufs. Otherwise copy 1650 * them out via the uio, then free. Sockbuf must be 1651 * consistent here (points to current mbuf, it points to next 1652 * record) when we drop priority; we must note any additions 1653 * to the sockbuf when we block interrupts again. 
1654 */ 1655 if (mp == NULL) { 1656 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1657 SBLASTRECORDCHK(&so->so_rcv); 1658 SBLASTMBUFCHK(&so->so_rcv); 1659 SOCKBUF_UNLOCK(&so->so_rcv); 1660 #ifdef ZERO_COPY_SOCKETS 1661 if (so_zero_copy_receive) { 1662 int disposable; 1663 1664 if ((m->m_flags & M_EXT) 1665 && (m->m_ext.ext_type == EXT_DISPOSABLE)) 1666 disposable = 1; 1667 else 1668 disposable = 0; 1669 1670 error = uiomoveco(mtod(m, char *) + moff, 1671 (int)len, uio, 1672 disposable); 1673 } else 1674 #endif /* ZERO_COPY_SOCKETS */ 1675 error = uiomove(mtod(m, char *) + moff, (int)len, uio); 1676 SOCKBUF_LOCK(&so->so_rcv); 1677 if (error) { 1678 /* 1679 * The MT_SONAME mbuf has already been removed 1680 * from the record, so it is necessary to 1681 * remove the data mbufs, if any, to preserve 1682 * the invariant in the case of PR_ADDR that 1683 * requires MT_SONAME mbufs at the head of 1684 * each record. 1685 */ 1686 if (m && pr->pr_flags & PR_ATOMIC && 1687 ((flags & MSG_PEEK) == 0)) 1688 (void)sbdroprecord_locked(&so->so_rcv); 1689 SOCKBUF_UNLOCK(&so->so_rcv); 1690 goto release; 1691 } 1692 } else 1693 uio->uio_resid -= len; 1694 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1695 if (len == m->m_len - moff) { 1696 if (m->m_flags & M_EOR) 1697 flags |= MSG_EOR; 1698 if (flags & MSG_PEEK) { 1699 m = m->m_next; 1700 moff = 0; 1701 } else { 1702 nextrecord = m->m_nextpkt; 1703 sbfree(&so->so_rcv, m); 1704 if (mp != NULL) { 1705 *mp = m; 1706 mp = &m->m_next; 1707 so->so_rcv.sb_mb = m = m->m_next; 1708 *mp = NULL; 1709 } else { 1710 so->so_rcv.sb_mb = m_free(m); 1711 m = so->so_rcv.sb_mb; 1712 } 1713 sockbuf_pushsync(&so->so_rcv, nextrecord); 1714 SBLASTRECORDCHK(&so->so_rcv); 1715 SBLASTMBUFCHK(&so->so_rcv); 1716 } 1717 } else { 1718 if (flags & MSG_PEEK) 1719 moff += len; 1720 else { 1721 if (mp != NULL) { 1722 int copy_flag; 1723 1724 if (flags & MSG_DONTWAIT) 1725 copy_flag = M_DONTWAIT; 1726 else 1727 copy_flag = M_WAIT; 1728 if (copy_flag == M_WAIT) 1729 SOCKBUF_UNLOCK(&so->so_rcv); 1730 *mp = m_copym(m, 0, len, copy_flag); 1731 if (copy_flag == M_WAIT) 1732 SOCKBUF_LOCK(&so->so_rcv); 1733 if (*mp == NULL) { 1734 /* 1735 * m_copym() couldn't 1736 * allocate an mbuf. Adjust 1737 * uio_resid back (it was 1738 * adjusted down by len 1739 * bytes, which we didn't end 1740 * up "copying" over). 1741 */ 1742 uio->uio_resid += len; 1743 break; 1744 } 1745 } 1746 m->m_data += len; 1747 m->m_len -= len; 1748 so->so_rcv.sb_cc -= len; 1749 } 1750 } 1751 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1752 if (so->so_oobmark) { 1753 if ((flags & MSG_PEEK) == 0) { 1754 so->so_oobmark -= len; 1755 if (so->so_oobmark == 0) { 1756 so->so_rcv.sb_state |= SBS_RCVATMARK; 1757 break; 1758 } 1759 } else { 1760 offset += len; 1761 if (offset == so->so_oobmark) 1762 break; 1763 } 1764 } 1765 if (flags & MSG_EOR) 1766 break; 1767 /* 1768 * If the MSG_WAITALL flag is set (for non-atomic socket), we 1769 * must not quit until "uio->uio_resid == 0" or an error 1770 * termination. If a signal/timeout occurs, return with a 1771 * short count but without error. Keep sockbuf locked 1772 * against other readers. 1773 */ 1774 while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && 1775 !sosendallatonce(so) && nextrecord == NULL) { 1776 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1777 if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE) 1778 break; 1779 /* 1780 * Notify the protocol that some data has been 1781 * drained before blocking. 
1782 */ 1783 if (pr->pr_flags & PR_WANTRCVD) { 1784 SOCKBUF_UNLOCK(&so->so_rcv); 1785 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 1786 SOCKBUF_LOCK(&so->so_rcv); 1787 } 1788 SBLASTRECORDCHK(&so->so_rcv); 1789 SBLASTMBUFCHK(&so->so_rcv); 1790 error = sbwait(&so->so_rcv); 1791 if (error) { 1792 SOCKBUF_UNLOCK(&so->so_rcv); 1793 goto release; 1794 } 1795 m = so->so_rcv.sb_mb; 1796 if (m != NULL) 1797 nextrecord = m->m_nextpkt; 1798 } 1799 } 1800 1801 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1802 if (m != NULL && pr->pr_flags & PR_ATOMIC) { 1803 flags |= MSG_TRUNC; 1804 if ((flags & MSG_PEEK) == 0) 1805 (void) sbdroprecord_locked(&so->so_rcv); 1806 } 1807 if ((flags & MSG_PEEK) == 0) { 1808 if (m == NULL) { 1809 /* 1810 * First part is an inline SB_EMPTY_FIXUP(). Second 1811 * part makes sure sb_lastrecord is up-to-date if 1812 * there is still data in the socket buffer. 1813 */ 1814 so->so_rcv.sb_mb = nextrecord; 1815 if (so->so_rcv.sb_mb == NULL) { 1816 so->so_rcv.sb_mbtail = NULL; 1817 so->so_rcv.sb_lastrecord = NULL; 1818 } else if (nextrecord->m_nextpkt == NULL) 1819 so->so_rcv.sb_lastrecord = nextrecord; 1820 } 1821 SBLASTRECORDCHK(&so->so_rcv); 1822 SBLASTMBUFCHK(&so->so_rcv); 1823 /* 1824 * If soreceive() is being done from the socket callback, 1825 * then don't need to generate ACK to peer to update window, 1826 * since ACK will be generated on return to TCP. 1827 */ 1828 if (!(flags & MSG_SOCALLBCK) && 1829 (pr->pr_flags & PR_WANTRCVD)) { 1830 SOCKBUF_UNLOCK(&so->so_rcv); 1831 (*pr->pr_usrreqs->pru_rcvd)(so, flags); 1832 SOCKBUF_LOCK(&so->so_rcv); 1833 } 1834 } 1835 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1836 if (orig_resid == uio->uio_resid && orig_resid && 1837 (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { 1838 SOCKBUF_UNLOCK(&so->so_rcv); 1839 goto restart; 1840 } 1841 SOCKBUF_UNLOCK(&so->so_rcv); 1842 1843 if (flagsp != NULL) 1844 *flagsp |= flags; 1845 release: 1846 sbunlock(&so->so_rcv); 1847 return (error); 1848 } 1849 1850 /* 1851 * Optimized version of soreceive() for simple datagram cases from userspace. 1852 * Unlike in the stream case, we're able to drop a datagram if copyout() 1853 * fails, and because we handle datagrams atomically, we don't need to use a 1854 * sleep lock to prevent I/O interlacing. 1855 */ 1856 int 1857 soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, 1858 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 1859 { 1860 struct mbuf *m, *m2; 1861 int flags, len, error, offset; 1862 struct protosw *pr = so->so_proto; 1863 struct mbuf *nextrecord; 1864 1865 if (psa != NULL) 1866 *psa = NULL; 1867 if (controlp != NULL) 1868 *controlp = NULL; 1869 if (flagsp != NULL) 1870 flags = *flagsp &~ MSG_EOR; 1871 else 1872 flags = 0; 1873 1874 /* 1875 * For any complicated cases, fall back to the full 1876 * soreceive_generic(). 1877 */ 1878 if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) 1879 return (soreceive_generic(so, psa, uio, mp0, controlp, 1880 flagsp)); 1881 1882 /* 1883 * Enforce restrictions on use. 1884 */ 1885 KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, 1886 ("soreceive_dgram: wantrcvd")); 1887 KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); 1888 KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, 1889 ("soreceive_dgram: SBS_RCVATMARK")); 1890 KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, 1891 ("soreceive_dgram: P_CONNREQUIRED")); 1892 1893 /* 1894 * Loop blocking while waiting for a datagram. 
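 *
 * sbwait() releases the receive sockbuf mutex while asleep, and
 * soreceive_dgram() deliberately takes no sleep lock (see the comment
 * above), so another reader may have consumed the datagram that
 * triggered the wakeup by the time we run again.  The condition is
 * therefore re-tested on every pass, which is why this is a loop and
 * not a single wait.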
1895 */ 1896 SOCKBUF_LOCK(&so->so_rcv); 1897 while ((m = so->so_rcv.sb_mb) == NULL) { 1898 KASSERT(so->so_rcv.sb_cc == 0, 1899 ("soreceive_dgram: sb_mb NULL but sb_cc %u", 1900 so->so_rcv.sb_cc)); 1901 if (so->so_error) { 1902 error = so->so_error; 1903 so->so_error = 0; 1904 SOCKBUF_UNLOCK(&so->so_rcv); 1905 return (error); 1906 } 1907 if (so->so_rcv.sb_state & SBS_CANTRCVMORE || 1908 uio->uio_resid == 0) { 1909 SOCKBUF_UNLOCK(&so->so_rcv); 1910 return (0); 1911 } 1912 if ((so->so_state & SS_NBIO) || 1913 (flags & (MSG_DONTWAIT|MSG_NBIO))) { 1914 SOCKBUF_UNLOCK(&so->so_rcv); 1915 return (EWOULDBLOCK); 1916 } 1917 SBLASTRECORDCHK(&so->so_rcv); 1918 SBLASTMBUFCHK(&so->so_rcv); 1919 error = sbwait(&so->so_rcv); 1920 if (error) { 1921 SOCKBUF_UNLOCK(&so->so_rcv); 1922 return (error); 1923 } 1924 } 1925 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1926 1927 if (uio->uio_td) 1928 uio->uio_td->td_ru.ru_msgrcv++; 1929 SBLASTRECORDCHK(&so->so_rcv); 1930 SBLASTMBUFCHK(&so->so_rcv); 1931 nextrecord = m->m_nextpkt; 1932 if (nextrecord == NULL) { 1933 KASSERT(so->so_rcv.sb_lastrecord == m, 1934 ("soreceive_dgram: lastrecord != m")); 1935 } 1936 1937 KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, 1938 ("soreceive_dgram: m_nextpkt != nextrecord")); 1939 1940 /* 1941 * Pull 'm' and its chain off the front of the packet queue. 1942 */ 1943 so->so_rcv.sb_mb = NULL; 1944 sockbuf_pushsync(&so->so_rcv, nextrecord); 1945 1946 /* 1947 * Walk 'm's chain and free that many bytes from the socket buffer. 1948 */ 1949 for (m2 = m; m2 != NULL; m2 = m2->m_next) 1950 sbfree(&so->so_rcv, m2); 1951 1952 /* 1953 * Do a few last checks before we let go of the lock. 1954 */ 1955 SBLASTRECORDCHK(&so->so_rcv); 1956 SBLASTMBUFCHK(&so->so_rcv); 1957 SOCKBUF_UNLOCK(&so->so_rcv); 1958 1959 if (pr->pr_flags & PR_ADDR) { 1960 KASSERT(m->m_type == MT_SONAME, 1961 ("m->m_type == %d", m->m_type)); 1962 if (psa != NULL) 1963 *psa = sodupsockaddr(mtod(m, struct sockaddr *), 1964 M_NOWAIT); 1965 m = m_free(m); 1966 } 1967 if (m == NULL) { 1968 /* XXXRW: Can this happen? */ 1969 return (0); 1970 } 1971 1972 /* 1973 * Packet to copyout() is now in 'm' and it is disconnected from the 1974 * queue. 1975 * 1976 * Process one or more MT_CONTROL mbufs present before any data mbufs 1977 * in the first mbuf chain on the socket buffer. We call into the 1978 * protocol to perform externalization (or freeing if controlp == 1979 * NULL). 
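 *
 * The externalization hook is supplied per domain.  For the local
 * (UNIX) domain, for example, dom_externalize is unp_externalize(),
 * which converts the in-kernel file references carried in SCM_RIGHTS
 * control messages into file descriptors in the receiving process.
 * A domain wires the hook into its struct domain roughly as follows
 * (abridged sketch, not the full initializer):
 *
 *	struct domain localdomain = {
 *		.dom_family =		AF_LOCAL,
 *		.dom_name =		"local",
 *		.dom_externalize =	unp_externalize,
 *		.dom_dispose =		unp_dispose,
 *	};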
1980 */ 1981 if (m->m_type == MT_CONTROL) { 1982 struct mbuf *cm = NULL, *cmn; 1983 struct mbuf **cme = &cm; 1984 1985 do { 1986 m2 = m->m_next; 1987 m->m_next = NULL; 1988 *cme = m; 1989 cme = &(*cme)->m_next; 1990 m = m2; 1991 } while (m != NULL && m->m_type == MT_CONTROL); 1992 while (cm != NULL) { 1993 cmn = cm->m_next; 1994 cm->m_next = NULL; 1995 if (pr->pr_domain->dom_externalize != NULL) { 1996 error = (*pr->pr_domain->dom_externalize) 1997 (cm, controlp); 1998 } else if (controlp != NULL) 1999 *controlp = cm; 2000 else 2001 m_freem(cm); 2002 if (controlp != NULL) { 2003 while (*controlp != NULL) 2004 controlp = &(*controlp)->m_next; 2005 } 2006 cm = cmn; 2007 } 2008 } 2009 KASSERT(m->m_type == MT_DATA, ("soreceive_dgram: !data")); 2010 2011 offset = 0; 2012 while (m != NULL && uio->uio_resid > 0) { 2013 len = uio->uio_resid; 2014 if (len > m->m_len) 2015 len = m->m_len; 2016 error = uiomove(mtod(m, char *), (int)len, uio); 2017 if (error) { 2018 m_freem(m); 2019 return (error); 2020 } 2021 m = m_free(m); 2022 } 2023 if (m != NULL) 2024 flags |= MSG_TRUNC; 2025 m_freem(m); 2026 if (flagsp != NULL) 2027 *flagsp |= flags; 2028 return (0); 2029 } 2030 2031 int 2032 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, 2033 struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 2034 { 2035 2036 return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, 2037 controlp, flagsp)); 2038 } 2039 2040 int 2041 soshutdown(struct socket *so, int how) 2042 { 2043 struct protosw *pr = so->so_proto; 2044 2045 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) 2046 return (EINVAL); 2047 if (pr->pr_usrreqs->pru_flush != NULL) { 2048 (*pr->pr_usrreqs->pru_flush)(so, how); 2049 } 2050 if (how != SHUT_WR) 2051 sorflush(so); 2052 if (how != SHUT_RD) 2053 return ((*pr->pr_usrreqs->pru_shutdown)(so)); 2054 return (0); 2055 } 2056 2057 void 2058 sorflush(struct socket *so) 2059 { 2060 struct sockbuf *sb = &so->so_rcv; 2061 struct protosw *pr = so->so_proto; 2062 struct sockbuf asb; 2063 2064 /* 2065 * In order to avoid calling dom_dispose with the socket buffer mutex 2066 * held, and in order to generally avoid holding the lock for a long 2067 * time, we make a copy of the socket buffer and clear the original 2068 * (except locks, state). The new socket buffer copy won't have 2069 * initialized locks so we can only call routines that won't use or 2070 * assert those locks. 2071 * 2072 * Dislodge threads currently blocked in receive and wait to acquire 2073 * a lock against other simultaneous readers before clearing the 2074 * socket buffer. Don't let our acquire be interrupted by a signal 2075 * despite any existing socket disposition on interruptable waiting. 2076 */ 2077 socantrcvmore(so); 2078 (void) sblock(sb, SBL_WAIT | SBL_NOINTR); 2079 2080 /* 2081 * Invalidate/clear most of the sockbuf structure, but leave selinfo 2082 * and mutex data unchanged. 2083 */ 2084 SOCKBUF_LOCK(sb); 2085 bzero(&asb, offsetof(struct sockbuf, sb_startzero)); 2086 bcopy(&sb->sb_startzero, &asb.sb_startzero, 2087 sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); 2088 bzero(&sb->sb_startzero, 2089 sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); 2090 SOCKBUF_UNLOCK(sb); 2091 sbunlock(sb); 2092 2093 /* 2094 * Dispose of special rights and flush the socket buffer. Don't call 2095 * any unsafe routines (that rely on locks being initialized) on asb. 
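 *
 * "Special rights" are again domain-specific: for the local domain,
 * dom_dispose is unp_dispose(), which drops the file references held
 * by any SCM_RIGHTS messages that were queued but never delivered.
 * Because asb is an unlocked stack snapshot, handing asb.sb_mb to
 * that routine and then calling sbrelease_internal() on asb needs no
 * sockbuf mutex.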
2096 */ 2097 if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) 2098 (*pr->pr_domain->dom_dispose)(asb.sb_mb); 2099 sbrelease_internal(&asb, so); 2100 } 2101 2102 /* 2103 * Perhaps this routine, and sooptcopyout(), below, ought to come in an 2104 * additional variant to handle the case where the option value needs to be 2105 * some kind of integer, but not a specific size. In addition to their use 2106 * here, these functions are also called by the protocol-level pr_ctloutput() 2107 * routines. 2108 */ 2109 int 2110 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) 2111 { 2112 size_t valsize; 2113 2114 /* 2115 * If the user gives us more than we wanted, we ignore it, but if we 2116 * don't get the minimum length the caller wants, we return EINVAL. 2117 * On success, sopt->sopt_valsize is set to however much we actually 2118 * retrieved. 2119 */ 2120 if ((valsize = sopt->sopt_valsize) < minlen) 2121 return EINVAL; 2122 if (valsize > len) 2123 sopt->sopt_valsize = valsize = len; 2124 2125 if (sopt->sopt_td != NULL) 2126 return (copyin(sopt->sopt_val, buf, valsize)); 2127 2128 bcopy(sopt->sopt_val, buf, valsize); 2129 return (0); 2130 } 2131 2132 /* 2133 * Kernel version of setsockopt(2). 2134 * 2135 * XXX: optlen is size_t, not socklen_t 2136 */ 2137 int 2138 so_setsockopt(struct socket *so, int level, int optname, void *optval, 2139 size_t optlen) 2140 { 2141 struct sockopt sopt; 2142 2143 sopt.sopt_level = level; 2144 sopt.sopt_name = optname; 2145 sopt.sopt_dir = SOPT_SET; 2146 sopt.sopt_val = optval; 2147 sopt.sopt_valsize = optlen; 2148 sopt.sopt_td = NULL; 2149 return (sosetopt(so, &sopt)); 2150 } 2151 2152 int 2153 sosetopt(struct socket *so, struct sockopt *sopt) 2154 { 2155 int error, optval; 2156 struct linger l; 2157 struct timeval tv; 2158 u_long val; 2159 #ifdef MAC 2160 struct mac extmac; 2161 #endif 2162 2163 error = 0; 2164 if (sopt->sopt_level != SOL_SOCKET) { 2165 if (so->so_proto && so->so_proto->pr_ctloutput) 2166 return ((*so->so_proto->pr_ctloutput) 2167 (so, sopt)); 2168 error = ENOPROTOOPT; 2169 } else { 2170 switch (sopt->sopt_name) { 2171 #ifdef INET 2172 case SO_ACCEPTFILTER: 2173 error = do_setopt_accept_filter(so, sopt); 2174 if (error) 2175 goto bad; 2176 break; 2177 #endif 2178 case SO_LINGER: 2179 error = sooptcopyin(sopt, &l, sizeof l, sizeof l); 2180 if (error) 2181 goto bad; 2182 2183 SOCK_LOCK(so); 2184 so->so_linger = l.l_linger; 2185 if (l.l_onoff) 2186 so->so_options |= SO_LINGER; 2187 else 2188 so->so_options &= ~SO_LINGER; 2189 SOCK_UNLOCK(so); 2190 break; 2191 2192 case SO_DEBUG: 2193 case SO_KEEPALIVE: 2194 case SO_DONTROUTE: 2195 case SO_USELOOPBACK: 2196 case SO_BROADCAST: 2197 case SO_REUSEADDR: 2198 case SO_REUSEPORT: 2199 case SO_OOBINLINE: 2200 case SO_TIMESTAMP: 2201 case SO_BINTIME: 2202 case SO_NOSIGPIPE: 2203 case SO_NO_DDP: 2204 case SO_NO_OFFLOAD: 2205 error = sooptcopyin(sopt, &optval, sizeof optval, 2206 sizeof optval); 2207 if (error) 2208 goto bad; 2209 SOCK_LOCK(so); 2210 if (optval) 2211 so->so_options |= sopt->sopt_name; 2212 else 2213 so->so_options &= ~sopt->sopt_name; 2214 SOCK_UNLOCK(so); 2215 break; 2216 2217 case SO_SETFIB: 2218 error = sooptcopyin(sopt, &optval, sizeof optval, 2219 sizeof optval); 2220 if (optval < 1 || optval > rt_numfibs) { 2221 error = EINVAL; 2222 goto bad; 2223 } 2224 if ((so->so_proto->pr_domain->dom_family == PF_INET) || 2225 (so->so_proto->pr_domain->dom_family == PF_ROUTE)) { 2226 so->so_fibnum = optval; 2227 /* Note: ignore error */ 2228 if (so->so_proto && 
so->so_proto->pr_ctloutput) 2229 (*so->so_proto->pr_ctloutput)(so, sopt); 2230 } else { 2231 so->so_fibnum = 0; 2232 } 2233 break; 2234 case SO_SNDBUF: 2235 case SO_RCVBUF: 2236 case SO_SNDLOWAT: 2237 case SO_RCVLOWAT: 2238 error = sooptcopyin(sopt, &optval, sizeof optval, 2239 sizeof optval); 2240 if (error) 2241 goto bad; 2242 2243 /* 2244 * Values < 1 make no sense for any of these options, 2245 * so disallow them. 2246 */ 2247 if (optval < 1) { 2248 error = EINVAL; 2249 goto bad; 2250 } 2251 2252 switch (sopt->sopt_name) { 2253 case SO_SNDBUF: 2254 case SO_RCVBUF: 2255 if (sbreserve(sopt->sopt_name == SO_SNDBUF ? 2256 &so->so_snd : &so->so_rcv, (u_long)optval, 2257 so, curthread) == 0) { 2258 error = ENOBUFS; 2259 goto bad; 2260 } 2261 (sopt->sopt_name == SO_SNDBUF ? &so->so_snd : 2262 &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE; 2263 break; 2264 2265 /* 2266 * Make sure the low-water is never greater than the 2267 * high-water. 2268 */ 2269 case SO_SNDLOWAT: 2270 SOCKBUF_LOCK(&so->so_snd); 2271 so->so_snd.sb_lowat = 2272 (optval > so->so_snd.sb_hiwat) ? 2273 so->so_snd.sb_hiwat : optval; 2274 SOCKBUF_UNLOCK(&so->so_snd); 2275 break; 2276 case SO_RCVLOWAT: 2277 SOCKBUF_LOCK(&so->so_rcv); 2278 so->so_rcv.sb_lowat = 2279 (optval > so->so_rcv.sb_hiwat) ? 2280 so->so_rcv.sb_hiwat : optval; 2281 SOCKBUF_UNLOCK(&so->so_rcv); 2282 break; 2283 } 2284 break; 2285 2286 case SO_SNDTIMEO: 2287 case SO_RCVTIMEO: 2288 #ifdef COMPAT_IA32 2289 if (SV_CURPROC_FLAG(SV_ILP32)) { 2290 struct timeval32 tv32; 2291 2292 error = sooptcopyin(sopt, &tv32, sizeof tv32, 2293 sizeof tv32); 2294 CP(tv32, tv, tv_sec); 2295 CP(tv32, tv, tv_usec); 2296 } else 2297 #endif 2298 error = sooptcopyin(sopt, &tv, sizeof tv, 2299 sizeof tv); 2300 if (error) 2301 goto bad; 2302 2303 /* assert(hz > 0); */ 2304 if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz || 2305 tv.tv_usec < 0 || tv.tv_usec >= 1000000) { 2306 error = EDOM; 2307 goto bad; 2308 } 2309 /* assert(tick > 0); */ 2310 /* assert(ULONG_MAX - INT_MAX >= 1000000); */ 2311 val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; 2312 if (val > INT_MAX) { 2313 error = EDOM; 2314 goto bad; 2315 } 2316 if (val == 0 && tv.tv_usec != 0) 2317 val = 1; 2318 2319 switch (sopt->sopt_name) { 2320 case SO_SNDTIMEO: 2321 so->so_snd.sb_timeo = val; 2322 break; 2323 case SO_RCVTIMEO: 2324 so->so_rcv.sb_timeo = val; 2325 break; 2326 } 2327 break; 2328 2329 case SO_LABEL: 2330 #ifdef MAC 2331 error = sooptcopyin(sopt, &extmac, sizeof extmac, 2332 sizeof extmac); 2333 if (error) 2334 goto bad; 2335 error = mac_setsockopt_label(sopt->sopt_td->td_ucred, 2336 so, &extmac); 2337 #else 2338 error = EOPNOTSUPP; 2339 #endif 2340 break; 2341 2342 default: 2343 error = ENOPROTOOPT; 2344 break; 2345 } 2346 if (error == 0 && so->so_proto != NULL && 2347 so->so_proto->pr_ctloutput != NULL) { 2348 (void) ((*so->so_proto->pr_ctloutput) 2349 (so, sopt)); 2350 } 2351 } 2352 bad: 2353 return (error); 2354 } 2355 2356 /* 2357 * Helper routine for getsockopt. 2358 */ 2359 int 2360 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) 2361 { 2362 int error; 2363 size_t valsize; 2364 2365 error = 0; 2366 2367 /* 2368 * Documented get behavior is that we always return a value, possibly 2369 * truncated to fit in the user's buffer. Traditional behavior is 2370 * that we always tell the user precisely how much we copied, rather 2371 * than something useful like the total amount we had available for 2372 * her. Note that this interface is not idempotent; the entire 2373 * answer must generated ahead of time. 
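 *
 * sooptcopyin() above and this routine are the intended funnel for
 * option values in both directions, whether the request came from
 * userland (sopt_td != NULL, so copyin()/copyout() are used) or from
 * an in-kernel caller such as so_setsockopt() (sopt_td == NULL, plain
 * bcopy()).  A protocol's pr_ctloutput handler typically uses the two
 * in pairs; an illustrative sketch for a hypothetical integer option
 * FOO_OPTION stored in a hypothetical pcb field:
 *
 *	switch (sopt->sopt_dir) {
 *	case SOPT_SET:
 *		error = sooptcopyin(sopt, &optval, sizeof optval,
 *		    sizeof optval);
 *		if (error == 0)
 *			fp->foo_option = optval;
 *		break;
 *	case SOPT_GET:
 *		optval = fp->foo_option;
 *		error = sooptcopyout(sopt, &optval, sizeof optval);
 *		break;
 *	}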
2374 */ 2375 valsize = min(len, sopt->sopt_valsize); 2376 sopt->sopt_valsize = valsize; 2377 if (sopt->sopt_val != NULL) { 2378 if (sopt->sopt_td != NULL) 2379 error = copyout(buf, sopt->sopt_val, valsize); 2380 else 2381 bcopy(buf, sopt->sopt_val, valsize); 2382 } 2383 return (error); 2384 } 2385 2386 int 2387 sogetopt(struct socket *so, struct sockopt *sopt) 2388 { 2389 int error, optval; 2390 struct linger l; 2391 struct timeval tv; 2392 #ifdef MAC 2393 struct mac extmac; 2394 #endif 2395 2396 error = 0; 2397 if (sopt->sopt_level != SOL_SOCKET) { 2398 if (so->so_proto && so->so_proto->pr_ctloutput) { 2399 return ((*so->so_proto->pr_ctloutput) 2400 (so, sopt)); 2401 } else 2402 return (ENOPROTOOPT); 2403 } else { 2404 switch (sopt->sopt_name) { 2405 #ifdef INET 2406 case SO_ACCEPTFILTER: 2407 error = do_getopt_accept_filter(so, sopt); 2408 break; 2409 #endif 2410 case SO_LINGER: 2411 SOCK_LOCK(so); 2412 l.l_onoff = so->so_options & SO_LINGER; 2413 l.l_linger = so->so_linger; 2414 SOCK_UNLOCK(so); 2415 error = sooptcopyout(sopt, &l, sizeof l); 2416 break; 2417 2418 case SO_USELOOPBACK: 2419 case SO_DONTROUTE: 2420 case SO_DEBUG: 2421 case SO_KEEPALIVE: 2422 case SO_REUSEADDR: 2423 case SO_REUSEPORT: 2424 case SO_BROADCAST: 2425 case SO_OOBINLINE: 2426 case SO_ACCEPTCONN: 2427 case SO_TIMESTAMP: 2428 case SO_BINTIME: 2429 case SO_NOSIGPIPE: 2430 optval = so->so_options & sopt->sopt_name; 2431 integer: 2432 error = sooptcopyout(sopt, &optval, sizeof optval); 2433 break; 2434 2435 case SO_TYPE: 2436 optval = so->so_type; 2437 goto integer; 2438 2439 case SO_ERROR: 2440 SOCK_LOCK(so); 2441 optval = so->so_error; 2442 so->so_error = 0; 2443 SOCK_UNLOCK(so); 2444 goto integer; 2445 2446 case SO_SNDBUF: 2447 optval = so->so_snd.sb_hiwat; 2448 goto integer; 2449 2450 case SO_RCVBUF: 2451 optval = so->so_rcv.sb_hiwat; 2452 goto integer; 2453 2454 case SO_SNDLOWAT: 2455 optval = so->so_snd.sb_lowat; 2456 goto integer; 2457 2458 case SO_RCVLOWAT: 2459 optval = so->so_rcv.sb_lowat; 2460 goto integer; 2461 2462 case SO_SNDTIMEO: 2463 case SO_RCVTIMEO: 2464 optval = (sopt->sopt_name == SO_SNDTIMEO ? 
2465 so->so_snd.sb_timeo : so->so_rcv.sb_timeo); 2466 2467 tv.tv_sec = optval / hz; 2468 tv.tv_usec = (optval % hz) * tick; 2469 #ifdef COMPAT_IA32 2470 if (SV_CURPROC_FLAG(SV_ILP32)) { 2471 struct timeval32 tv32; 2472 2473 CP(tv, tv32, tv_sec); 2474 CP(tv, tv32, tv_usec); 2475 error = sooptcopyout(sopt, &tv32, sizeof tv32); 2476 } else 2477 #endif 2478 error = sooptcopyout(sopt, &tv, sizeof tv); 2479 break; 2480 2481 case SO_LABEL: 2482 #ifdef MAC 2483 error = sooptcopyin(sopt, &extmac, sizeof(extmac), 2484 sizeof(extmac)); 2485 if (error) 2486 return (error); 2487 error = mac_getsockopt_label(sopt->sopt_td->td_ucred, 2488 so, &extmac); 2489 if (error) 2490 return (error); 2491 error = sooptcopyout(sopt, &extmac, sizeof extmac); 2492 #else 2493 error = EOPNOTSUPP; 2494 #endif 2495 break; 2496 2497 case SO_PEERLABEL: 2498 #ifdef MAC 2499 error = sooptcopyin(sopt, &extmac, sizeof(extmac), 2500 sizeof(extmac)); 2501 if (error) 2502 return (error); 2503 error = mac_getsockopt_peerlabel( 2504 sopt->sopt_td->td_ucred, so, &extmac); 2505 if (error) 2506 return (error); 2507 error = sooptcopyout(sopt, &extmac, sizeof extmac); 2508 #else 2509 error = EOPNOTSUPP; 2510 #endif 2511 break; 2512 2513 case SO_LISTENQLIMIT: 2514 optval = so->so_qlimit; 2515 goto integer; 2516 2517 case SO_LISTENQLEN: 2518 optval = so->so_qlen; 2519 goto integer; 2520 2521 case SO_LISTENINCQLEN: 2522 optval = so->so_incqlen; 2523 goto integer; 2524 2525 default: 2526 error = ENOPROTOOPT; 2527 break; 2528 } 2529 return (error); 2530 } 2531 } 2532 2533 /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ 2534 int 2535 soopt_getm(struct sockopt *sopt, struct mbuf **mp) 2536 { 2537 struct mbuf *m, *m_prev; 2538 int sopt_size = sopt->sopt_valsize; 2539 2540 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 2541 if (m == NULL) 2542 return ENOBUFS; 2543 if (sopt_size > MLEN) { 2544 MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT); 2545 if ((m->m_flags & M_EXT) == 0) { 2546 m_free(m); 2547 return ENOBUFS; 2548 } 2549 m->m_len = min(MCLBYTES, sopt_size); 2550 } else { 2551 m->m_len = min(MLEN, sopt_size); 2552 } 2553 sopt_size -= m->m_len; 2554 *mp = m; 2555 m_prev = m; 2556 2557 while (sopt_size) { 2558 MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA); 2559 if (m == NULL) { 2560 m_freem(*mp); 2561 return ENOBUFS; 2562 } 2563 if (sopt_size > MLEN) { 2564 MCLGET(m, sopt->sopt_td != NULL ? M_WAIT : 2565 M_DONTWAIT); 2566 if ((m->m_flags & M_EXT) == 0) { 2567 m_freem(m); 2568 m_freem(*mp); 2569 return ENOBUFS; 2570 } 2571 m->m_len = min(MCLBYTES, sopt_size); 2572 } else { 2573 m->m_len = min(MLEN, sopt_size); 2574 } 2575 sopt_size -= m->m_len; 2576 m_prev->m_next = m; 2577 m_prev = m; 2578 } 2579 return (0); 2580 } 2581 2582 /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. 
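 *
 * soopt_getm() above allocates an mbuf chain large enough for
 * sopt_valsize, this routine then fills that chain from the sockopt,
 * and soopt_mcopyout() below copies results back the other way.  A
 * legacy-style ctloutput path would use them roughly as follows, with
 * old_mbuf_ctloutput() standing in for whatever pre-FreeBSD-3
 * interface still wants its option data in an mbuf chain (error
 * handling and freeing of the chain abridged):
 *
 *	error = soopt_getm(sopt, &m);
 *	if (error == 0)
 *		error = soopt_mcopyin(sopt, m);
 *	if (error == 0)
 *		error = old_mbuf_ctloutput(so, sopt, &m);
 *	if (error == 0)
 *		error = soopt_mcopyout(sopt, m);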
*/ 2583 int 2584 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) 2585 { 2586 struct mbuf *m0 = m; 2587 2588 if (sopt->sopt_val == NULL) 2589 return (0); 2590 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 2591 if (sopt->sopt_td != NULL) { 2592 int error; 2593 2594 error = copyin(sopt->sopt_val, mtod(m, char *), 2595 m->m_len); 2596 if (error != 0) { 2597 m_freem(m0); 2598 return(error); 2599 } 2600 } else 2601 bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); 2602 sopt->sopt_valsize -= m->m_len; 2603 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; 2604 m = m->m_next; 2605 } 2606 if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ 2607 panic("ip6_sooptmcopyin"); 2608 return (0); 2609 } 2610 2611 /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ 2612 int 2613 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) 2614 { 2615 struct mbuf *m0 = m; 2616 size_t valsize = 0; 2617 2618 if (sopt->sopt_val == NULL) 2619 return (0); 2620 while (m != NULL && sopt->sopt_valsize >= m->m_len) { 2621 if (sopt->sopt_td != NULL) { 2622 int error; 2623 2624 error = copyout(mtod(m, char *), sopt->sopt_val, 2625 m->m_len); 2626 if (error != 0) { 2627 m_freem(m0); 2628 return(error); 2629 } 2630 } else 2631 bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); 2632 sopt->sopt_valsize -= m->m_len; 2633 sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; 2634 valsize += m->m_len; 2635 m = m->m_next; 2636 } 2637 if (m != NULL) { 2638 /* enough soopt buffer should be given from user-land */ 2639 m_freem(m0); 2640 return(EINVAL); 2641 } 2642 sopt->sopt_valsize = valsize; 2643 return (0); 2644 } 2645 2646 /* 2647 * sohasoutofband(): protocol notifies socket layer of the arrival of new 2648 * out-of-band data, which will then notify socket consumers. 
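 *
 * A transport calls this after recording the urgent data in its own
 * state.  TCP, for example, invokes sohasoutofband() from its input
 * path once so_oobmark (and, if appropriate, SBS_RCVATMARK) has been
 * set up, so the protocol-side sequence is roughly (with "extra"
 * standing in for the protocol's own offset calculation):
 *
 *	so->so_oobmark = so->so_rcv.sb_cc + extra;
 *	sohasoutofband(so);
 *
 * after which consumers sleeping in select()/poll() on the receive
 * buffer are woken and SIGURG is delivered to the process or process
 * group registered via so_sigio.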
2649 */ 2650 void 2651 sohasoutofband(struct socket *so) 2652 { 2653 2654 if (so->so_sigio != NULL) 2655 pgsigio(&so->so_sigio, SIGURG, 0); 2656 selwakeuppri(&so->so_rcv.sb_sel, PSOCK); 2657 } 2658 2659 int 2660 sopoll(struct socket *so, int events, struct ucred *active_cred, 2661 struct thread *td) 2662 { 2663 2664 return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, 2665 td)); 2666 } 2667 2668 int 2669 sopoll_generic(struct socket *so, int events, struct ucred *active_cred, 2670 struct thread *td) 2671 { 2672 int revents = 0; 2673 2674 SOCKBUF_LOCK(&so->so_snd); 2675 SOCKBUF_LOCK(&so->so_rcv); 2676 if (events & (POLLIN | POLLRDNORM)) 2677 if (soreadable(so)) 2678 revents |= events & (POLLIN | POLLRDNORM); 2679 2680 if (events & POLLINIGNEOF) 2681 if (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat || 2682 !TAILQ_EMPTY(&so->so_comp) || so->so_error) 2683 revents |= POLLINIGNEOF; 2684 2685 if (events & (POLLOUT | POLLWRNORM)) 2686 if (sowriteable(so)) 2687 revents |= events & (POLLOUT | POLLWRNORM); 2688 2689 if (events & (POLLPRI | POLLRDBAND)) 2690 if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) 2691 revents |= events & (POLLPRI | POLLRDBAND); 2692 2693 if (revents == 0) { 2694 if (events & 2695 (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | 2696 POLLRDBAND)) { 2697 selrecord(td, &so->so_rcv.sb_sel); 2698 so->so_rcv.sb_flags |= SB_SEL; 2699 } 2700 2701 if (events & (POLLOUT | POLLWRNORM)) { 2702 selrecord(td, &so->so_snd.sb_sel); 2703 so->so_snd.sb_flags |= SB_SEL; 2704 } 2705 } 2706 2707 SOCKBUF_UNLOCK(&so->so_rcv); 2708 SOCKBUF_UNLOCK(&so->so_snd); 2709 return (revents); 2710 } 2711 2712 int 2713 soo_kqfilter(struct file *fp, struct knote *kn) 2714 { 2715 struct socket *so = kn->kn_fp->f_data; 2716 struct sockbuf *sb; 2717 2718 switch (kn->kn_filter) { 2719 case EVFILT_READ: 2720 if (so->so_options & SO_ACCEPTCONN) 2721 kn->kn_fop = &solisten_filtops; 2722 else 2723 kn->kn_fop = &soread_filtops; 2724 sb = &so->so_rcv; 2725 break; 2726 case EVFILT_WRITE: 2727 kn->kn_fop = &sowrite_filtops; 2728 sb = &so->so_snd; 2729 break; 2730 default: 2731 return (EINVAL); 2732 } 2733 2734 SOCKBUF_LOCK(sb); 2735 knlist_add(&sb->sb_sel.si_note, kn, 1); 2736 sb->sb_flags |= SB_KNOTE; 2737 SOCKBUF_UNLOCK(sb); 2738 return (0); 2739 } 2740 2741 /* 2742 * Some routines that return EOPNOTSUPP for entry points that are not 2743 * supported by a protocol. Fill in as needed. 
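 *
 * Protocols typically point the pr_usrreqs entry points they do not
 * implement at these stubs, so the socket layer gets back a clean
 * EOPNOTSUPP rather than faulting on a null pointer.  An abridged,
 * illustrative initializer for a hypothetical datagram-only protocol:
 *
 *	struct pr_usrreqs foo_usrreqs = {
 *		.pru_attach =		foo_attach,
 *		.pru_detach =		foo_detach,
 *		.pru_send =		foo_send,
 *		.pru_accept =		pru_accept_notsupp,
 *		.pru_connect2 =		pru_connect2_notsupp,
 *		.pru_listen =		pru_listen_notsupp,
 *		.pru_rcvoob =		pru_rcvoob_notsupp,
 *	};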
2744 */ 2745 int 2746 pru_accept_notsupp(struct socket *so, struct sockaddr **nam) 2747 { 2748 2749 return EOPNOTSUPP; 2750 } 2751 2752 int 2753 pru_attach_notsupp(struct socket *so, int proto, struct thread *td) 2754 { 2755 2756 return EOPNOTSUPP; 2757 } 2758 2759 int 2760 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 2761 { 2762 2763 return EOPNOTSUPP; 2764 } 2765 2766 int 2767 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) 2768 { 2769 2770 return EOPNOTSUPP; 2771 } 2772 2773 int 2774 pru_connect2_notsupp(struct socket *so1, struct socket *so2) 2775 { 2776 2777 return EOPNOTSUPP; 2778 } 2779 2780 int 2781 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, 2782 struct ifnet *ifp, struct thread *td) 2783 { 2784 2785 return EOPNOTSUPP; 2786 } 2787 2788 int 2789 pru_disconnect_notsupp(struct socket *so) 2790 { 2791 2792 return EOPNOTSUPP; 2793 } 2794 2795 int 2796 pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) 2797 { 2798 2799 return EOPNOTSUPP; 2800 } 2801 2802 int 2803 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) 2804 { 2805 2806 return EOPNOTSUPP; 2807 } 2808 2809 int 2810 pru_rcvd_notsupp(struct socket *so, int flags) 2811 { 2812 2813 return EOPNOTSUPP; 2814 } 2815 2816 int 2817 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) 2818 { 2819 2820 return EOPNOTSUPP; 2821 } 2822 2823 int 2824 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, 2825 struct sockaddr *addr, struct mbuf *control, struct thread *td) 2826 { 2827 2828 return EOPNOTSUPP; 2829 } 2830 2831 /* 2832 * This isn't really a ``null'' operation, but it's the default one and 2833 * doesn't do anything destructive. 2834 */ 2835 int 2836 pru_sense_null(struct socket *so, struct stat *sb) 2837 { 2838 2839 sb->st_blksize = so->so_snd.sb_hiwat; 2840 return 0; 2841 } 2842 2843 int 2844 pru_shutdown_notsupp(struct socket *so) 2845 { 2846 2847 return EOPNOTSUPP; 2848 } 2849 2850 int 2851 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) 2852 { 2853 2854 return EOPNOTSUPP; 2855 } 2856 2857 int 2858 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, 2859 struct mbuf *top, struct mbuf *control, int flags, struct thread *td) 2860 { 2861 2862 return EOPNOTSUPP; 2863 } 2864 2865 int 2866 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, 2867 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 2868 { 2869 2870 return EOPNOTSUPP; 2871 } 2872 2873 int 2874 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, 2875 struct thread *td) 2876 { 2877 2878 return EOPNOTSUPP; 2879 } 2880 2881 static void 2882 filt_sordetach(struct knote *kn) 2883 { 2884 struct socket *so = kn->kn_fp->f_data; 2885 2886 SOCKBUF_LOCK(&so->so_rcv); 2887 knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1); 2888 if (knlist_empty(&so->so_rcv.sb_sel.si_note)) 2889 so->so_rcv.sb_flags &= ~SB_KNOTE; 2890 SOCKBUF_UNLOCK(&so->so_rcv); 2891 } 2892 2893 /*ARGSUSED*/ 2894 static int 2895 filt_soread(struct knote *kn, long hint) 2896 { 2897 struct socket *so; 2898 2899 so = kn->kn_fp->f_data; 2900 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 2901 2902 kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl; 2903 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { 2904 kn->kn_flags |= EV_EOF; 2905 kn->kn_fflags = so->so_error; 2906 return (1); 2907 } else if (so->so_error) /* temporary udp error */ 2908 return (1); 2909 else if (kn->kn_sfflags & NOTE_LOWAT) 2910 return 
(kn->kn_data >= kn->kn_sdata); 2911 else 2912 return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat); 2913 } 2914 2915 static void 2916 filt_sowdetach(struct knote *kn) 2917 { 2918 struct socket *so = kn->kn_fp->f_data; 2919 2920 SOCKBUF_LOCK(&so->so_snd); 2921 knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1); 2922 if (knlist_empty(&so->so_snd.sb_sel.si_note)) 2923 so->so_snd.sb_flags &= ~SB_KNOTE; 2924 SOCKBUF_UNLOCK(&so->so_snd); 2925 } 2926 2927 /*ARGSUSED*/ 2928 static int 2929 filt_sowrite(struct knote *kn, long hint) 2930 { 2931 struct socket *so; 2932 2933 so = kn->kn_fp->f_data; 2934 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2935 kn->kn_data = sbspace(&so->so_snd); 2936 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2937 kn->kn_flags |= EV_EOF; 2938 kn->kn_fflags = so->so_error; 2939 return (1); 2940 } else if (so->so_error) /* temporary udp error */ 2941 return (1); 2942 else if (((so->so_state & SS_ISCONNECTED) == 0) && 2943 (so->so_proto->pr_flags & PR_CONNREQUIRED)) 2944 return (0); 2945 else if (kn->kn_sfflags & NOTE_LOWAT) 2946 return (kn->kn_data >= kn->kn_sdata); 2947 else 2948 return (kn->kn_data >= so->so_snd.sb_lowat); 2949 } 2950 2951 /*ARGSUSED*/ 2952 static int 2953 filt_solisten(struct knote *kn, long hint) 2954 { 2955 struct socket *so = kn->kn_fp->f_data; 2956 2957 kn->kn_data = so->so_qlen; 2958 return (! TAILQ_EMPTY(&so->so_comp)); 2959 } 2960 2961 int 2962 socheckuid(struct socket *so, uid_t uid) 2963 { 2964 2965 if (so == NULL) 2966 return (EPERM); 2967 if (so->so_cred->cr_uid != uid) 2968 return (EPERM); 2969 return (0); 2970 } 2971 2972 static int 2973 sysctl_somaxconn(SYSCTL_HANDLER_ARGS) 2974 { 2975 int error; 2976 int val; 2977 2978 val = somaxconn; 2979 error = sysctl_handle_int(oidp, &val, 0, req); 2980 if (error || !req->newptr ) 2981 return (error); 2982 2983 if (val < 1 || val > USHRT_MAX) 2984 return (EINVAL); 2985 2986 somaxconn = val; 2987 return (0); 2988 } 2989 2990 /* 2991 * These functions are used by protocols to notify the socket layer (and its 2992 * consumers) of state changes in the sockets driven by protocol-side events. 2993 */ 2994 2995 /* 2996 * Procedures to manipulate state flags of socket and do appropriate wakeups. 2997 * 2998 * Normal sequence from the active (originating) side is that 2999 * soisconnecting() is called during processing of connect() call, resulting 3000 * in an eventual call to soisconnected() if/when the connection is 3001 * established. When the connection is torn down soisdisconnecting() is 3002 * called during processing of disconnect() call, and soisdisconnected() is 3003 * called when the connection to the peer is totally severed. The semantics 3004 * of these routines are such that connectionless protocols can call 3005 * soisconnected() and soisdisconnected() only, bypassing the in-progress 3006 * calls when setting up a ``connection'' takes no time. 3007 * 3008 * From the passive side, a socket is created with two queues of sockets: 3009 * so_incomp for connections in progress and so_comp for connections already 3010 * made and awaiting user acceptance. As a protocol is preparing incoming 3011 * connections, it creates a socket structure queued on so_incomp by calling 3012 * sonewconn(). When the connection is established, soisconnected() is 3013 * called, and transfers the socket structure to so_comp, making it available 3014 * to accept(). 3015 * 3016 * If a socket is closed with sockets on either so_incomp or so_comp, these 3017 * sockets are dropped. 
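 *
 * Putting this together, a connection-oriented protocol typically
 * drives the socket layer with a sequence along these lines (active
 * side shown; the passive side substitutes sonewconn() on the listen
 * socket for the initial call):
 *
 *	soisconnecting(so);		during connect() processing
 *	    ... handshake completes ...
 *	soisconnected(so);		connection established
 *	    ...
 *	soisdisconnecting(so);		during disconnect() processing
 *	soisdisconnected(so);		connection fully torn down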
3018 * 3019 * If higher-level protocols are implemented in the kernel, the wakeups done 3020 * here will sometimes cause software-interrupt process scheduling. 3021 */ 3022 void 3023 soisconnecting(struct socket *so) 3024 { 3025 3026 SOCK_LOCK(so); 3027 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 3028 so->so_state |= SS_ISCONNECTING; 3029 SOCK_UNLOCK(so); 3030 } 3031 3032 void 3033 soisconnected(struct socket *so) 3034 { 3035 struct socket *head; 3036 3037 ACCEPT_LOCK(); 3038 SOCK_LOCK(so); 3039 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); 3040 so->so_state |= SS_ISCONNECTED; 3041 head = so->so_head; 3042 if (head != NULL && (so->so_qstate & SQ_INCOMP)) { 3043 if ((so->so_options & SO_ACCEPTFILTER) == 0) { 3044 SOCK_UNLOCK(so); 3045 TAILQ_REMOVE(&head->so_incomp, so, so_list); 3046 head->so_incqlen--; 3047 so->so_qstate &= ~SQ_INCOMP; 3048 TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); 3049 head->so_qlen++; 3050 so->so_qstate |= SQ_COMP; 3051 ACCEPT_UNLOCK(); 3052 sorwakeup(head); 3053 wakeup_one(&head->so_timeo); 3054 } else { 3055 ACCEPT_UNLOCK(); 3056 so->so_upcall = 3057 head->so_accf->so_accept_filter->accf_callback; 3058 so->so_upcallarg = head->so_accf->so_accept_filter_arg; 3059 so->so_rcv.sb_flags |= SB_UPCALL; 3060 so->so_options &= ~SO_ACCEPTFILTER; 3061 SOCK_UNLOCK(so); 3062 so->so_upcall(so, so->so_upcallarg, M_DONTWAIT); 3063 } 3064 return; 3065 } 3066 SOCK_UNLOCK(so); 3067 ACCEPT_UNLOCK(); 3068 wakeup(&so->so_timeo); 3069 sorwakeup(so); 3070 sowwakeup(so); 3071 } 3072 3073 void 3074 soisdisconnecting(struct socket *so) 3075 { 3076 3077 /* 3078 * Note: This code assumes that SOCK_LOCK(so) and 3079 * SOCKBUF_LOCK(&so->so_rcv) are the same. 3080 */ 3081 SOCKBUF_LOCK(&so->so_rcv); 3082 so->so_state &= ~SS_ISCONNECTING; 3083 so->so_state |= SS_ISDISCONNECTING; 3084 so->so_rcv.sb_state |= SBS_CANTRCVMORE; 3085 sorwakeup_locked(so); 3086 SOCKBUF_LOCK(&so->so_snd); 3087 so->so_snd.sb_state |= SBS_CANTSENDMORE; 3088 sowwakeup_locked(so); 3089 wakeup(&so->so_timeo); 3090 } 3091 3092 void 3093 soisdisconnected(struct socket *so) 3094 { 3095 3096 /* 3097 * Note: This code assumes that SOCK_LOCK(so) and 3098 * SOCKBUF_LOCK(&so->so_rcv) are the same. 3099 */ 3100 SOCKBUF_LOCK(&so->so_rcv); 3101 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 3102 so->so_state |= SS_ISDISCONNECTED; 3103 so->so_rcv.sb_state |= SBS_CANTRCVMORE; 3104 sorwakeup_locked(so); 3105 SOCKBUF_LOCK(&so->so_snd); 3106 so->so_snd.sb_state |= SBS_CANTSENDMORE; 3107 sbdrop_locked(&so->so_snd, so->so_snd.sb_cc); 3108 sowwakeup_locked(so); 3109 wakeup(&so->so_timeo); 3110 } 3111 3112 /* 3113 * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. 3114 */ 3115 struct sockaddr * 3116 sodupsockaddr(const struct sockaddr *sa, int mflags) 3117 { 3118 struct sockaddr *sa2; 3119 3120 sa2 = malloc(sa->sa_len, M_SONAME, mflags); 3121 if (sa2) 3122 bcopy(sa, sa2, sa->sa_len); 3123 return sa2; 3124 } 3125 3126 /* 3127 * Create an external-format (``xsocket'') structure using the information in 3128 * the kernel-format socket structure pointed to by so. This is done to 3129 * reduce the spew of irrelevant information over this interface, to isolate 3130 * user code from changes in the kernel structure, and potentially to provide 3131 * information-hiding if we decide that some of this information should be 3132 * hidden from users. 
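 *
 * Typical consumers are the per-protocol pcblist sysctl handlers that
 * export connection lists to monitoring tools such as netstat(1) and
 * sockstat(1): for each pcb they emit a record whose embedded struct
 * xsocket is filled in by calling sotoxsocket() on the corresponding
 * socket, so the external structure, not the live struct socket, is
 * what reaches userland.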
3133 */ 3134 void 3135 sotoxsocket(struct socket *so, struct xsocket *xso) 3136 { 3137 3138 xso->xso_len = sizeof *xso; 3139 xso->xso_so = so; 3140 xso->so_type = so->so_type; 3141 xso->so_options = so->so_options; 3142 xso->so_linger = so->so_linger; 3143 xso->so_state = so->so_state; 3144 xso->so_pcb = so->so_pcb; 3145 xso->xso_protocol = so->so_proto->pr_protocol; 3146 xso->xso_family = so->so_proto->pr_domain->dom_family; 3147 xso->so_qlen = so->so_qlen; 3148 xso->so_incqlen = so->so_incqlen; 3149 xso->so_qlimit = so->so_qlimit; 3150 xso->so_timeo = so->so_timeo; 3151 xso->so_error = so->so_error; 3152 xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; 3153 xso->so_oobmark = so->so_oobmark; 3154 sbtoxsockbuf(&so->so_snd, &xso->so_snd); 3155 sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); 3156 xso->so_uid = so->so_cred->cr_uid; 3157 } 3158 3159 3160 /* 3161 * Socket accessor functions to provide external consumers with 3162 * a safe interface to socket state 3163 * 3164 */ 3165 3166 void 3167 so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg) 3168 { 3169 3170 TAILQ_FOREACH(so, &so->so_comp, so_list) 3171 func(so, arg); 3172 } 3173 3174 struct sockbuf * 3175 so_sockbuf_rcv(struct socket *so) 3176 { 3177 3178 return (&so->so_rcv); 3179 } 3180 3181 struct sockbuf * 3182 so_sockbuf_snd(struct socket *so) 3183 { 3184 3185 return (&so->so_snd); 3186 } 3187 3188 int 3189 so_state_get(const struct socket *so) 3190 { 3191 3192 return (so->so_state); 3193 } 3194 3195 void 3196 so_state_set(struct socket *so, int val) 3197 { 3198 3199 so->so_state = val; 3200 } 3201 3202 int 3203 so_options_get(const struct socket *so) 3204 { 3205 3206 return (so->so_options); 3207 } 3208 3209 void 3210 so_options_set(struct socket *so, int val) 3211 { 3212 3213 so->so_options = val; 3214 } 3215 3216 int 3217 so_error_get(const struct socket *so) 3218 { 3219 3220 return (so->so_error); 3221 } 3222 3223 void 3224 so_error_set(struct socket *so, int val) 3225 { 3226 3227 so->so_error = val; 3228 } 3229 3230 int 3231 so_linger_get(const struct socket *so) 3232 { 3233 3234 return (so->so_linger); 3235 } 3236 3237 void 3238 so_linger_set(struct socket *so, int val) 3239 { 3240 3241 so->so_linger = val; 3242 } 3243 3244 struct protosw * 3245 so_protosw_get(const struct socket *so) 3246 { 3247 3248 return (so->so_proto); 3249 } 3250 3251 void 3252 so_protosw_set(struct socket *so, struct protosw *val) 3253 { 3254 3255 so->so_proto = val; 3256 } 3257 3258 void 3259 so_sorwakeup(struct socket *so) 3260 { 3261 3262 sorwakeup(so); 3263 } 3264 3265 void 3266 so_sowwakeup(struct socket *so) 3267 { 3268 3269 sowwakeup(so); 3270 } 3271 3272 void 3273 so_sorwakeup_locked(struct socket *so) 3274 { 3275 3276 sorwakeup_locked(so); 3277 } 3278 3279 void 3280 so_sowwakeup_locked(struct socket *so) 3281 { 3282 3283 sowwakeup_locked(so); 3284 } 3285 3286 void 3287 so_lock(struct socket *so) 3288 { 3289 SOCK_LOCK(so); 3290 } 3291 3292 void 3293 so_unlock(struct socket *so) 3294 { 3295 SOCK_UNLOCK(so); 3296 } 3297