19454b2d8SWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 32677b542eSDavid E. O'Brien #include <sys/cdefs.h> 33677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 34677b542eSDavid E. O'Brien 355b86eac4SJesper Skriver #include "opt_param.h" 36335654d7SRobert Watson 37df8bae1dSRodney W. Grimes #include <sys/param.h> 38960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */ 39ff5c09daSGarrett Wollman #include <sys/kernel.h> 40fb919e4dSMark Murray #include <sys/lock.h> 41df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 42960ed29cSSeigo Tanimura #include <sys/mutex.h> 43fb919e4dSMark Murray #include <sys/proc.h> 44df8bae1dSRodney W. Grimes #include <sys/protosw.h> 452f9a2132SBrian Feldman #include <sys/resourcevar.h> 46960ed29cSSeigo Tanimura #include <sys/signalvar.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 48df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 49ff5c09daSGarrett Wollman #include <sys/sysctl.h> 5026f9a767SRodney W. Grimes 51f14cce87SRobert Watson /* 52f14cce87SRobert Watson * Function pointer set by the AIO routines so that the socket buffer code 53f14cce87SRobert Watson * can call back into the AIO module if it is loaded. 54f14cce87SRobert Watson */ 5521d56e9cSAlfred Perlstein void (*aio_swake)(struct socket *, struct sockbuf *); 5621d56e9cSAlfred Perlstein 57df8bae1dSRodney W. Grimes /* 58f14cce87SRobert Watson * Primitive routines for operating on socket buffers 59df8bae1dSRodney W. Grimes */ 60df8bae1dSRodney W. Grimes 6179cb7eb4SDavid Greenman u_long sb_max = SB_MAX; 6251da11a2SMark Murray static u_long sb_max_adj = 6379cb7eb4SDavid Greenman SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 64df8bae1dSRodney W. Grimes 654b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 664b29bc4fSGarrett Wollman 67eaa6dfbcSRobert Watson static void sbdrop_internal(register struct sockbuf *sb, register int len); 68eaa6dfbcSRobert Watson static void sbflush_internal(register struct sockbuf *sb); 69eaa6dfbcSRobert Watson static void sbrelease_internal(struct sockbuf *sb, struct socket *so); 70eaa6dfbcSRobert Watson 71df8bae1dSRodney W. Grimes /* 72df8bae1dSRodney W. Grimes * Socantsendmore indicates that no more data will be sent on the 73df8bae1dSRodney W. Grimes * socket; it would normally be applied to a socket when the user 74df8bae1dSRodney W. Grimes * informs the system that no more data is to be sent, by the protocol 75df8bae1dSRodney W. Grimes * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 76df8bae1dSRodney W. Grimes * will be received, and will normally be applied to the socket by a 77df8bae1dSRodney W. Grimes * protocol when it detects that the peer will send no more data. 78df8bae1dSRodney W. Grimes * Data queued for reading in the socket may yet be read. 79df8bae1dSRodney W. Grimes */ 80a34b7046SRobert Watson void 81a34b7046SRobert Watson socantsendmore_locked(so) 82a34b7046SRobert Watson struct socket *so; 83a34b7046SRobert Watson { 84a34b7046SRobert Watson 85a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_snd); 86a34b7046SRobert Watson 87a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE; 88a34b7046SRobert Watson sowwakeup_locked(so); 89a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 90a34b7046SRobert Watson } 91df8bae1dSRodney W. Grimes 9226f9a767SRodney W. Grimes void 93df8bae1dSRodney W. Grimes socantsendmore(so) 94df8bae1dSRodney W. Grimes struct socket *so; 95df8bae1dSRodney W. Grimes { 96df8bae1dSRodney W. Grimes 97a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_snd); 98a34b7046SRobert Watson socantsendmore_locked(so); 99a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 100a34b7046SRobert Watson } 101a34b7046SRobert Watson 102a34b7046SRobert Watson void 103a34b7046SRobert Watson socantrcvmore_locked(so) 104a34b7046SRobert Watson struct socket *so; 105a34b7046SRobert Watson { 106a34b7046SRobert Watson 107a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_rcv); 108a34b7046SRobert Watson 109a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE; 110a34b7046SRobert Watson sorwakeup_locked(so); 111a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 112df8bae1dSRodney W. Grimes } 113df8bae1dSRodney W. Grimes 11426f9a767SRodney W. Grimes void 115df8bae1dSRodney W. Grimes socantrcvmore(so) 116df8bae1dSRodney W. Grimes struct socket *so; 117df8bae1dSRodney W. Grimes { 118df8bae1dSRodney W. Grimes 119a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 120a34b7046SRobert Watson socantrcvmore_locked(so); 121a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 122df8bae1dSRodney W. Grimes } 123df8bae1dSRodney W. Grimes 124df8bae1dSRodney W. Grimes /* 125df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer. 126df8bae1dSRodney W. Grimes */ 12726f9a767SRodney W. Grimes int 128df8bae1dSRodney W. Grimes sbwait(sb) 129df8bae1dSRodney W. Grimes struct sockbuf *sb; 130df8bae1dSRodney W. Grimes { 131df8bae1dSRodney W. Grimes 13231f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 13331f555a1SRobert Watson 134df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT; 13531f555a1SRobert Watson return (msleep(&sb->sb_cc, &sb->sb_mtx, 13647daf5d5SBruce Evans (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", 137df8bae1dSRodney W. Grimes sb->sb_timeo)); 138df8bae1dSRodney W. Grimes } 139df8bae1dSRodney W. Grimes 140df8bae1dSRodney W. Grimes /* 141df8bae1dSRodney W. Grimes * Lock a sockbuf already known to be locked; 142df8bae1dSRodney W. Grimes * return any error returned from sleep (EINTR). 143df8bae1dSRodney W. Grimes */ 14426f9a767SRodney W. Grimes int 145df8bae1dSRodney W. Grimes sb_lock(sb) 146df8bae1dSRodney W. Grimes register struct sockbuf *sb; 147df8bae1dSRodney W. Grimes { 148df8bae1dSRodney W. Grimes int error; 149df8bae1dSRodney W. Grimes 15031f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 15131f555a1SRobert Watson 152df8bae1dSRodney W. Grimes while (sb->sb_flags & SB_LOCK) { 153df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WANT; 15431f555a1SRobert Watson error = msleep(&sb->sb_flags, &sb->sb_mtx, 155df8bae1dSRodney W. Grimes (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, 15647daf5d5SBruce Evans "sblock", 0); 157797f2d22SPoul-Henning Kamp if (error) 158df8bae1dSRodney W. Grimes return (error); 159df8bae1dSRodney W. Grimes } 160df8bae1dSRodney W. Grimes sb->sb_flags |= SB_LOCK; 161df8bae1dSRodney W. Grimes return (0); 162df8bae1dSRodney W. Grimes } 163df8bae1dSRodney W. Grimes 164df8bae1dSRodney W. Grimes /* 165a34b7046SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous 166a34b7046SRobert Watson * notification via SIGIO if the socket has the SS_ASYNC flag set. 167a34b7046SRobert Watson * 168a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end 169a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock 170a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through 171a34b7046SRobert Watson * to the point where it's no longer required. We currently hold the lock 172a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and 173a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's 174a34b7046SRobert Watson * correct. 175df8bae1dSRodney W. Grimes */ 17626f9a767SRodney W. Grimes void 177df8bae1dSRodney W. Grimes sowakeup(so, sb) 178df8bae1dSRodney W. Grimes register struct socket *so; 179df8bae1dSRodney W. Grimes register struct sockbuf *sb; 180df8bae1dSRodney W. Grimes { 181d48d4b25SSeigo Tanimura 182a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 183a34b7046SRobert Watson 184512824f8SSeigo Tanimura selwakeuppri(&sb->sb_sel, PSOCK); 185df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_SEL; 186df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) { 187df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT; 18880208239SAlfred Perlstein wakeup(&sb->sb_cc); 189df8bae1dSRodney W. Grimes } 190ad3b9257SJohn-Mark Gurney KNOTE_LOCKED(&sb->sb_sel.si_note, 0); 191a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1924cc20ab1SSeigo Tanimura if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) 193f1320723SAlfred Perlstein pgsigio(&so->so_sigio, SIGIO, 0); 1944cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_UPCALL) 195a163d034SWarner Losh (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); 1964cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO) 197bfbbc4aaSJason Evans aio_swake(so, sb); 198a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); 199df8bae1dSRodney W. Grimes } 200df8bae1dSRodney W. Grimes 201df8bae1dSRodney W. Grimes /* 202df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines. 203df8bae1dSRodney W. Grimes * 204df8bae1dSRodney W. Grimes * Each socket contains two socket buffers: one for sending data and 205df8bae1dSRodney W. Grimes * one for receiving data. Each buffer contains a queue of mbufs, 206df8bae1dSRodney W. Grimes * information about the number of mbufs and amount of data in the 207df8bae1dSRodney W. Grimes * queue, and other fields allowing select() statements and notification 208df8bae1dSRodney W. Grimes * on data availability to be implemented. 209df8bae1dSRodney W. Grimes * 210df8bae1dSRodney W. Grimes * Data stored in a socket buffer is maintained as a list of records. 211df8bae1dSRodney W. Grimes * Each record is a list of mbufs chained together with the m_next 212df8bae1dSRodney W. Grimes * field. Records are chained together with the m_nextpkt field. The upper 213df8bae1dSRodney W. Grimes * level routine soreceive() expects the following conventions to be 214df8bae1dSRodney W. Grimes * observed when placing information in the receive buffer: 215df8bae1dSRodney W. Grimes * 216df8bae1dSRodney W. Grimes * 1. If the protocol requires each message be preceded by the sender's 217df8bae1dSRodney W. Grimes * name, then a record containing that name must be present before 218df8bae1dSRodney W. Grimes * any associated data (mbuf's must be of type MT_SONAME). 219df8bae1dSRodney W. Grimes * 2. If the protocol supports the exchange of ``access rights'' (really 220df8bae1dSRodney W. Grimes * just additional data associated with the message), and there are 221df8bae1dSRodney W. Grimes * ``rights'' to be received, then a record containing this data 222df8bae1dSRodney W. Grimes * should be present (mbuf's must be of type MT_RIGHTS). 223df8bae1dSRodney W. Grimes * 3. If a name or rights record exists, then it must be followed by 224df8bae1dSRodney W. Grimes * a data record, perhaps of zero length. 225df8bae1dSRodney W. Grimes * 226df8bae1dSRodney W. Grimes * Before using a new socket structure it is first necessary to reserve 227df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit 228df8bae1dSRodney W. Grimes * some of the available buffer space in the system buffer pool for the 229df8bae1dSRodney W. Grimes * socket (currently, it does nothing but enforce limits). The space 230df8bae1dSRodney W. Grimes * should be released by calling sbrelease() when the socket is destroyed. 231df8bae1dSRodney W. Grimes */ 232df8bae1dSRodney W. Grimes 23326f9a767SRodney W. Grimes int 234df8bae1dSRodney W. Grimes soreserve(so, sndcc, rcvcc) 235df8bae1dSRodney W. Grimes register struct socket *so; 236df8bae1dSRodney W. Grimes u_long sndcc, rcvcc; 237df8bae1dSRodney W. Grimes { 238b40ce416SJulian Elischer struct thread *td = curthread; 239df8bae1dSRodney W. Grimes 2403f11a2f3SRobert Watson SOCKBUF_LOCK(&so->so_snd); 2419535efc0SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 2423f11a2f3SRobert Watson if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) 2433f11a2f3SRobert Watson goto bad; 2443f11a2f3SRobert Watson if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) 2453f11a2f3SRobert Watson goto bad2; 246df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0) 247df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1; 248df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0) 249df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES; 250df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 251df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 2523f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 2539535efc0SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 254df8bae1dSRodney W. Grimes return (0); 255df8bae1dSRodney W. Grimes bad2: 2563f11a2f3SRobert Watson sbrelease_locked(&so->so_snd, so); 257df8bae1dSRodney W. Grimes bad: 2583f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 2593f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 260df8bae1dSRodney W. Grimes return (ENOBUFS); 261df8bae1dSRodney W. Grimes } 262df8bae1dSRodney W. Grimes 26379cb7eb4SDavid Greenman static int 26479cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 26579cb7eb4SDavid Greenman { 26679cb7eb4SDavid Greenman int error = 0; 26779cb7eb4SDavid Greenman u_long old_sb_max = sb_max; 26879cb7eb4SDavid Greenman 2691b978d45SHartmut Brandt error = SYSCTL_OUT(req, arg1, sizeof(u_long)); 27079cb7eb4SDavid Greenman if (error || !req->newptr) 27179cb7eb4SDavid Greenman return (error); 2721b978d45SHartmut Brandt error = SYSCTL_IN(req, arg1, sizeof(u_long)); 27379cb7eb4SDavid Greenman if (error) 27479cb7eb4SDavid Greenman return (error); 27579cb7eb4SDavid Greenman if (sb_max < MSIZE + MCLBYTES) { 27679cb7eb4SDavid Greenman sb_max = old_sb_max; 27779cb7eb4SDavid Greenman return (EINVAL); 27879cb7eb4SDavid Greenman } 27979cb7eb4SDavid Greenman sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 28079cb7eb4SDavid Greenman return (0); 28179cb7eb4SDavid Greenman } 28279cb7eb4SDavid Greenman 283df8bae1dSRodney W. Grimes /* 284df8bae1dSRodney W. Grimes * Allot mbufs to a sockbuf. 285df8bae1dSRodney W. Grimes * Attempt to scale mbmax so that mbcnt doesn't become limiting 286df8bae1dSRodney W. Grimes * if buffering efficiency is near the normal case. 287df8bae1dSRodney W. Grimes */ 28826f9a767SRodney W. Grimes int 2893f11a2f3SRobert Watson sbreserve_locked(sb, cc, so, td) 290df8bae1dSRodney W. Grimes struct sockbuf *sb; 291df8bae1dSRodney W. Grimes u_long cc; 292ecf72308SBrian Feldman struct socket *so; 293b40ce416SJulian Elischer struct thread *td; 294df8bae1dSRodney W. Grimes { 29591d5354aSJohn Baldwin rlim_t sbsize_limit; 296ecf72308SBrian Feldman 2973f11a2f3SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 2983f11a2f3SRobert Watson 299ecf72308SBrian Feldman /* 300b40ce416SJulian Elischer * td will only be NULL when we're in an interrupt 301ecf72308SBrian Feldman * (e.g. in tcp_input()) 302ecf72308SBrian Feldman */ 30379cb7eb4SDavid Greenman if (cc > sb_max_adj) 304df8bae1dSRodney W. Grimes return (0); 30591d5354aSJohn Baldwin if (td != NULL) { 30691d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 30791d5354aSJohn Baldwin sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE); 30891d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 30991d5354aSJohn Baldwin } else 31091d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY; 311f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 31291d5354aSJohn Baldwin sbsize_limit)) 313ecf72308SBrian Feldman return (0); 3144b29bc4fSGarrett Wollman sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 315df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat) 316df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat; 317df8bae1dSRodney W. Grimes return (1); 318df8bae1dSRodney W. Grimes } 319df8bae1dSRodney W. Grimes 3203f11a2f3SRobert Watson int 3213f11a2f3SRobert Watson sbreserve(sb, cc, so, td) 3223f11a2f3SRobert Watson struct sockbuf *sb; 3233f11a2f3SRobert Watson u_long cc; 3243f11a2f3SRobert Watson struct socket *so; 3253f11a2f3SRobert Watson struct thread *td; 3263f11a2f3SRobert Watson { 3273f11a2f3SRobert Watson int error; 3283f11a2f3SRobert Watson 3293f11a2f3SRobert Watson SOCKBUF_LOCK(sb); 3303f11a2f3SRobert Watson error = sbreserve_locked(sb, cc, so, td); 3313f11a2f3SRobert Watson SOCKBUF_UNLOCK(sb); 3323f11a2f3SRobert Watson return (error); 3333f11a2f3SRobert Watson } 3343f11a2f3SRobert Watson 335df8bae1dSRodney W. Grimes /* 336df8bae1dSRodney W. Grimes * Free mbufs held by a socket, and reserved mbuf space. 337df8bae1dSRodney W. Grimes */ 338eaa6dfbcSRobert Watson static void 339eaa6dfbcSRobert Watson sbrelease_internal(sb, so) 340eaa6dfbcSRobert Watson struct sockbuf *sb; 341eaa6dfbcSRobert Watson struct socket *so; 342eaa6dfbcSRobert Watson { 343eaa6dfbcSRobert Watson 344eaa6dfbcSRobert Watson sbflush_internal(sb); 345eaa6dfbcSRobert Watson (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 346eaa6dfbcSRobert Watson RLIM_INFINITY); 347eaa6dfbcSRobert Watson sb->sb_mbmax = 0; 348eaa6dfbcSRobert Watson } 349eaa6dfbcSRobert Watson 35026f9a767SRodney W. Grimes void 351a34b7046SRobert Watson sbrelease_locked(sb, so) 352df8bae1dSRodney W. Grimes struct sockbuf *sb; 353ecf72308SBrian Feldman struct socket *so; 354df8bae1dSRodney W. Grimes { 355df8bae1dSRodney W. Grimes 356a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 357a34b7046SRobert Watson 358eaa6dfbcSRobert Watson sbrelease_internal(sb, so); 359df8bae1dSRodney W. Grimes } 360df8bae1dSRodney W. Grimes 361a34b7046SRobert Watson void 362a34b7046SRobert Watson sbrelease(sb, so) 363a34b7046SRobert Watson struct sockbuf *sb; 364a34b7046SRobert Watson struct socket *so; 365a34b7046SRobert Watson { 366a34b7046SRobert Watson 367a34b7046SRobert Watson SOCKBUF_LOCK(sb); 368a34b7046SRobert Watson sbrelease_locked(sb, so); 369a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 370a34b7046SRobert Watson } 371eaa6dfbcSRobert Watson 372eaa6dfbcSRobert Watson void 373eaa6dfbcSRobert Watson sbdestroy(sb, so) 374eaa6dfbcSRobert Watson struct sockbuf *sb; 375eaa6dfbcSRobert Watson struct socket *so; 376eaa6dfbcSRobert Watson { 377eaa6dfbcSRobert Watson 378eaa6dfbcSRobert Watson sbrelease_internal(sb, so); 379eaa6dfbcSRobert Watson } 380eaa6dfbcSRobert Watson 381eaa6dfbcSRobert Watson 382df8bae1dSRodney W. Grimes /* 383df8bae1dSRodney W. Grimes * Routines to add and remove 384df8bae1dSRodney W. Grimes * data from an mbuf queue. 385df8bae1dSRodney W. Grimes * 386df8bae1dSRodney W. Grimes * The routines sbappend() or sbappendrecord() are normally called to 387df8bae1dSRodney W. Grimes * append new mbufs to a socket buffer, after checking that adequate 388df8bae1dSRodney W. Grimes * space is available, comparing the function sbspace() with the amount 389df8bae1dSRodney W. Grimes * of data to be added. sbappendrecord() differs from sbappend() in 390df8bae1dSRodney W. Grimes * that data supplied is treated as the beginning of a new record. 391df8bae1dSRodney W. Grimes * To place a sender's address, optional access rights, and data in a 392df8bae1dSRodney W. Grimes * socket receive buffer, sbappendaddr() should be used. To place 393df8bae1dSRodney W. Grimes * access rights and data in a socket receive buffer, sbappendrights() 394df8bae1dSRodney W. Grimes * should be used. In either case, the new data begins a new record. 395df8bae1dSRodney W. Grimes * Note that unlike sbappend() and sbappendrecord(), these routines check 396df8bae1dSRodney W. Grimes * for the caller that there will be enough space to store the data. 397df8bae1dSRodney W. Grimes * Each fails if there is not enough space, or if it cannot find mbufs 398df8bae1dSRodney W. Grimes * to store additional information in. 399df8bae1dSRodney W. Grimes * 400df8bae1dSRodney W. Grimes * Reliable protocols may use the socket send buffer to hold data 401df8bae1dSRodney W. Grimes * awaiting acknowledgement. Data is normally copied from a socket 402df8bae1dSRodney W. Grimes * send buffer in a protocol with m_copy for output to a peer, 403df8bae1dSRodney W. Grimes * and then removing the data from the socket buffer with sbdrop() 404df8bae1dSRodney W. Grimes * or sbdroprecord() when the data is acknowledged by the peer. 405df8bae1dSRodney W. Grimes */ 406df8bae1dSRodney W. Grimes 407395bb186SSam Leffler #ifdef SOCKBUF_DEBUG 408395bb186SSam Leffler void 409395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line) 410395bb186SSam Leffler { 411395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 412395bb186SSam Leffler 413a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 414a34b7046SRobert Watson 415395bb186SSam Leffler while (m && m->m_nextpkt) 416395bb186SSam Leffler m = m->m_nextpkt; 417395bb186SSam Leffler 418395bb186SSam Leffler if (m != sb->sb_lastrecord) { 419395bb186SSam Leffler printf("%s: sb_mb %p sb_lastrecord %p last %p\n", 420395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_lastrecord, m); 421395bb186SSam Leffler printf("packet chain:\n"); 422395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 423395bb186SSam Leffler printf("\t%p\n", m); 424395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 425395bb186SSam Leffler } 426395bb186SSam Leffler } 427395bb186SSam Leffler 428395bb186SSam Leffler void 429395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line) 430395bb186SSam Leffler { 431395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 432395bb186SSam Leffler struct mbuf *n; 433395bb186SSam Leffler 434a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 435a34b7046SRobert Watson 436395bb186SSam Leffler while (m && m->m_nextpkt) 437395bb186SSam Leffler m = m->m_nextpkt; 438395bb186SSam Leffler 439395bb186SSam Leffler while (m && m->m_next) 440395bb186SSam Leffler m = m->m_next; 441395bb186SSam Leffler 442395bb186SSam Leffler if (m != sb->sb_mbtail) { 443395bb186SSam Leffler printf("%s: sb_mb %p sb_mbtail %p last %p\n", 444395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_mbtail, m); 445395bb186SSam Leffler printf("packet tree:\n"); 446395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 447395bb186SSam Leffler printf("\t"); 448395bb186SSam Leffler for (n = m; n != NULL; n = n->m_next) 449395bb186SSam Leffler printf("%p ", n); 450395bb186SSam Leffler printf("\n"); 451395bb186SSam Leffler } 452395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 453395bb186SSam Leffler } 454395bb186SSam Leffler } 455395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */ 456395bb186SSam Leffler 457395bb186SSam Leffler #define SBLINKRECORD(sb, m0) do { \ 458a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); \ 459395bb186SSam Leffler if ((sb)->sb_lastrecord != NULL) \ 460395bb186SSam Leffler (sb)->sb_lastrecord->m_nextpkt = (m0); \ 461395bb186SSam Leffler else \ 462395bb186SSam Leffler (sb)->sb_mb = (m0); \ 463395bb186SSam Leffler (sb)->sb_lastrecord = (m0); \ 464395bb186SSam Leffler } while (/*CONSTCOND*/0) 465395bb186SSam Leffler 466df8bae1dSRodney W. Grimes /* 467df8bae1dSRodney W. Grimes * Append mbuf chain m to the last record in the 468df8bae1dSRodney W. Grimes * socket buffer sb. The additional space associated 469df8bae1dSRodney W. Grimes * the mbuf chain is recorded in sb. Empty mbufs are 470df8bae1dSRodney W. Grimes * discarded and mbufs are compacted where possible. 471df8bae1dSRodney W. Grimes */ 47226f9a767SRodney W. Grimes void 473a34b7046SRobert Watson sbappend_locked(sb, m) 474df8bae1dSRodney W. Grimes struct sockbuf *sb; 475df8bae1dSRodney W. Grimes struct mbuf *m; 476df8bae1dSRodney W. Grimes { 477df8bae1dSRodney W. Grimes register struct mbuf *n; 478df8bae1dSRodney W. Grimes 479a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 480a34b7046SRobert Watson 481df8bae1dSRodney W. Grimes if (m == 0) 482df8bae1dSRodney W. Grimes return; 483a34b7046SRobert Watson 484395bb186SSam Leffler SBLASTRECORDCHK(sb); 485797f2d22SPoul-Henning Kamp n = sb->sb_mb; 486797f2d22SPoul-Henning Kamp if (n) { 487df8bae1dSRodney W. Grimes while (n->m_nextpkt) 488df8bae1dSRodney W. Grimes n = n->m_nextpkt; 489df8bae1dSRodney W. Grimes do { 490df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) { 491a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 492df8bae1dSRodney W. Grimes return; 493df8bae1dSRodney W. Grimes } 494df8bae1dSRodney W. Grimes } while (n->m_next && (n = n->m_next)); 495395bb186SSam Leffler } else { 496395bb186SSam Leffler /* 497395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but 498395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that 499395bb186SSam Leffler * XXX way. 500395bb186SSam Leffler */ 501395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) { 502395bb186SSam Leffler do { 503395bb186SSam Leffler if (n->m_flags & M_EOR) { 504a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 505395bb186SSam Leffler return; 506395bb186SSam Leffler } 507395bb186SSam Leffler } while (n->m_next && (n = n->m_next)); 508395bb186SSam Leffler } else { 509395bb186SSam Leffler /* 510395bb186SSam Leffler * If this is the first record in the socket buffer, 511395bb186SSam Leffler * it's also the last record. 512395bb186SSam Leffler */ 513395bb186SSam Leffler sb->sb_lastrecord = m; 514395bb186SSam Leffler } 515df8bae1dSRodney W. Grimes } 516df8bae1dSRodney W. Grimes sbcompress(sb, m, n); 517395bb186SSam Leffler SBLASTRECORDCHK(sb); 518395bb186SSam Leffler } 519395bb186SSam Leffler 520395bb186SSam Leffler /* 521a34b7046SRobert Watson * Append mbuf chain m to the last record in the 522a34b7046SRobert Watson * socket buffer sb. The additional space associated 523a34b7046SRobert Watson * the mbuf chain is recorded in sb. Empty mbufs are 524a34b7046SRobert Watson * discarded and mbufs are compacted where possible. 525a34b7046SRobert Watson */ 526a34b7046SRobert Watson void 527a34b7046SRobert Watson sbappend(sb, m) 528a34b7046SRobert Watson struct sockbuf *sb; 529a34b7046SRobert Watson struct mbuf *m; 530a34b7046SRobert Watson { 531a34b7046SRobert Watson 532a34b7046SRobert Watson SOCKBUF_LOCK(sb); 533a34b7046SRobert Watson sbappend_locked(sb, m); 534a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 535a34b7046SRobert Watson } 536a34b7046SRobert Watson 537a34b7046SRobert Watson /* 538395bb186SSam Leffler * This version of sbappend() should only be used when the caller 539395bb186SSam Leffler * absolutely knows that there will never be more than one record 540395bb186SSam Leffler * in the socket buffer, that is, a stream protocol (such as TCP). 541395bb186SSam Leffler */ 542395bb186SSam Leffler void 543a34b7046SRobert Watson sbappendstream_locked(struct sockbuf *sb, struct mbuf *m) 544395bb186SSam Leffler { 545a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 546395bb186SSam Leffler 547395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); 548395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); 549395bb186SSam Leffler 550395bb186SSam Leffler SBLASTMBUFCHK(sb); 551395bb186SSam Leffler 552395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail); 553395bb186SSam Leffler 554395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb; 555395bb186SSam Leffler SBLASTRECORDCHK(sb); 556df8bae1dSRodney W. Grimes } 557df8bae1dSRodney W. Grimes 558a34b7046SRobert Watson /* 559a34b7046SRobert Watson * This version of sbappend() should only be used when the caller 560a34b7046SRobert Watson * absolutely knows that there will never be more than one record 561a34b7046SRobert Watson * in the socket buffer, that is, a stream protocol (such as TCP). 562a34b7046SRobert Watson */ 563a34b7046SRobert Watson void 564a34b7046SRobert Watson sbappendstream(struct sockbuf *sb, struct mbuf *m) 565a34b7046SRobert Watson { 566a34b7046SRobert Watson 567a34b7046SRobert Watson SOCKBUF_LOCK(sb); 568a34b7046SRobert Watson sbappendstream_locked(sb, m); 569a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 570a34b7046SRobert Watson } 571a34b7046SRobert Watson 572df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG 57326f9a767SRodney W. Grimes void 574df8bae1dSRodney W. Grimes sbcheck(sb) 5757ed60de8SPoul-Henning Kamp struct sockbuf *sb; 576df8bae1dSRodney W. Grimes { 5777ed60de8SPoul-Henning Kamp struct mbuf *m; 5787ed60de8SPoul-Henning Kamp struct mbuf *n = 0; 5797ed60de8SPoul-Henning Kamp u_long len = 0, mbcnt = 0; 580df8bae1dSRodney W. Grimes 581a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 582a34b7046SRobert Watson 5830931333fSBill Fenner for (m = sb->sb_mb; m; m = n) { 5840931333fSBill Fenner n = m->m_nextpkt; 5850931333fSBill Fenner for (; m; m = m->m_next) { 586df8bae1dSRodney W. Grimes len += m->m_len; 587df8bae1dSRodney W. Grimes mbcnt += MSIZE; 588313861b8SJulian Elischer if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 589df8bae1dSRodney W. Grimes mbcnt += m->m_ext.ext_size; 5900931333fSBill Fenner } 591df8bae1dSRodney W. Grimes } 592df8bae1dSRodney W. Grimes if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 593395bb186SSam Leffler printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc, 594df8bae1dSRodney W. Grimes mbcnt, sb->sb_mbcnt); 595df8bae1dSRodney W. Grimes panic("sbcheck"); 596df8bae1dSRodney W. Grimes } 597df8bae1dSRodney W. Grimes } 598df8bae1dSRodney W. Grimes #endif 599df8bae1dSRodney W. Grimes 600df8bae1dSRodney W. Grimes /* 601df8bae1dSRodney W. Grimes * As above, except the mbuf chain 602df8bae1dSRodney W. Grimes * begins a new record. 603df8bae1dSRodney W. Grimes */ 60426f9a767SRodney W. Grimes void 605a34b7046SRobert Watson sbappendrecord_locked(sb, m0) 606df8bae1dSRodney W. Grimes register struct sockbuf *sb; 607df8bae1dSRodney W. Grimes register struct mbuf *m0; 608df8bae1dSRodney W. Grimes { 609df8bae1dSRodney W. Grimes register struct mbuf *m; 610df8bae1dSRodney W. Grimes 611a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 612a34b7046SRobert Watson 613df8bae1dSRodney W. Grimes if (m0 == 0) 614df8bae1dSRodney W. Grimes return; 615797f2d22SPoul-Henning Kamp m = sb->sb_mb; 616797f2d22SPoul-Henning Kamp if (m) 617df8bae1dSRodney W. Grimes while (m->m_nextpkt) 618df8bae1dSRodney W. Grimes m = m->m_nextpkt; 619df8bae1dSRodney W. Grimes /* 620df8bae1dSRodney W. Grimes * Put the first mbuf on the queue. 621df8bae1dSRodney W. Grimes * Note this permits zero length records. 622df8bae1dSRodney W. Grimes */ 623df8bae1dSRodney W. Grimes sballoc(sb, m0); 624395bb186SSam Leffler SBLASTRECORDCHK(sb); 625395bb186SSam Leffler SBLINKRECORD(sb, m0); 626df8bae1dSRodney W. Grimes if (m) 627df8bae1dSRodney W. Grimes m->m_nextpkt = m0; 628df8bae1dSRodney W. Grimes else 629df8bae1dSRodney W. Grimes sb->sb_mb = m0; 630df8bae1dSRodney W. Grimes m = m0->m_next; 631df8bae1dSRodney W. Grimes m0->m_next = 0; 632df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) { 633df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR; 634df8bae1dSRodney W. Grimes m->m_flags |= M_EOR; 635df8bae1dSRodney W. Grimes } 636df8bae1dSRodney W. Grimes sbcompress(sb, m, m0); 637df8bae1dSRodney W. Grimes } 638df8bae1dSRodney W. Grimes 639df8bae1dSRodney W. Grimes /* 640a34b7046SRobert Watson * As above, except the mbuf chain 641a34b7046SRobert Watson * begins a new record. 642a34b7046SRobert Watson */ 643a34b7046SRobert Watson void 644a34b7046SRobert Watson sbappendrecord(sb, m0) 645a34b7046SRobert Watson register struct sockbuf *sb; 646a34b7046SRobert Watson register struct mbuf *m0; 647a34b7046SRobert Watson { 648a34b7046SRobert Watson 649a34b7046SRobert Watson SOCKBUF_LOCK(sb); 650a34b7046SRobert Watson sbappendrecord_locked(sb, m0); 651a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 652a34b7046SRobert Watson } 653a34b7046SRobert Watson 654a34b7046SRobert Watson /* 655df8bae1dSRodney W. Grimes * Append address and data, and optionally, control (ancillary) data 656df8bae1dSRodney W. Grimes * to the receive queue of a socket. If present, 657df8bae1dSRodney W. Grimes * m0 must include a packet header with total length. 658df8bae1dSRodney W. Grimes * Returns 0 if no space in sockbuf or insufficient mbufs. 659df8bae1dSRodney W. Grimes */ 66026f9a767SRodney W. Grimes int 661a34b7046SRobert Watson sbappendaddr_locked(sb, asa, m0, control) 6627ed60de8SPoul-Henning Kamp struct sockbuf *sb; 663e7dd9a10SRobert Watson const struct sockaddr *asa; 664df8bae1dSRodney W. Grimes struct mbuf *m0, *control; 665df8bae1dSRodney W. Grimes { 666395bb186SSam Leffler struct mbuf *m, *n, *nlast; 667df8bae1dSRodney W. Grimes int space = asa->sa_len; 668df8bae1dSRodney W. Grimes 669a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 670a34b7046SRobert Watson 671df8bae1dSRodney W. Grimes if (m0 && (m0->m_flags & M_PKTHDR) == 0) 672a34b7046SRobert Watson panic("sbappendaddr_locked"); 673df8bae1dSRodney W. Grimes if (m0) 674df8bae1dSRodney W. Grimes space += m0->m_pkthdr.len; 6757ed60de8SPoul-Henning Kamp space += m_length(control, &n); 676a34b7046SRobert Watson 677df8bae1dSRodney W. Grimes if (space > sbspace(sb)) 678df8bae1dSRodney W. Grimes return (0); 679c43cad1aSScott Long #if MSIZE <= 256 680df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN) 681df8bae1dSRodney W. Grimes return (0); 682c43cad1aSScott Long #endif 683a163d034SWarner Losh MGET(m, M_DONTWAIT, MT_SONAME); 684df8bae1dSRodney W. Grimes if (m == 0) 685df8bae1dSRodney W. Grimes return (0); 686df8bae1dSRodney W. Grimes m->m_len = asa->sa_len; 68780208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len); 688df8bae1dSRodney W. Grimes if (n) 689df8bae1dSRodney W. Grimes n->m_next = m0; /* concatenate data to control */ 690df8bae1dSRodney W. Grimes else 691df8bae1dSRodney W. Grimes control = m0; 692df8bae1dSRodney W. Grimes m->m_next = control; 693395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next) 694df8bae1dSRodney W. Grimes sballoc(sb, n); 695395bb186SSam Leffler sballoc(sb, n); 696395bb186SSam Leffler nlast = n; 697395bb186SSam Leffler SBLINKRECORD(sb, m); 698395bb186SSam Leffler 699395bb186SSam Leffler sb->sb_mbtail = nlast; 700395bb186SSam Leffler SBLASTMBUFCHK(sb); 701395bb186SSam Leffler 702395bb186SSam Leffler SBLASTRECORDCHK(sb); 703df8bae1dSRodney W. Grimes return (1); 704df8bae1dSRodney W. Grimes } 705df8bae1dSRodney W. Grimes 706a34b7046SRobert Watson /* 707a34b7046SRobert Watson * Append address and data, and optionally, control (ancillary) data 708a34b7046SRobert Watson * to the receive queue of a socket. If present, 709a34b7046SRobert Watson * m0 must include a packet header with total length. 710a34b7046SRobert Watson * Returns 0 if no space in sockbuf or insufficient mbufs. 711a34b7046SRobert Watson */ 71226f9a767SRodney W. Grimes int 713a34b7046SRobert Watson sbappendaddr(sb, asa, m0, control) 714a34b7046SRobert Watson struct sockbuf *sb; 715a34b7046SRobert Watson const struct sockaddr *asa; 716a34b7046SRobert Watson struct mbuf *m0, *control; 717a34b7046SRobert Watson { 718a34b7046SRobert Watson int retval; 719a34b7046SRobert Watson 720a34b7046SRobert Watson SOCKBUF_LOCK(sb); 721a34b7046SRobert Watson retval = sbappendaddr_locked(sb, asa, m0, control); 722a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 723a34b7046SRobert Watson return (retval); 724a34b7046SRobert Watson } 725a34b7046SRobert Watson 726a34b7046SRobert Watson int 727a34b7046SRobert Watson sbappendcontrol_locked(sb, m0, control) 728df8bae1dSRodney W. Grimes struct sockbuf *sb; 729df8bae1dSRodney W. Grimes struct mbuf *control, *m0; 730df8bae1dSRodney W. Grimes { 731395bb186SSam Leffler struct mbuf *m, *n, *mlast; 7327ed60de8SPoul-Henning Kamp int space; 733df8bae1dSRodney W. Grimes 734a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 735a34b7046SRobert Watson 736df8bae1dSRodney W. Grimes if (control == 0) 737a34b7046SRobert Watson panic("sbappendcontrol_locked"); 7387ed60de8SPoul-Henning Kamp space = m_length(control, &n) + m_length(m0, NULL); 739a34b7046SRobert Watson 740df8bae1dSRodney W. Grimes if (space > sbspace(sb)) 741df8bae1dSRodney W. Grimes return (0); 742df8bae1dSRodney W. Grimes n->m_next = m0; /* concatenate data to control */ 743395bb186SSam Leffler 744395bb186SSam Leffler SBLASTRECORDCHK(sb); 745395bb186SSam Leffler 746395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next) 747df8bae1dSRodney W. Grimes sballoc(sb, m); 748395bb186SSam Leffler sballoc(sb, m); 749395bb186SSam Leffler mlast = m; 750395bb186SSam Leffler SBLINKRECORD(sb, control); 751395bb186SSam Leffler 752395bb186SSam Leffler sb->sb_mbtail = mlast; 753395bb186SSam Leffler SBLASTMBUFCHK(sb); 754395bb186SSam Leffler 755395bb186SSam Leffler SBLASTRECORDCHK(sb); 756df8bae1dSRodney W. Grimes return (1); 757df8bae1dSRodney W. Grimes } 758df8bae1dSRodney W. Grimes 759a34b7046SRobert Watson int 760a34b7046SRobert Watson sbappendcontrol(sb, m0, control) 761a34b7046SRobert Watson struct sockbuf *sb; 762a34b7046SRobert Watson struct mbuf *control, *m0; 763a34b7046SRobert Watson { 764a34b7046SRobert Watson int retval; 765a34b7046SRobert Watson 766a34b7046SRobert Watson SOCKBUF_LOCK(sb); 767a34b7046SRobert Watson retval = sbappendcontrol_locked(sb, m0, control); 768a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 769a34b7046SRobert Watson return (retval); 770a34b7046SRobert Watson } 771a34b7046SRobert Watson 772df8bae1dSRodney W. Grimes /* 7737da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 7747da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty. 7757da7362bSRobert Watson * 7767da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of 7777da7362bSRobert Watson * three ways: 7787da7362bSRobert Watson * 7797da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 7807da7362bSRobert Watson * record boundary, and no change in data type). 7817da7362bSRobert Watson * 7827da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 7837da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an 7847da7362bSRobert Watson * appropriate mbuf exists, there is room, and no merging of data types 7857da7362bSRobert Watson * will occur. 7867da7362bSRobert Watson * 7877da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain. 7887da7362bSRobert Watson * 7897da7362bSRobert Watson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as 7907da7362bSRobert Watson * end-of-record. 791df8bae1dSRodney W. Grimes */ 79226f9a767SRodney W. Grimes void 793df8bae1dSRodney W. Grimes sbcompress(sb, m, n) 794df8bae1dSRodney W. Grimes register struct sockbuf *sb; 795df8bae1dSRodney W. Grimes register struct mbuf *m, *n; 796df8bae1dSRodney W. Grimes { 797df8bae1dSRodney W. Grimes register int eor = 0; 798df8bae1dSRodney W. Grimes register struct mbuf *o; 799df8bae1dSRodney W. Grimes 800a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 801a34b7046SRobert Watson 802df8bae1dSRodney W. Grimes while (m) { 803df8bae1dSRodney W. Grimes eor |= m->m_flags & M_EOR; 804df8bae1dSRodney W. Grimes if (m->m_len == 0 && 805df8bae1dSRodney W. Grimes (eor == 0 || 806df8bae1dSRodney W. Grimes (((o = m->m_next) || (o = n)) && 807df8bae1dSRodney W. Grimes o->m_type == m->m_type))) { 808395bb186SSam Leffler if (sb->sb_lastrecord == m) 809395bb186SSam Leffler sb->sb_lastrecord = m->m_next; 810df8bae1dSRodney W. Grimes m = m_free(m); 811df8bae1dSRodney W. Grimes continue; 812df8bae1dSRodney W. Grimes } 81332af0d74SDavid Malone if (n && (n->m_flags & M_EOR) == 0 && 81432af0d74SDavid Malone M_WRITABLE(n) && 81532af0d74SDavid Malone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 81632af0d74SDavid Malone m->m_len <= M_TRAILINGSPACE(n) && 817df8bae1dSRodney W. Grimes n->m_type == m->m_type) { 818df8bae1dSRodney W. Grimes bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 819df8bae1dSRodney W. Grimes (unsigned)m->m_len); 820df8bae1dSRodney W. Grimes n->m_len += m->m_len; 821df8bae1dSRodney W. Grimes sb->sb_cc += m->m_len; 82234333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 823b3f1af6bSTim J. Robbins /* XXX: Probably don't need.*/ 82404ac9b97SKelly Yancey sb->sb_ctl += m->m_len; 825df8bae1dSRodney W. Grimes m = m_free(m); 826df8bae1dSRodney W. Grimes continue; 827df8bae1dSRodney W. Grimes } 828df8bae1dSRodney W. Grimes if (n) 829df8bae1dSRodney W. Grimes n->m_next = m; 830df8bae1dSRodney W. Grimes else 831df8bae1dSRodney W. Grimes sb->sb_mb = m; 832395bb186SSam Leffler sb->sb_mbtail = m; 833df8bae1dSRodney W. Grimes sballoc(sb, m); 834df8bae1dSRodney W. Grimes n = m; 835df8bae1dSRodney W. Grimes m->m_flags &= ~M_EOR; 836df8bae1dSRodney W. Grimes m = m->m_next; 837df8bae1dSRodney W. Grimes n->m_next = 0; 838df8bae1dSRodney W. Grimes } 839df8bae1dSRodney W. Grimes if (eor) { 8407da7362bSRobert Watson KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); 841df8bae1dSRodney W. Grimes n->m_flags |= eor; 842df8bae1dSRodney W. Grimes } 843395bb186SSam Leffler SBLASTMBUFCHK(sb); 844df8bae1dSRodney W. Grimes } 845df8bae1dSRodney W. Grimes 846df8bae1dSRodney W. Grimes /* 847df8bae1dSRodney W. Grimes * Free all mbufs in a sockbuf. 848df8bae1dSRodney W. Grimes * Check that all resources are reclaimed. 849df8bae1dSRodney W. Grimes */ 850eaa6dfbcSRobert Watson static void 851eaa6dfbcSRobert Watson sbflush_internal(sb) 852df8bae1dSRodney W. Grimes register struct sockbuf *sb; 853df8bae1dSRodney W. Grimes { 854df8bae1dSRodney W. Grimes 855df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_LOCK) 856a34b7046SRobert Watson panic("sbflush_locked: locked"); 85723f84772SPierre Beyssac while (sb->sb_mbcnt) { 85823f84772SPierre Beyssac /* 85923f84772SPierre Beyssac * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty: 86023f84772SPierre Beyssac * we would loop forever. Panic instead. 86123f84772SPierre Beyssac */ 86223f84772SPierre Beyssac if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) 86323f84772SPierre Beyssac break; 864eaa6dfbcSRobert Watson sbdrop_internal(sb, (int)sb->sb_cc); 86523f84772SPierre Beyssac } 8660931333fSBill Fenner if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) 867a34b7046SRobert Watson panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); 868a34b7046SRobert Watson } 869a34b7046SRobert Watson 870a34b7046SRobert Watson void 871eaa6dfbcSRobert Watson sbflush_locked(sb) 872eaa6dfbcSRobert Watson register struct sockbuf *sb; 873eaa6dfbcSRobert Watson { 874eaa6dfbcSRobert Watson 875eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 876eaa6dfbcSRobert Watson sbflush_internal(sb); 877eaa6dfbcSRobert Watson } 878eaa6dfbcSRobert Watson 879eaa6dfbcSRobert Watson void 880a34b7046SRobert Watson sbflush(sb) 881a34b7046SRobert Watson register struct sockbuf *sb; 882a34b7046SRobert Watson { 883a34b7046SRobert Watson 884a34b7046SRobert Watson SOCKBUF_LOCK(sb); 885a34b7046SRobert Watson sbflush_locked(sb); 886a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 887df8bae1dSRodney W. Grimes } 888df8bae1dSRodney W. Grimes 889df8bae1dSRodney W. Grimes /* 890df8bae1dSRodney W. Grimes * Drop data from (the front of) a sockbuf. 891df8bae1dSRodney W. Grimes */ 892eaa6dfbcSRobert Watson static void 893eaa6dfbcSRobert Watson sbdrop_internal(sb, len) 894df8bae1dSRodney W. Grimes register struct sockbuf *sb; 895df8bae1dSRodney W. Grimes register int len; 896df8bae1dSRodney W. Grimes { 897ecde8f7cSMatthew Dillon register struct mbuf *m; 898df8bae1dSRodney W. Grimes struct mbuf *next; 899df8bae1dSRodney W. Grimes 900df8bae1dSRodney W. Grimes next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 901df8bae1dSRodney W. Grimes while (len > 0) { 902df8bae1dSRodney W. Grimes if (m == 0) { 903df8bae1dSRodney W. Grimes if (next == 0) 904df8bae1dSRodney W. Grimes panic("sbdrop"); 905df8bae1dSRodney W. Grimes m = next; 906df8bae1dSRodney W. Grimes next = m->m_nextpkt; 907df8bae1dSRodney W. Grimes continue; 908df8bae1dSRodney W. Grimes } 909df8bae1dSRodney W. Grimes if (m->m_len > len) { 910df8bae1dSRodney W. Grimes m->m_len -= len; 911df8bae1dSRodney W. Grimes m->m_data += len; 912df8bae1dSRodney W. Grimes sb->sb_cc -= len; 91334333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 91404ac9b97SKelly Yancey sb->sb_ctl -= len; 915df8bae1dSRodney W. Grimes break; 916df8bae1dSRodney W. Grimes } 917df8bae1dSRodney W. Grimes len -= m->m_len; 918df8bae1dSRodney W. Grimes sbfree(sb, m); 919ecde8f7cSMatthew Dillon m = m_free(m); 920df8bae1dSRodney W. Grimes } 921df8bae1dSRodney W. Grimes while (m && m->m_len == 0) { 922df8bae1dSRodney W. Grimes sbfree(sb, m); 923ecde8f7cSMatthew Dillon m = m_free(m); 924df8bae1dSRodney W. Grimes } 925df8bae1dSRodney W. Grimes if (m) { 926df8bae1dSRodney W. Grimes sb->sb_mb = m; 927df8bae1dSRodney W. Grimes m->m_nextpkt = next; 928df8bae1dSRodney W. Grimes } else 929df8bae1dSRodney W. Grimes sb->sb_mb = next; 930395bb186SSam Leffler /* 931395bb186SSam Leffler * First part is an inline SB_EMPTY_FIXUP(). Second part 932395bb186SSam Leffler * makes sure sb_lastrecord is up-to-date if we dropped 933395bb186SSam Leffler * part of the last record. 934395bb186SSam Leffler */ 935395bb186SSam Leffler m = sb->sb_mb; 936395bb186SSam Leffler if (m == NULL) { 937395bb186SSam Leffler sb->sb_mbtail = NULL; 938395bb186SSam Leffler sb->sb_lastrecord = NULL; 939395bb186SSam Leffler } else if (m->m_nextpkt == NULL) { 940395bb186SSam Leffler sb->sb_lastrecord = m; 941395bb186SSam Leffler } 942df8bae1dSRodney W. Grimes } 943df8bae1dSRodney W. Grimes 944df8bae1dSRodney W. Grimes /* 945a34b7046SRobert Watson * Drop data from (the front of) a sockbuf. 946a34b7046SRobert Watson */ 947a34b7046SRobert Watson void 948eaa6dfbcSRobert Watson sbdrop_locked(sb, len) 949eaa6dfbcSRobert Watson register struct sockbuf *sb; 950eaa6dfbcSRobert Watson register int len; 951eaa6dfbcSRobert Watson { 952eaa6dfbcSRobert Watson 953eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 954eaa6dfbcSRobert Watson 955eaa6dfbcSRobert Watson sbdrop_internal(sb, len); 956eaa6dfbcSRobert Watson } 957eaa6dfbcSRobert Watson 958eaa6dfbcSRobert Watson void 959a34b7046SRobert Watson sbdrop(sb, len) 960a34b7046SRobert Watson register struct sockbuf *sb; 961a34b7046SRobert Watson register int len; 962a34b7046SRobert Watson { 963a34b7046SRobert Watson 964a34b7046SRobert Watson SOCKBUF_LOCK(sb); 965a34b7046SRobert Watson sbdrop_locked(sb, len); 966a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 967a34b7046SRobert Watson } 968a34b7046SRobert Watson 969a34b7046SRobert Watson /* 970df8bae1dSRodney W. Grimes * Drop a record off the front of a sockbuf 971df8bae1dSRodney W. Grimes * and move the next record to the front. 972df8bae1dSRodney W. Grimes */ 97326f9a767SRodney W. Grimes void 974a34b7046SRobert Watson sbdroprecord_locked(sb) 975df8bae1dSRodney W. Grimes register struct sockbuf *sb; 976df8bae1dSRodney W. Grimes { 977ecde8f7cSMatthew Dillon register struct mbuf *m; 978df8bae1dSRodney W. Grimes 979a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 980a34b7046SRobert Watson 981df8bae1dSRodney W. Grimes m = sb->sb_mb; 982df8bae1dSRodney W. Grimes if (m) { 983df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt; 984df8bae1dSRodney W. Grimes do { 985df8bae1dSRodney W. Grimes sbfree(sb, m); 986ecde8f7cSMatthew Dillon m = m_free(m); 987797f2d22SPoul-Henning Kamp } while (m); 988df8bae1dSRodney W. Grimes } 989395bb186SSam Leffler SB_EMPTY_FIXUP(sb); 990df8bae1dSRodney W. Grimes } 9911e4ad9ceSGarrett Wollman 99282c23ebaSBill Fenner /* 993a34b7046SRobert Watson * Drop a record off the front of a sockbuf 994a34b7046SRobert Watson * and move the next record to the front. 995a34b7046SRobert Watson */ 996a34b7046SRobert Watson void 997a34b7046SRobert Watson sbdroprecord(sb) 998a34b7046SRobert Watson register struct sockbuf *sb; 999a34b7046SRobert Watson { 1000a34b7046SRobert Watson 1001a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1002a34b7046SRobert Watson sbdroprecord_locked(sb); 1003a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1004a34b7046SRobert Watson } 1005a34b7046SRobert Watson 1006639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ 1007639acc13SGarrett Wollman static int dummy; 1008639acc13SGarrett Wollman SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); 10091b978d45SHartmut Brandt SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 10101b978d45SHartmut Brandt &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); 10111b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 1012639acc13SGarrett Wollman &sb_efficiency, 0, ""); 1013