19454b2d8SWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 32677b542eSDavid E. O'Brien #include <sys/cdefs.h> 33677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 34677b542eSDavid E. O'Brien 355b86eac4SJesper Skriver #include "opt_param.h" 36335654d7SRobert Watson 37df8bae1dSRodney W. Grimes #include <sys/param.h> 38960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */ 39ff5c09daSGarrett Wollman #include <sys/kernel.h> 40fb919e4dSMark Murray #include <sys/lock.h> 41df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 42960ed29cSSeigo Tanimura #include <sys/mutex.h> 43fb919e4dSMark Murray #include <sys/proc.h> 44df8bae1dSRodney W. Grimes #include <sys/protosw.h> 452f9a2132SBrian Feldman #include <sys/resourcevar.h> 46960ed29cSSeigo Tanimura #include <sys/signalvar.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 48df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 49ff5c09daSGarrett Wollman #include <sys/sysctl.h> 5026f9a767SRodney W. Grimes 51f14cce87SRobert Watson /* 52f14cce87SRobert Watson * Function pointer set by the AIO routines so that the socket buffer code 53f14cce87SRobert Watson * can call back into the AIO module if it is loaded. 54f14cce87SRobert Watson */ 5521d56e9cSAlfred Perlstein void (*aio_swake)(struct socket *, struct sockbuf *); 5621d56e9cSAlfred Perlstein 57df8bae1dSRodney W. Grimes /* 58f14cce87SRobert Watson * Primitive routines for operating on socket buffers 59df8bae1dSRodney W. Grimes */ 60df8bae1dSRodney W. 
Grimes 6179cb7eb4SDavid Greenman u_long sb_max = SB_MAX; 6251da11a2SMark Murray static u_long sb_max_adj = 6379cb7eb4SDavid Greenman SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 64df8bae1dSRodney W. Grimes 654b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 664b29bc4fSGarrett Wollman 67df8bae1dSRodney W. Grimes /* 68df8bae1dSRodney W. Grimes * Socantsendmore indicates that no more data will be sent on the 69df8bae1dSRodney W. Grimes * socket; it would normally be applied to a socket when the user 70df8bae1dSRodney W. Grimes * informs the system that no more data is to be sent, by the protocol 71df8bae1dSRodney W. Grimes * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 72df8bae1dSRodney W. Grimes * will be received, and will normally be applied to the socket by a 73df8bae1dSRodney W. Grimes * protocol when it detects that the peer will send no more data. 74df8bae1dSRodney W. Grimes * Data queued for reading in the socket may yet be read. 75df8bae1dSRodney W. Grimes */ 76a34b7046SRobert Watson void 77a34b7046SRobert Watson socantsendmore_locked(so) 78a34b7046SRobert Watson struct socket *so; 79a34b7046SRobert Watson { 80a34b7046SRobert Watson 81a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_snd); 82a34b7046SRobert Watson 83a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE; 84a34b7046SRobert Watson sowwakeup_locked(so); 85a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 86a34b7046SRobert Watson } 87df8bae1dSRodney W. Grimes 8826f9a767SRodney W. Grimes void 89df8bae1dSRodney W. Grimes socantsendmore(so) 90df8bae1dSRodney W. Grimes struct socket *so; 91df8bae1dSRodney W. Grimes { 92df8bae1dSRodney W. 
Grimes 93a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_snd); 94a34b7046SRobert Watson socantsendmore_locked(so); 95a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 96a34b7046SRobert Watson } 97a34b7046SRobert Watson 98a34b7046SRobert Watson void 99a34b7046SRobert Watson socantrcvmore_locked(so) 100a34b7046SRobert Watson struct socket *so; 101a34b7046SRobert Watson { 102a34b7046SRobert Watson 103a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_rcv); 104a34b7046SRobert Watson 105a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE; 106a34b7046SRobert Watson sorwakeup_locked(so); 107a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 108df8bae1dSRodney W. Grimes } 109df8bae1dSRodney W. Grimes 11026f9a767SRodney W. Grimes void 111df8bae1dSRodney W. Grimes socantrcvmore(so) 112df8bae1dSRodney W. Grimes struct socket *so; 113df8bae1dSRodney W. Grimes { 114df8bae1dSRodney W. Grimes 115a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 116a34b7046SRobert Watson socantrcvmore_locked(so); 117a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 118df8bae1dSRodney W. Grimes } 119df8bae1dSRodney W. Grimes 120df8bae1dSRodney W. Grimes /* 121df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer. 122df8bae1dSRodney W. Grimes */ 12326f9a767SRodney W. Grimes int 124df8bae1dSRodney W. Grimes sbwait(sb) 125df8bae1dSRodney W. Grimes struct sockbuf *sb; 126df8bae1dSRodney W. Grimes { 127df8bae1dSRodney W. Grimes 12831f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 12931f555a1SRobert Watson 130df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT; 13131f555a1SRobert Watson return (msleep(&sb->sb_cc, &sb->sb_mtx, 13247daf5d5SBruce Evans (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", 133df8bae1dSRodney W. Grimes sb->sb_timeo)); 134df8bae1dSRodney W. Grimes } 135df8bae1dSRodney W. Grimes 136df8bae1dSRodney W. Grimes /* 137df8bae1dSRodney W. 
Grimes * Lock a sockbuf already known to be locked; 138df8bae1dSRodney W. Grimes * return any error returned from sleep (EINTR). 139df8bae1dSRodney W. Grimes */ 14026f9a767SRodney W. Grimes int 141df8bae1dSRodney W. Grimes sb_lock(sb) 142df8bae1dSRodney W. Grimes register struct sockbuf *sb; 143df8bae1dSRodney W. Grimes { 144df8bae1dSRodney W. Grimes int error; 145df8bae1dSRodney W. Grimes 14631f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 14731f555a1SRobert Watson 148df8bae1dSRodney W. Grimes while (sb->sb_flags & SB_LOCK) { 149df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WANT; 15031f555a1SRobert Watson error = msleep(&sb->sb_flags, &sb->sb_mtx, 151df8bae1dSRodney W. Grimes (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, 15247daf5d5SBruce Evans "sblock", 0); 153797f2d22SPoul-Henning Kamp if (error) 154df8bae1dSRodney W. Grimes return (error); 155df8bae1dSRodney W. Grimes } 156df8bae1dSRodney W. Grimes sb->sb_flags |= SB_LOCK; 157df8bae1dSRodney W. Grimes return (0); 158df8bae1dSRodney W. Grimes } 159df8bae1dSRodney W. Grimes 160df8bae1dSRodney W. Grimes /* 161a34b7046SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous 162a34b7046SRobert Watson * notification via SIGIO if the socket has the SS_ASYNC flag set. 163a34b7046SRobert Watson * 164a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end 165a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock 166a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through 167a34b7046SRobert Watson * to the point where it's no longer required. We currently hold the lock 168a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and 169a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's 170a34b7046SRobert Watson * correct. 171df8bae1dSRodney W. Grimes */ 17226f9a767SRodney W. 
Grimes void 173df8bae1dSRodney W. Grimes sowakeup(so, sb) 174df8bae1dSRodney W. Grimes register struct socket *so; 175df8bae1dSRodney W. Grimes register struct sockbuf *sb; 176df8bae1dSRodney W. Grimes { 177d48d4b25SSeigo Tanimura 178a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 179a34b7046SRobert Watson 180512824f8SSeigo Tanimura selwakeuppri(&sb->sb_sel, PSOCK); 181df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_SEL; 182df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) { 183df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT; 18480208239SAlfred Perlstein wakeup(&sb->sb_cc); 185df8bae1dSRodney W. Grimes } 186ad3b9257SJohn-Mark Gurney KNOTE_LOCKED(&sb->sb_sel.si_note, 0); 187a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1884cc20ab1SSeigo Tanimura if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) 189f1320723SAlfred Perlstein pgsigio(&so->so_sigio, SIGIO, 0); 1904cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_UPCALL) 191a163d034SWarner Losh (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); 1924cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO) 193bfbbc4aaSJason Evans aio_swake(so, sb); 194a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); 195df8bae1dSRodney W. Grimes } 196df8bae1dSRodney W. Grimes 197df8bae1dSRodney W. Grimes /* 198df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines. 199df8bae1dSRodney W. Grimes * 200df8bae1dSRodney W. Grimes * Each socket contains two socket buffers: one for sending data and 201df8bae1dSRodney W. Grimes * one for receiving data. Each buffer contains a queue of mbufs, 202df8bae1dSRodney W. Grimes * information about the number of mbufs and amount of data in the 203df8bae1dSRodney W. Grimes * queue, and other fields allowing select() statements and notification 204df8bae1dSRodney W. Grimes * on data availability to be implemented. 205df8bae1dSRodney W. Grimes * 206df8bae1dSRodney W. Grimes * Data stored in a socket buffer is maintained as a list of records. 207df8bae1dSRodney W. 
Grimes * Each record is a list of mbufs chained together with the m_next 208df8bae1dSRodney W. Grimes * field. Records are chained together with the m_nextpkt field. The upper 209df8bae1dSRodney W. Grimes * level routine soreceive() expects the following conventions to be 210df8bae1dSRodney W. Grimes * observed when placing information in the receive buffer: 211df8bae1dSRodney W. Grimes * 212df8bae1dSRodney W. Grimes * 1. If the protocol requires each message be preceded by the sender's 213df8bae1dSRodney W. Grimes * name, then a record containing that name must be present before 214df8bae1dSRodney W. Grimes * any associated data (mbuf's must be of type MT_SONAME). 215df8bae1dSRodney W. Grimes * 2. If the protocol supports the exchange of ``access rights'' (really 216df8bae1dSRodney W. Grimes * just additional data associated with the message), and there are 217df8bae1dSRodney W. Grimes * ``rights'' to be received, then a record containing this data 218df8bae1dSRodney W. Grimes * should be present (mbuf's must be of type MT_RIGHTS). 219df8bae1dSRodney W. Grimes * 3. If a name or rights record exists, then it must be followed by 220df8bae1dSRodney W. Grimes * a data record, perhaps of zero length. 221df8bae1dSRodney W. Grimes * 222df8bae1dSRodney W. Grimes * Before using a new socket structure it is first necessary to reserve 223df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit 224df8bae1dSRodney W. Grimes * some of the available buffer space in the system buffer pool for the 225df8bae1dSRodney W. Grimes * socket (currently, it does nothing but enforce limits). The space 226df8bae1dSRodney W. Grimes * should be released by calling sbrelease() when the socket is destroyed. 227df8bae1dSRodney W. Grimes */ 228df8bae1dSRodney W. Grimes 22926f9a767SRodney W. Grimes int 230df8bae1dSRodney W. Grimes soreserve(so, sndcc, rcvcc) 231df8bae1dSRodney W. Grimes register struct socket *so; 232df8bae1dSRodney W. 
Grimes u_long sndcc, rcvcc; 233df8bae1dSRodney W. Grimes { 234b40ce416SJulian Elischer struct thread *td = curthread; 235df8bae1dSRodney W. Grimes 2363f11a2f3SRobert Watson SOCKBUF_LOCK(&so->so_snd); 2379535efc0SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 2383f11a2f3SRobert Watson if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) 2393f11a2f3SRobert Watson goto bad; 2403f11a2f3SRobert Watson if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) 2413f11a2f3SRobert Watson goto bad2; 242df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0) 243df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1; 244df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0) 245df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES; 246df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 247df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 2483f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 2499535efc0SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 250df8bae1dSRodney W. Grimes return (0); 251df8bae1dSRodney W. Grimes bad2: 2523f11a2f3SRobert Watson sbrelease_locked(&so->so_snd, so); 253df8bae1dSRodney W. Grimes bad: 2543f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 2553f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 256df8bae1dSRodney W. Grimes return (ENOBUFS); 257df8bae1dSRodney W. Grimes } 258df8bae1dSRodney W. 
Grimes 25979cb7eb4SDavid Greenman static int 26079cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 26179cb7eb4SDavid Greenman { 26279cb7eb4SDavid Greenman int error = 0; 26379cb7eb4SDavid Greenman u_long old_sb_max = sb_max; 26479cb7eb4SDavid Greenman 2651b978d45SHartmut Brandt error = SYSCTL_OUT(req, arg1, sizeof(u_long)); 26679cb7eb4SDavid Greenman if (error || !req->newptr) 26779cb7eb4SDavid Greenman return (error); 2681b978d45SHartmut Brandt error = SYSCTL_IN(req, arg1, sizeof(u_long)); 26979cb7eb4SDavid Greenman if (error) 27079cb7eb4SDavid Greenman return (error); 27179cb7eb4SDavid Greenman if (sb_max < MSIZE + MCLBYTES) { 27279cb7eb4SDavid Greenman sb_max = old_sb_max; 27379cb7eb4SDavid Greenman return (EINVAL); 27479cb7eb4SDavid Greenman } 27579cb7eb4SDavid Greenman sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 27679cb7eb4SDavid Greenman return (0); 27779cb7eb4SDavid Greenman } 27879cb7eb4SDavid Greenman 279df8bae1dSRodney W. Grimes /* 280df8bae1dSRodney W. Grimes * Allot mbufs to a sockbuf. 281df8bae1dSRodney W. Grimes * Attempt to scale mbmax so that mbcnt doesn't become limiting 282df8bae1dSRodney W. Grimes * if buffering efficiency is near the normal case. 283df8bae1dSRodney W. Grimes */ 28426f9a767SRodney W. Grimes int 2853f11a2f3SRobert Watson sbreserve_locked(sb, cc, so, td) 286df8bae1dSRodney W. Grimes struct sockbuf *sb; 287df8bae1dSRodney W. Grimes u_long cc; 288ecf72308SBrian Feldman struct socket *so; 289b40ce416SJulian Elischer struct thread *td; 290df8bae1dSRodney W. Grimes { 29191d5354aSJohn Baldwin rlim_t sbsize_limit; 292ecf72308SBrian Feldman 2933f11a2f3SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 2943f11a2f3SRobert Watson 295ecf72308SBrian Feldman /* 296b40ce416SJulian Elischer * td will only be NULL when we're in an interrupt 297ecf72308SBrian Feldman * (e.g. in tcp_input()) 298ecf72308SBrian Feldman */ 29979cb7eb4SDavid Greenman if (cc > sb_max_adj) 300df8bae1dSRodney W. 
Grimes return (0); 30191d5354aSJohn Baldwin if (td != NULL) { 30291d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 30391d5354aSJohn Baldwin sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE); 30491d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 30591d5354aSJohn Baldwin } else 30691d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY; 307f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 30891d5354aSJohn Baldwin sbsize_limit)) 309ecf72308SBrian Feldman return (0); 3104b29bc4fSGarrett Wollman sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 311df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat) 312df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat; 313df8bae1dSRodney W. Grimes return (1); 314df8bae1dSRodney W. Grimes } 315df8bae1dSRodney W. Grimes 3163f11a2f3SRobert Watson int 3173f11a2f3SRobert Watson sbreserve(sb, cc, so, td) 3183f11a2f3SRobert Watson struct sockbuf *sb; 3193f11a2f3SRobert Watson u_long cc; 3203f11a2f3SRobert Watson struct socket *so; 3213f11a2f3SRobert Watson struct thread *td; 3223f11a2f3SRobert Watson { 3233f11a2f3SRobert Watson int error; 3243f11a2f3SRobert Watson 3253f11a2f3SRobert Watson SOCKBUF_LOCK(sb); 3263f11a2f3SRobert Watson error = sbreserve_locked(sb, cc, so, td); 3273f11a2f3SRobert Watson SOCKBUF_UNLOCK(sb); 3283f11a2f3SRobert Watson return (error); 3293f11a2f3SRobert Watson } 3303f11a2f3SRobert Watson 331df8bae1dSRodney W. Grimes /* 332df8bae1dSRodney W. Grimes * Free mbufs held by a socket, and reserved mbuf space. 333df8bae1dSRodney W. Grimes */ 33426f9a767SRodney W. Grimes void 335a34b7046SRobert Watson sbrelease_locked(sb, so) 336df8bae1dSRodney W. Grimes struct sockbuf *sb; 337ecf72308SBrian Feldman struct socket *so; 338df8bae1dSRodney W. Grimes { 339df8bae1dSRodney W. 
Grimes 340a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 341a34b7046SRobert Watson 342a34b7046SRobert Watson sbflush_locked(sb); 343f535380cSDon Lewis (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 344f535380cSDon Lewis RLIM_INFINITY); 3456aef685fSBrian Feldman sb->sb_mbmax = 0; 346df8bae1dSRodney W. Grimes } 347df8bae1dSRodney W. Grimes 348a34b7046SRobert Watson void 349a34b7046SRobert Watson sbrelease(sb, so) 350a34b7046SRobert Watson struct sockbuf *sb; 351a34b7046SRobert Watson struct socket *so; 352a34b7046SRobert Watson { 353a34b7046SRobert Watson 354a34b7046SRobert Watson SOCKBUF_LOCK(sb); 355a34b7046SRobert Watson sbrelease_locked(sb, so); 356a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 357a34b7046SRobert Watson } 358df8bae1dSRodney W. Grimes /* 359df8bae1dSRodney W. Grimes * Routines to add and remove 360df8bae1dSRodney W. Grimes * data from an mbuf queue. 361df8bae1dSRodney W. Grimes * 362df8bae1dSRodney W. Grimes * The routines sbappend() or sbappendrecord() are normally called to 363df8bae1dSRodney W. Grimes * append new mbufs to a socket buffer, after checking that adequate 364df8bae1dSRodney W. Grimes * space is available, comparing the function sbspace() with the amount 365df8bae1dSRodney W. Grimes * of data to be added. sbappendrecord() differs from sbappend() in 366df8bae1dSRodney W. Grimes * that data supplied is treated as the beginning of a new record. 367df8bae1dSRodney W. Grimes * To place a sender's address, optional access rights, and data in a 368df8bae1dSRodney W. Grimes * socket receive buffer, sbappendaddr() should be used. To place 369df8bae1dSRodney W. Grimes * access rights and data in a socket receive buffer, sbappendrights() 370df8bae1dSRodney W. Grimes * should be used. In either case, the new data begins a new record. 371df8bae1dSRodney W. Grimes * Note that unlike sbappend() and sbappendrecord(), these routines check 372df8bae1dSRodney W. Grimes * for the caller that there will be enough space to store the data. 
373df8bae1dSRodney W. Grimes * Each fails if there is not enough space, or if it cannot find mbufs 374df8bae1dSRodney W. Grimes * to store additional information in. 375df8bae1dSRodney W. Grimes * 376df8bae1dSRodney W. Grimes * Reliable protocols may use the socket send buffer to hold data 377df8bae1dSRodney W. Grimes * awaiting acknowledgement. Data is normally copied from a socket 378df8bae1dSRodney W. Grimes * send buffer in a protocol with m_copy for output to a peer, 379df8bae1dSRodney W. Grimes * and then removing the data from the socket buffer with sbdrop() 380df8bae1dSRodney W. Grimes * or sbdroprecord() when the data is acknowledged by the peer. 381df8bae1dSRodney W. Grimes */ 382df8bae1dSRodney W. Grimes 383395bb186SSam Leffler #ifdef SOCKBUF_DEBUG 384395bb186SSam Leffler void 385395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line) 386395bb186SSam Leffler { 387395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 388395bb186SSam Leffler 389a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 390a34b7046SRobert Watson 391395bb186SSam Leffler while (m && m->m_nextpkt) 392395bb186SSam Leffler m = m->m_nextpkt; 393395bb186SSam Leffler 394395bb186SSam Leffler if (m != sb->sb_lastrecord) { 395395bb186SSam Leffler printf("%s: sb_mb %p sb_lastrecord %p last %p\n", 396395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_lastrecord, m); 397395bb186SSam Leffler printf("packet chain:\n"); 398395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 399395bb186SSam Leffler printf("\t%p\n", m); 400395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 401395bb186SSam Leffler } 402395bb186SSam Leffler } 403395bb186SSam Leffler 404395bb186SSam Leffler void 405395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line) 406395bb186SSam Leffler { 407395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 408395bb186SSam Leffler struct mbuf *n; 409395bb186SSam Leffler 410a34b7046SRobert Watson 
SOCKBUF_LOCK_ASSERT(sb); 411a34b7046SRobert Watson 412395bb186SSam Leffler while (m && m->m_nextpkt) 413395bb186SSam Leffler m = m->m_nextpkt; 414395bb186SSam Leffler 415395bb186SSam Leffler while (m && m->m_next) 416395bb186SSam Leffler m = m->m_next; 417395bb186SSam Leffler 418395bb186SSam Leffler if (m != sb->sb_mbtail) { 419395bb186SSam Leffler printf("%s: sb_mb %p sb_mbtail %p last %p\n", 420395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_mbtail, m); 421395bb186SSam Leffler printf("packet tree:\n"); 422395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 423395bb186SSam Leffler printf("\t"); 424395bb186SSam Leffler for (n = m; n != NULL; n = n->m_next) 425395bb186SSam Leffler printf("%p ", n); 426395bb186SSam Leffler printf("\n"); 427395bb186SSam Leffler } 428395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 429395bb186SSam Leffler } 430395bb186SSam Leffler } 431395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */ 432395bb186SSam Leffler 433395bb186SSam Leffler #define SBLINKRECORD(sb, m0) do { \ 434a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); \ 435395bb186SSam Leffler if ((sb)->sb_lastrecord != NULL) \ 436395bb186SSam Leffler (sb)->sb_lastrecord->m_nextpkt = (m0); \ 437395bb186SSam Leffler else \ 438395bb186SSam Leffler (sb)->sb_mb = (m0); \ 439395bb186SSam Leffler (sb)->sb_lastrecord = (m0); \ 440395bb186SSam Leffler } while (/*CONSTCOND*/0) 441395bb186SSam Leffler 442df8bae1dSRodney W. Grimes /* 443df8bae1dSRodney W. Grimes * Append mbuf chain m to the last record in the 444df8bae1dSRodney W. Grimes * socket buffer sb. The additional space associated 445df8bae1dSRodney W. Grimes * the mbuf chain is recorded in sb. Empty mbufs are 446df8bae1dSRodney W. Grimes * discarded and mbufs are compacted where possible. 447df8bae1dSRodney W. Grimes */ 44826f9a767SRodney W. Grimes void 449a34b7046SRobert Watson sbappend_locked(sb, m) 450df8bae1dSRodney W. Grimes struct sockbuf *sb; 451df8bae1dSRodney W. 
Grimes struct mbuf *m; 452df8bae1dSRodney W. Grimes { 453df8bae1dSRodney W. Grimes register struct mbuf *n; 454df8bae1dSRodney W. Grimes 455a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 456a34b7046SRobert Watson 457df8bae1dSRodney W. Grimes if (m == 0) 458df8bae1dSRodney W. Grimes return; 459a34b7046SRobert Watson 460395bb186SSam Leffler SBLASTRECORDCHK(sb); 461797f2d22SPoul-Henning Kamp n = sb->sb_mb; 462797f2d22SPoul-Henning Kamp if (n) { 463df8bae1dSRodney W. Grimes while (n->m_nextpkt) 464df8bae1dSRodney W. Grimes n = n->m_nextpkt; 465df8bae1dSRodney W. Grimes do { 466df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) { 467a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 468df8bae1dSRodney W. Grimes return; 469df8bae1dSRodney W. Grimes } 470df8bae1dSRodney W. Grimes } while (n->m_next && (n = n->m_next)); 471395bb186SSam Leffler } else { 472395bb186SSam Leffler /* 473395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but 474395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that 475395bb186SSam Leffler * XXX way. 476395bb186SSam Leffler */ 477395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) { 478395bb186SSam Leffler do { 479395bb186SSam Leffler if (n->m_flags & M_EOR) { 480a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 481395bb186SSam Leffler return; 482395bb186SSam Leffler } 483395bb186SSam Leffler } while (n->m_next && (n = n->m_next)); 484395bb186SSam Leffler } else { 485395bb186SSam Leffler /* 486395bb186SSam Leffler * If this is the first record in the socket buffer, 487395bb186SSam Leffler * it's also the last record. 488395bb186SSam Leffler */ 489395bb186SSam Leffler sb->sb_lastrecord = m; 490395bb186SSam Leffler } 491df8bae1dSRodney W. Grimes } 492df8bae1dSRodney W. 
Grimes sbcompress(sb, m, n); 493395bb186SSam Leffler SBLASTRECORDCHK(sb); 494395bb186SSam Leffler } 495395bb186SSam Leffler 496395bb186SSam Leffler /* 497a34b7046SRobert Watson * Append mbuf chain m to the last record in the 498a34b7046SRobert Watson * socket buffer sb. The additional space associated 499a34b7046SRobert Watson * the mbuf chain is recorded in sb. Empty mbufs are 500a34b7046SRobert Watson * discarded and mbufs are compacted where possible. 501a34b7046SRobert Watson */ 502a34b7046SRobert Watson void 503a34b7046SRobert Watson sbappend(sb, m) 504a34b7046SRobert Watson struct sockbuf *sb; 505a34b7046SRobert Watson struct mbuf *m; 506a34b7046SRobert Watson { 507a34b7046SRobert Watson 508a34b7046SRobert Watson SOCKBUF_LOCK(sb); 509a34b7046SRobert Watson sbappend_locked(sb, m); 510a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 511a34b7046SRobert Watson } 512a34b7046SRobert Watson 513a34b7046SRobert Watson /* 514395bb186SSam Leffler * This version of sbappend() should only be used when the caller 515395bb186SSam Leffler * absolutely knows that there will never be more than one record 516395bb186SSam Leffler * in the socket buffer, that is, a stream protocol (such as TCP). 517395bb186SSam Leffler */ 518395bb186SSam Leffler void 519a34b7046SRobert Watson sbappendstream_locked(struct sockbuf *sb, struct mbuf *m) 520395bb186SSam Leffler { 521a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 522395bb186SSam Leffler 523395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); 524395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); 525395bb186SSam Leffler 526395bb186SSam Leffler SBLASTMBUFCHK(sb); 527395bb186SSam Leffler 528395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail); 529395bb186SSam Leffler 530395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb; 531395bb186SSam Leffler SBLASTRECORDCHK(sb); 532df8bae1dSRodney W. Grimes } 533df8bae1dSRodney W. 
/*
 * This version of sbappend() should only be used when the caller
 * absolutely knows that there will never be more than one record
 * in the socket buffer, that is, a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m);
	SOCKBUF_UNLOCK(sb);
}

#ifdef SOCKBUF_DEBUG
/*
 * Debug check: recompute the byte count and mbuf storage count by walking
 * every mbuf of every record, and panic if they disagree with sb_cc and
 * sb_mbcnt.  Must be called with the socket buffer lock held.
 */
void
sbcheck(struct sockbuf *sb)
{
	struct mbuf *m;
	struct mbuf *n = NULL;
	u_long len = 0, mbcnt = 0;

	SOCKBUF_LOCK_ASSERT(sb);

	for (m = sb->sb_mb; m; m = n) {
		/* Remember the next record before walking this one. */
		n = m->m_nextpkt;
		for (; m; m = m->m_next) {
			len += m->m_len;
			mbcnt += MSIZE;
			if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
				mbcnt += m->m_ext.ext_size;
		}
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		/* len and mbcnt are u_long, so print them with %lu. */
		printf("cc %lu != %u || mbcnt %lu != %u\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif
Grimes 576df8bae1dSRodney W. Grimes /* 577df8bae1dSRodney W. Grimes * As above, except the mbuf chain 578df8bae1dSRodney W. Grimes * begins a new record. 579df8bae1dSRodney W. Grimes */ 58026f9a767SRodney W. Grimes void 581a34b7046SRobert Watson sbappendrecord_locked(sb, m0) 582df8bae1dSRodney W. Grimes register struct sockbuf *sb; 583df8bae1dSRodney W. Grimes register struct mbuf *m0; 584df8bae1dSRodney W. Grimes { 585df8bae1dSRodney W. Grimes register struct mbuf *m; 586df8bae1dSRodney W. Grimes 587a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 588a34b7046SRobert Watson 589df8bae1dSRodney W. Grimes if (m0 == 0) 590df8bae1dSRodney W. Grimes return; 591797f2d22SPoul-Henning Kamp m = sb->sb_mb; 592797f2d22SPoul-Henning Kamp if (m) 593df8bae1dSRodney W. Grimes while (m->m_nextpkt) 594df8bae1dSRodney W. Grimes m = m->m_nextpkt; 595df8bae1dSRodney W. Grimes /* 596df8bae1dSRodney W. Grimes * Put the first mbuf on the queue. 597df8bae1dSRodney W. Grimes * Note this permits zero length records. 598df8bae1dSRodney W. Grimes */ 599df8bae1dSRodney W. Grimes sballoc(sb, m0); 600395bb186SSam Leffler SBLASTRECORDCHK(sb); 601395bb186SSam Leffler SBLINKRECORD(sb, m0); 602df8bae1dSRodney W. Grimes if (m) 603df8bae1dSRodney W. Grimes m->m_nextpkt = m0; 604df8bae1dSRodney W. Grimes else 605df8bae1dSRodney W. Grimes sb->sb_mb = m0; 606df8bae1dSRodney W. Grimes m = m0->m_next; 607df8bae1dSRodney W. Grimes m0->m_next = 0; 608df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) { 609df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR; 610df8bae1dSRodney W. Grimes m->m_flags |= M_EOR; 611df8bae1dSRodney W. Grimes } 612df8bae1dSRodney W. Grimes sbcompress(sb, m, m0); 613df8bae1dSRodney W. Grimes } 614df8bae1dSRodney W. Grimes 615df8bae1dSRodney W. Grimes /* 616a34b7046SRobert Watson * As above, except the mbuf chain 617a34b7046SRobert Watson * begins a new record. 
618a34b7046SRobert Watson */ 619a34b7046SRobert Watson void 620a34b7046SRobert Watson sbappendrecord(sb, m0) 621a34b7046SRobert Watson register struct sockbuf *sb; 622a34b7046SRobert Watson register struct mbuf *m0; 623a34b7046SRobert Watson { 624a34b7046SRobert Watson 625a34b7046SRobert Watson SOCKBUF_LOCK(sb); 626a34b7046SRobert Watson sbappendrecord_locked(sb, m0); 627a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 628a34b7046SRobert Watson } 629a34b7046SRobert Watson 630a34b7046SRobert Watson /* 631df8bae1dSRodney W. Grimes * Append address and data, and optionally, control (ancillary) data 632df8bae1dSRodney W. Grimes * to the receive queue of a socket. If present, 633df8bae1dSRodney W. Grimes * m0 must include a packet header with total length. 634df8bae1dSRodney W. Grimes * Returns 0 if no space in sockbuf or insufficient mbufs. 635df8bae1dSRodney W. Grimes */ 63626f9a767SRodney W. Grimes int 637a34b7046SRobert Watson sbappendaddr_locked(sb, asa, m0, control) 6387ed60de8SPoul-Henning Kamp struct sockbuf *sb; 639e7dd9a10SRobert Watson const struct sockaddr *asa; 640df8bae1dSRodney W. Grimes struct mbuf *m0, *control; 641df8bae1dSRodney W. Grimes { 642395bb186SSam Leffler struct mbuf *m, *n, *nlast; 643df8bae1dSRodney W. Grimes int space = asa->sa_len; 644df8bae1dSRodney W. Grimes 645a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 646a34b7046SRobert Watson 647df8bae1dSRodney W. Grimes if (m0 && (m0->m_flags & M_PKTHDR) == 0) 648a34b7046SRobert Watson panic("sbappendaddr_locked"); 649df8bae1dSRodney W. Grimes if (m0) 650df8bae1dSRodney W. Grimes space += m0->m_pkthdr.len; 6517ed60de8SPoul-Henning Kamp space += m_length(control, &n); 652a34b7046SRobert Watson 653df8bae1dSRodney W. Grimes if (space > sbspace(sb)) 654df8bae1dSRodney W. Grimes return (0); 655c43cad1aSScott Long #if MSIZE <= 256 656df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN) 657df8bae1dSRodney W. 
Grimes return (0); 658c43cad1aSScott Long #endif 659a163d034SWarner Losh MGET(m, M_DONTWAIT, MT_SONAME); 660df8bae1dSRodney W. Grimes if (m == 0) 661df8bae1dSRodney W. Grimes return (0); 662df8bae1dSRodney W. Grimes m->m_len = asa->sa_len; 66380208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len); 664df8bae1dSRodney W. Grimes if (n) 665df8bae1dSRodney W. Grimes n->m_next = m0; /* concatenate data to control */ 666df8bae1dSRodney W. Grimes else 667df8bae1dSRodney W. Grimes control = m0; 668df8bae1dSRodney W. Grimes m->m_next = control; 669395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next) 670df8bae1dSRodney W. Grimes sballoc(sb, n); 671395bb186SSam Leffler sballoc(sb, n); 672395bb186SSam Leffler nlast = n; 673395bb186SSam Leffler SBLINKRECORD(sb, m); 674395bb186SSam Leffler 675395bb186SSam Leffler sb->sb_mbtail = nlast; 676395bb186SSam Leffler SBLASTMBUFCHK(sb); 677395bb186SSam Leffler 678395bb186SSam Leffler SBLASTRECORDCHK(sb); 679df8bae1dSRodney W. Grimes return (1); 680df8bae1dSRodney W. Grimes } 681df8bae1dSRodney W. Grimes 682a34b7046SRobert Watson /* 683a34b7046SRobert Watson * Append address and data, and optionally, control (ancillary) data 684a34b7046SRobert Watson * to the receive queue of a socket. If present, 685a34b7046SRobert Watson * m0 must include a packet header with total length. 686a34b7046SRobert Watson * Returns 0 if no space in sockbuf or insufficient mbufs. 687a34b7046SRobert Watson */ 68826f9a767SRodney W. 
Grimes int 689a34b7046SRobert Watson sbappendaddr(sb, asa, m0, control) 690a34b7046SRobert Watson struct sockbuf *sb; 691a34b7046SRobert Watson const struct sockaddr *asa; 692a34b7046SRobert Watson struct mbuf *m0, *control; 693a34b7046SRobert Watson { 694a34b7046SRobert Watson int retval; 695a34b7046SRobert Watson 696a34b7046SRobert Watson SOCKBUF_LOCK(sb); 697a34b7046SRobert Watson retval = sbappendaddr_locked(sb, asa, m0, control); 698a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 699a34b7046SRobert Watson return (retval); 700a34b7046SRobert Watson } 701a34b7046SRobert Watson 702a34b7046SRobert Watson int 703a34b7046SRobert Watson sbappendcontrol_locked(sb, m0, control) 704df8bae1dSRodney W. Grimes struct sockbuf *sb; 705df8bae1dSRodney W. Grimes struct mbuf *control, *m0; 706df8bae1dSRodney W. Grimes { 707395bb186SSam Leffler struct mbuf *m, *n, *mlast; 7087ed60de8SPoul-Henning Kamp int space; 709df8bae1dSRodney W. Grimes 710a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 711a34b7046SRobert Watson 712df8bae1dSRodney W. Grimes if (control == 0) 713a34b7046SRobert Watson panic("sbappendcontrol_locked"); 7147ed60de8SPoul-Henning Kamp space = m_length(control, &n) + m_length(m0, NULL); 715a34b7046SRobert Watson 716df8bae1dSRodney W. Grimes if (space > sbspace(sb)) 717df8bae1dSRodney W. Grimes return (0); 718df8bae1dSRodney W. Grimes n->m_next = m0; /* concatenate data to control */ 719395bb186SSam Leffler 720395bb186SSam Leffler SBLASTRECORDCHK(sb); 721395bb186SSam Leffler 722395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next) 723df8bae1dSRodney W. Grimes sballoc(sb, m); 724395bb186SSam Leffler sballoc(sb, m); 725395bb186SSam Leffler mlast = m; 726395bb186SSam Leffler SBLINKRECORD(sb, control); 727395bb186SSam Leffler 728395bb186SSam Leffler sb->sb_mbtail = mlast; 729395bb186SSam Leffler SBLASTMBUFCHK(sb); 730395bb186SSam Leffler 731395bb186SSam Leffler SBLASTRECORDCHK(sb); 732df8bae1dSRodney W. Grimes return (1); 733df8bae1dSRodney W. 
Grimes } 734df8bae1dSRodney W. Grimes 735a34b7046SRobert Watson int 736a34b7046SRobert Watson sbappendcontrol(sb, m0, control) 737a34b7046SRobert Watson struct sockbuf *sb; 738a34b7046SRobert Watson struct mbuf *control, *m0; 739a34b7046SRobert Watson { 740a34b7046SRobert Watson int retval; 741a34b7046SRobert Watson 742a34b7046SRobert Watson SOCKBUF_LOCK(sb); 743a34b7046SRobert Watson retval = sbappendcontrol_locked(sb, m0, control); 744a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 745a34b7046SRobert Watson return (retval); 746a34b7046SRobert Watson } 747a34b7046SRobert Watson 748df8bae1dSRodney W. Grimes /* 7497da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 7507da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty. 7517da7362bSRobert Watson * 7527da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of 7537da7362bSRobert Watson * three ways: 7547da7362bSRobert Watson * 7557da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 7567da7362bSRobert Watson * record boundary, and no change in data type). 7577da7362bSRobert Watson * 7587da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 7597da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an 7607da7362bSRobert Watson * appropriate mbuf exists, there is room, and no merging of data types 7617da7362bSRobert Watson * will occur. 7627da7362bSRobert Watson * 7637da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain. 7647da7362bSRobert Watson * 7657da7362bSRobert Watson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as 7667da7362bSRobert Watson * end-of-record. 767df8bae1dSRodney W. Grimes */ 76826f9a767SRodney W. Grimes void 769df8bae1dSRodney W. Grimes sbcompress(sb, m, n) 770df8bae1dSRodney W. 
Grimes register struct sockbuf *sb; 771df8bae1dSRodney W. Grimes register struct mbuf *m, *n; 772df8bae1dSRodney W. Grimes { 773df8bae1dSRodney W. Grimes register int eor = 0; 774df8bae1dSRodney W. Grimes register struct mbuf *o; 775df8bae1dSRodney W. Grimes 776a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 777a34b7046SRobert Watson 778df8bae1dSRodney W. Grimes while (m) { 779df8bae1dSRodney W. Grimes eor |= m->m_flags & M_EOR; 780df8bae1dSRodney W. Grimes if (m->m_len == 0 && 781df8bae1dSRodney W. Grimes (eor == 0 || 782df8bae1dSRodney W. Grimes (((o = m->m_next) || (o = n)) && 783df8bae1dSRodney W. Grimes o->m_type == m->m_type))) { 784395bb186SSam Leffler if (sb->sb_lastrecord == m) 785395bb186SSam Leffler sb->sb_lastrecord = m->m_next; 786df8bae1dSRodney W. Grimes m = m_free(m); 787df8bae1dSRodney W. Grimes continue; 788df8bae1dSRodney W. Grimes } 78932af0d74SDavid Malone if (n && (n->m_flags & M_EOR) == 0 && 79032af0d74SDavid Malone M_WRITABLE(n) && 79132af0d74SDavid Malone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 79232af0d74SDavid Malone m->m_len <= M_TRAILINGSPACE(n) && 793df8bae1dSRodney W. Grimes n->m_type == m->m_type) { 794df8bae1dSRodney W. Grimes bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 795df8bae1dSRodney W. Grimes (unsigned)m->m_len); 796df8bae1dSRodney W. Grimes n->m_len += m->m_len; 797df8bae1dSRodney W. Grimes sb->sb_cc += m->m_len; 79834333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 799b3f1af6bSTim J. Robbins /* XXX: Probably don't need.*/ 80004ac9b97SKelly Yancey sb->sb_ctl += m->m_len; 801df8bae1dSRodney W. Grimes m = m_free(m); 802df8bae1dSRodney W. Grimes continue; 803df8bae1dSRodney W. Grimes } 804df8bae1dSRodney W. Grimes if (n) 805df8bae1dSRodney W. Grimes n->m_next = m; 806df8bae1dSRodney W. Grimes else 807df8bae1dSRodney W. Grimes sb->sb_mb = m; 808395bb186SSam Leffler sb->sb_mbtail = m; 809df8bae1dSRodney W. Grimes sballoc(sb, m); 810df8bae1dSRodney W. 
Grimes n = m; 811df8bae1dSRodney W. Grimes m->m_flags &= ~M_EOR; 812df8bae1dSRodney W. Grimes m = m->m_next; 813df8bae1dSRodney W. Grimes n->m_next = 0; 814df8bae1dSRodney W. Grimes } 815df8bae1dSRodney W. Grimes if (eor) { 8167da7362bSRobert Watson KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); 817df8bae1dSRodney W. Grimes n->m_flags |= eor; 818df8bae1dSRodney W. Grimes } 819395bb186SSam Leffler SBLASTMBUFCHK(sb); 820df8bae1dSRodney W. Grimes } 821df8bae1dSRodney W. Grimes 822df8bae1dSRodney W. Grimes /* 823df8bae1dSRodney W. Grimes * Free all mbufs in a sockbuf. 824df8bae1dSRodney W. Grimes * Check that all resources are reclaimed. 825df8bae1dSRodney W. Grimes */ 82626f9a767SRodney W. Grimes void 827a34b7046SRobert Watson sbflush_locked(sb) 828df8bae1dSRodney W. Grimes register struct sockbuf *sb; 829df8bae1dSRodney W. Grimes { 830df8bae1dSRodney W. Grimes 831a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 832a34b7046SRobert Watson 833df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_LOCK) 834a34b7046SRobert Watson panic("sbflush_locked: locked"); 83523f84772SPierre Beyssac while (sb->sb_mbcnt) { 83623f84772SPierre Beyssac /* 83723f84772SPierre Beyssac * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty: 83823f84772SPierre Beyssac * we would loop forever. Panic instead. 
83923f84772SPierre Beyssac */ 84023f84772SPierre Beyssac if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len)) 84123f84772SPierre Beyssac break; 842a34b7046SRobert Watson sbdrop_locked(sb, (int)sb->sb_cc); 84323f84772SPierre Beyssac } 8440931333fSBill Fenner if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) 845a34b7046SRobert Watson panic("sbflush_locked: cc %u || mb %p || mbcnt %u", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); 846a34b7046SRobert Watson } 847a34b7046SRobert Watson 848a34b7046SRobert Watson void 849a34b7046SRobert Watson sbflush(sb) 850a34b7046SRobert Watson register struct sockbuf *sb; 851a34b7046SRobert Watson { 852a34b7046SRobert Watson 853a34b7046SRobert Watson SOCKBUF_LOCK(sb); 854a34b7046SRobert Watson sbflush_locked(sb); 855a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 856df8bae1dSRodney W. Grimes } 857df8bae1dSRodney W. Grimes 858df8bae1dSRodney W. Grimes /* 859df8bae1dSRodney W. Grimes * Drop data from (the front of) a sockbuf. 860df8bae1dSRodney W. Grimes */ 86126f9a767SRodney W. Grimes void 862a34b7046SRobert Watson sbdrop_locked(sb, len) 863df8bae1dSRodney W. Grimes register struct sockbuf *sb; 864df8bae1dSRodney W. Grimes register int len; 865df8bae1dSRodney W. Grimes { 866ecde8f7cSMatthew Dillon register struct mbuf *m; 867df8bae1dSRodney W. Grimes struct mbuf *next; 868df8bae1dSRodney W. Grimes 869a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 870a34b7046SRobert Watson 871df8bae1dSRodney W. Grimes next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 872df8bae1dSRodney W. Grimes while (len > 0) { 873df8bae1dSRodney W. Grimes if (m == 0) { 874df8bae1dSRodney W. Grimes if (next == 0) 875df8bae1dSRodney W. Grimes panic("sbdrop"); 876df8bae1dSRodney W. Grimes m = next; 877df8bae1dSRodney W. Grimes next = m->m_nextpkt; 878df8bae1dSRodney W. Grimes continue; 879df8bae1dSRodney W. Grimes } 880df8bae1dSRodney W. Grimes if (m->m_len > len) { 881df8bae1dSRodney W. Grimes m->m_len -= len; 882df8bae1dSRodney W. Grimes m->m_data += len; 883df8bae1dSRodney W. 
Grimes sb->sb_cc -= len; 88434333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 88504ac9b97SKelly Yancey sb->sb_ctl -= len; 886df8bae1dSRodney W. Grimes break; 887df8bae1dSRodney W. Grimes } 888df8bae1dSRodney W. Grimes len -= m->m_len; 889df8bae1dSRodney W. Grimes sbfree(sb, m); 890ecde8f7cSMatthew Dillon m = m_free(m); 891df8bae1dSRodney W. Grimes } 892df8bae1dSRodney W. Grimes while (m && m->m_len == 0) { 893df8bae1dSRodney W. Grimes sbfree(sb, m); 894ecde8f7cSMatthew Dillon m = m_free(m); 895df8bae1dSRodney W. Grimes } 896df8bae1dSRodney W. Grimes if (m) { 897df8bae1dSRodney W. Grimes sb->sb_mb = m; 898df8bae1dSRodney W. Grimes m->m_nextpkt = next; 899df8bae1dSRodney W. Grimes } else 900df8bae1dSRodney W. Grimes sb->sb_mb = next; 901395bb186SSam Leffler /* 902395bb186SSam Leffler * First part is an inline SB_EMPTY_FIXUP(). Second part 903395bb186SSam Leffler * makes sure sb_lastrecord is up-to-date if we dropped 904395bb186SSam Leffler * part of the last record. 905395bb186SSam Leffler */ 906395bb186SSam Leffler m = sb->sb_mb; 907395bb186SSam Leffler if (m == NULL) { 908395bb186SSam Leffler sb->sb_mbtail = NULL; 909395bb186SSam Leffler sb->sb_lastrecord = NULL; 910395bb186SSam Leffler } else if (m->m_nextpkt == NULL) { 911395bb186SSam Leffler sb->sb_lastrecord = m; 912395bb186SSam Leffler } 913df8bae1dSRodney W. Grimes } 914df8bae1dSRodney W. Grimes 915df8bae1dSRodney W. Grimes /* 916a34b7046SRobert Watson * Drop data from (the front of) a sockbuf. 
917a34b7046SRobert Watson */ 918a34b7046SRobert Watson void 919a34b7046SRobert Watson sbdrop(sb, len) 920a34b7046SRobert Watson register struct sockbuf *sb; 921a34b7046SRobert Watson register int len; 922a34b7046SRobert Watson { 923a34b7046SRobert Watson 924a34b7046SRobert Watson SOCKBUF_LOCK(sb); 925a34b7046SRobert Watson sbdrop_locked(sb, len); 926a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 927a34b7046SRobert Watson } 928a34b7046SRobert Watson 929a34b7046SRobert Watson /* 930df8bae1dSRodney W. Grimes * Drop a record off the front of a sockbuf 931df8bae1dSRodney W. Grimes * and move the next record to the front. 932df8bae1dSRodney W. Grimes */ 93326f9a767SRodney W. Grimes void 934a34b7046SRobert Watson sbdroprecord_locked(sb) 935df8bae1dSRodney W. Grimes register struct sockbuf *sb; 936df8bae1dSRodney W. Grimes { 937ecde8f7cSMatthew Dillon register struct mbuf *m; 938df8bae1dSRodney W. Grimes 939a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 940a34b7046SRobert Watson 941df8bae1dSRodney W. Grimes m = sb->sb_mb; 942df8bae1dSRodney W. Grimes if (m) { 943df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt; 944df8bae1dSRodney W. Grimes do { 945df8bae1dSRodney W. Grimes sbfree(sb, m); 946ecde8f7cSMatthew Dillon m = m_free(m); 947797f2d22SPoul-Henning Kamp } while (m); 948df8bae1dSRodney W. Grimes } 949395bb186SSam Leffler SB_EMPTY_FIXUP(sb); 950df8bae1dSRodney W. Grimes } 9511e4ad9ceSGarrett Wollman 95282c23ebaSBill Fenner /* 953a34b7046SRobert Watson * Drop a record off the front of a sockbuf 954a34b7046SRobert Watson * and move the next record to the front. 
955a34b7046SRobert Watson */ 956a34b7046SRobert Watson void 957a34b7046SRobert Watson sbdroprecord(sb) 958a34b7046SRobert Watson register struct sockbuf *sb; 959a34b7046SRobert Watson { 960a34b7046SRobert Watson 961a34b7046SRobert Watson SOCKBUF_LOCK(sb); 962a34b7046SRobert Watson sbdroprecord_locked(sb); 963a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 964a34b7046SRobert Watson } 965a34b7046SRobert Watson 966639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ 967639acc13SGarrett Wollman static int dummy; 968639acc13SGarrett Wollman SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); 9691b978d45SHartmut Brandt SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 9701b978d45SHartmut Brandt &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); 9711b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 972639acc13SGarrett Wollman &sb_efficiency, 0, ""); 973