19454b2d8SWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 32677b542eSDavid E. O'Brien #include <sys/cdefs.h> 33677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 34677b542eSDavid E. O'Brien 355b86eac4SJesper Skriver #include "opt_param.h" 36335654d7SRobert Watson 37df8bae1dSRodney W. Grimes #include <sys/param.h> 38960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */ 39ff5c09daSGarrett Wollman #include <sys/kernel.h> 40fb919e4dSMark Murray #include <sys/lock.h> 41df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 42960ed29cSSeigo Tanimura #include <sys/mutex.h> 43fb919e4dSMark Murray #include <sys/proc.h> 44df8bae1dSRodney W. Grimes #include <sys/protosw.h> 452f9a2132SBrian Feldman #include <sys/resourcevar.h> 46960ed29cSSeigo Tanimura #include <sys/signalvar.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 48df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 497abab911SRobert Watson #include <sys/sx.h> 50ff5c09daSGarrett Wollman #include <sys/sysctl.h> 5126f9a767SRodney W. Grimes 52f14cce87SRobert Watson /* 53f14cce87SRobert Watson * Function pointer set by the AIO routines so that the socket buffer code 54f14cce87SRobert Watson * can call back into the AIO module if it is loaded. 55f14cce87SRobert Watson */ 5621d56e9cSAlfred Perlstein void (*aio_swake)(struct socket *, struct sockbuf *); 5721d56e9cSAlfred Perlstein 58df8bae1dSRodney W. Grimes /* 59f14cce87SRobert Watson * Primitive routines for operating on socket buffers 60df8bae1dSRodney W. Grimes */ 61df8bae1dSRodney W. 
Grimes 6279cb7eb4SDavid Greenman u_long sb_max = SB_MAX; 6358d14daeSMohan Srinivasan u_long sb_max_adj = 64b233773bSBjoern A. Zeeb (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 65df8bae1dSRodney W. Grimes 664b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 674b29bc4fSGarrett Wollman 681d2df300SGleb Smirnoff static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); 69050ac265SRobert Watson static void sbflush_internal(struct sockbuf *sb); 70eaa6dfbcSRobert Watson 71df8bae1dSRodney W. Grimes /* 720f9d0a73SGleb Smirnoff * Mark ready "count" mbufs starting with "m". 730f9d0a73SGleb Smirnoff */ 740f9d0a73SGleb Smirnoff int 750f9d0a73SGleb Smirnoff sbready(struct sockbuf *sb, struct mbuf *m, int count) 760f9d0a73SGleb Smirnoff { 770f9d0a73SGleb Smirnoff u_int blocker; 780f9d0a73SGleb Smirnoff 790f9d0a73SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 800f9d0a73SGleb Smirnoff KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); 810f9d0a73SGleb Smirnoff 820f9d0a73SGleb Smirnoff blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; 830f9d0a73SGleb Smirnoff 840f9d0a73SGleb Smirnoff for (int i = 0; i < count; i++, m = m->m_next) { 850f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 860f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 870f9d0a73SGleb Smirnoff m->m_flags &= ~(M_NOTREADY | blocker); 880f9d0a73SGleb Smirnoff if (blocker) 890f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 900f9d0a73SGleb Smirnoff } 910f9d0a73SGleb Smirnoff 920f9d0a73SGleb Smirnoff if (!blocker) 930f9d0a73SGleb Smirnoff return (EINPROGRESS); 940f9d0a73SGleb Smirnoff 950f9d0a73SGleb Smirnoff /* This one was blocking all the queue. 
*/ 960f9d0a73SGleb Smirnoff for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { 970f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_BLOCKED, 980f9d0a73SGleb Smirnoff ("%s: m %p !M_BLOCKED", __func__, m)); 990f9d0a73SGleb Smirnoff m->m_flags &= ~M_BLOCKED; 1000f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 1010f9d0a73SGleb Smirnoff } 1020f9d0a73SGleb Smirnoff 1030f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 1040f9d0a73SGleb Smirnoff 1050f9d0a73SGleb Smirnoff return (0); 1060f9d0a73SGleb Smirnoff } 1070f9d0a73SGleb Smirnoff 1080f9d0a73SGleb Smirnoff /* 1098967b220SGleb Smirnoff * Adjust sockbuf state reflecting allocation of m. 1108967b220SGleb Smirnoff */ 1118967b220SGleb Smirnoff void 1128967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m) 1138967b220SGleb Smirnoff { 1148967b220SGleb Smirnoff 1158967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 1168967b220SGleb Smirnoff 1170f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 1180f9d0a73SGleb Smirnoff 1190f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) { 1200f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY) 1210f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 1220f9d0a73SGleb Smirnoff else 1230f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 1240f9d0a73SGleb Smirnoff } else 1250f9d0a73SGleb Smirnoff m->m_flags |= M_BLOCKED; 1268967b220SGleb Smirnoff 1278967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 1288967b220SGleb Smirnoff sb->sb_ctl += m->m_len; 1298967b220SGleb Smirnoff 1308967b220SGleb Smirnoff sb->sb_mbcnt += MSIZE; 1318967b220SGleb Smirnoff sb->sb_mcnt += 1; 1328967b220SGleb Smirnoff 1338967b220SGleb Smirnoff if (m->m_flags & M_EXT) { 1348967b220SGleb Smirnoff sb->sb_mbcnt += m->m_ext.ext_size; 1358967b220SGleb Smirnoff sb->sb_ccnt += 1; 1368967b220SGleb Smirnoff } 1378967b220SGleb Smirnoff } 1388967b220SGleb Smirnoff 1398967b220SGleb Smirnoff /* 1408967b220SGleb Smirnoff * Adjust sockbuf state reflecting freeing of m. 
1418967b220SGleb Smirnoff */ 1428967b220SGleb Smirnoff void 1438967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m) 1448967b220SGleb Smirnoff { 1458967b220SGleb Smirnoff 1468967b220SGleb Smirnoff #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ 1478967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 1488967b220SGleb Smirnoff #endif 1498967b220SGleb Smirnoff 1500f9d0a73SGleb Smirnoff sb->sb_ccc -= m->m_len; 1510f9d0a73SGleb Smirnoff 1520f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) 1530f9d0a73SGleb Smirnoff sb->sb_acc -= m->m_len; 1540f9d0a73SGleb Smirnoff 1550f9d0a73SGleb Smirnoff if (m == sb->sb_fnrdy) { 1560f9d0a73SGleb Smirnoff struct mbuf *n; 1570f9d0a73SGleb Smirnoff 1580f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 1590f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 1600f9d0a73SGleb Smirnoff 1610f9d0a73SGleb Smirnoff n = m->m_next; 1620f9d0a73SGleb Smirnoff while (n != NULL && !(n->m_flags & M_NOTREADY)) { 1630f9d0a73SGleb Smirnoff n->m_flags &= ~M_BLOCKED; 1640f9d0a73SGleb Smirnoff sb->sb_acc += n->m_len; 1650f9d0a73SGleb Smirnoff n = n->m_next; 1660f9d0a73SGleb Smirnoff } 1670f9d0a73SGleb Smirnoff sb->sb_fnrdy = n; 1680f9d0a73SGleb Smirnoff } 1698967b220SGleb Smirnoff 1708967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 1718967b220SGleb Smirnoff sb->sb_ctl -= m->m_len; 1728967b220SGleb Smirnoff 1738967b220SGleb Smirnoff sb->sb_mbcnt -= MSIZE; 1748967b220SGleb Smirnoff sb->sb_mcnt -= 1; 1758967b220SGleb Smirnoff if (m->m_flags & M_EXT) { 1768967b220SGleb Smirnoff sb->sb_mbcnt -= m->m_ext.ext_size; 1778967b220SGleb Smirnoff sb->sb_ccnt -= 1; 1788967b220SGleb Smirnoff } 1798967b220SGleb Smirnoff 1808967b220SGleb Smirnoff if (sb->sb_sndptr == m) { 1818967b220SGleb Smirnoff sb->sb_sndptr = NULL; 1828967b220SGleb Smirnoff sb->sb_sndptroff = 0; 1838967b220SGleb Smirnoff } 1848967b220SGleb Smirnoff if (sb->sb_sndptroff != 0) 1858967b220SGleb Smirnoff sb->sb_sndptroff -= m->m_len; 
1868967b220SGleb Smirnoff } 1878967b220SGleb Smirnoff 1888967b220SGleb Smirnoff /* 189050ac265SRobert Watson * Socantsendmore indicates that no more data will be sent on the socket; it 190050ac265SRobert Watson * would normally be applied to a socket when the user informs the system 191050ac265SRobert Watson * that no more data is to be sent, by the protocol code (in case 192050ac265SRobert Watson * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be 193050ac265SRobert Watson * received, and will normally be applied to the socket by a protocol when it 194050ac265SRobert Watson * detects that the peer will send no more data. Data queued for reading in 195050ac265SRobert Watson * the socket may yet be read. 196df8bae1dSRodney W. Grimes */ 197a34b7046SRobert Watson void 198050ac265SRobert Watson socantsendmore_locked(struct socket *so) 199a34b7046SRobert Watson { 200a34b7046SRobert Watson 201a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_snd); 202a34b7046SRobert Watson 203a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE; 204a34b7046SRobert Watson sowwakeup_locked(so); 205a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 206a34b7046SRobert Watson } 207df8bae1dSRodney W. Grimes 20826f9a767SRodney W. Grimes void 209050ac265SRobert Watson socantsendmore(struct socket *so) 210df8bae1dSRodney W. Grimes { 211df8bae1dSRodney W. 
Grimes 212a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_snd); 213a34b7046SRobert Watson socantsendmore_locked(so); 214a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 215a34b7046SRobert Watson } 216a34b7046SRobert Watson 217a34b7046SRobert Watson void 218050ac265SRobert Watson socantrcvmore_locked(struct socket *so) 219a34b7046SRobert Watson { 220a34b7046SRobert Watson 221a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_rcv); 222a34b7046SRobert Watson 223a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE; 224a34b7046SRobert Watson sorwakeup_locked(so); 225a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 226df8bae1dSRodney W. Grimes } 227df8bae1dSRodney W. Grimes 22826f9a767SRodney W. Grimes void 229050ac265SRobert Watson socantrcvmore(struct socket *so) 230df8bae1dSRodney W. Grimes { 231df8bae1dSRodney W. Grimes 232a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 233a34b7046SRobert Watson socantrcvmore_locked(so); 234a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 235df8bae1dSRodney W. Grimes } 236df8bae1dSRodney W. Grimes 237df8bae1dSRodney W. Grimes /* 238df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer. 239df8bae1dSRodney W. Grimes */ 24026f9a767SRodney W. Grimes int 241050ac265SRobert Watson sbwait(struct sockbuf *sb) 242df8bae1dSRodney W. Grimes { 243df8bae1dSRodney W. Grimes 24431f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 24531f555a1SRobert Watson 246df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT; 2470f9d0a73SGleb Smirnoff return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, 24847daf5d5SBruce Evans (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", 2497729cbf1SDavide Italiano sb->sb_timeo, 0, 0)); 250df8bae1dSRodney W. Grimes } 251df8bae1dSRodney W. Grimes 25226f9a767SRodney W. Grimes int 2537abab911SRobert Watson sblock(struct sockbuf *sb, int flags) 254df8bae1dSRodney W. Grimes { 255df8bae1dSRodney W. 
Grimes 256265de5bbSRobert Watson KASSERT((flags & SBL_VALID) == flags, 257265de5bbSRobert Watson ("sblock: flags invalid (0x%x)", flags)); 258265de5bbSRobert Watson 259265de5bbSRobert Watson if (flags & SBL_WAIT) { 260265de5bbSRobert Watson if ((sb->sb_flags & SB_NOINTR) || 261265de5bbSRobert Watson (flags & SBL_NOINTR)) { 2627abab911SRobert Watson sx_xlock(&sb->sb_sx); 263df8bae1dSRodney W. Grimes return (0); 264049c3b6cSRobert Watson } 265049c3b6cSRobert Watson return (sx_xlock_sig(&sb->sb_sx)); 2667abab911SRobert Watson } else { 2677abab911SRobert Watson if (sx_try_xlock(&sb->sb_sx) == 0) 2687abab911SRobert Watson return (EWOULDBLOCK); 2697abab911SRobert Watson return (0); 2707abab911SRobert Watson } 2717abab911SRobert Watson } 2727abab911SRobert Watson 2737abab911SRobert Watson void 2747abab911SRobert Watson sbunlock(struct sockbuf *sb) 2757abab911SRobert Watson { 2767abab911SRobert Watson 2777abab911SRobert Watson sx_xunlock(&sb->sb_sx); 278df8bae1dSRodney W. Grimes } 279df8bae1dSRodney W. Grimes 280df8bae1dSRodney W. Grimes /* 281050ac265SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous notification 282050ac265SRobert Watson * via SIGIO if the socket has the SS_ASYNC flag set. 283a34b7046SRobert Watson * 284a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end 285a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock 286a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through 287a34b7046SRobert Watson * to the point where it's no longer required. We currently hold the lock 288a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and 289a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's 290a34b7046SRobert Watson * correct. 291df8bae1dSRodney W. Grimes */ 29226f9a767SRodney W. 
Grimes void 293050ac265SRobert Watson sowakeup(struct socket *so, struct sockbuf *sb) 294df8bae1dSRodney W. Grimes { 29574fb0ba7SJohn Baldwin int ret; 296d48d4b25SSeigo Tanimura 297a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 298a34b7046SRobert Watson 299512824f8SSeigo Tanimura selwakeuppri(&sb->sb_sel, PSOCK); 300ace8398dSJeff Roberson if (!SEL_WAITING(&sb->sb_sel)) 301df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_SEL; 302df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) { 303df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT; 3040f9d0a73SGleb Smirnoff wakeup(&sb->sb_acc); 305df8bae1dSRodney W. Grimes } 306ad3b9257SJohn-Mark Gurney KNOTE_LOCKED(&sb->sb_sel.si_note, 0); 30774fb0ba7SJohn Baldwin if (sb->sb_upcall != NULL) { 308eb1b1807SGleb Smirnoff ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); 30974fb0ba7SJohn Baldwin if (ret == SU_ISCONNECTED) { 31074fb0ba7SJohn Baldwin KASSERT(sb == &so->so_rcv, 31174fb0ba7SJohn Baldwin ("SO_SND upcall returned SU_ISCONNECTED")); 31274fb0ba7SJohn Baldwin soupcall_clear(so, SO_RCV); 31374fb0ba7SJohn Baldwin } 31474fb0ba7SJohn Baldwin } else 31574fb0ba7SJohn Baldwin ret = SU_OK; 3164cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO) 317bfbbc4aaSJason Evans aio_swake(so, sb); 31874fb0ba7SJohn Baldwin SOCKBUF_UNLOCK(sb); 31974fb0ba7SJohn Baldwin if (ret == SU_ISCONNECTED) 32074fb0ba7SJohn Baldwin soisconnected(so); 32174fb0ba7SJohn Baldwin if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) 32274fb0ba7SJohn Baldwin pgsigio(&so->so_sigio, SIGIO, 0); 323a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); 324df8bae1dSRodney W. Grimes } 325df8bae1dSRodney W. Grimes 326df8bae1dSRodney W. Grimes /* 327df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines. 328df8bae1dSRodney W. Grimes * 329050ac265SRobert Watson * Each socket contains two socket buffers: one for sending data and one for 330050ac265SRobert Watson * receiving data. 
Each buffer contains a queue of mbufs, information about 331050ac265SRobert Watson * the number of mbufs and amount of data in the queue, and other fields 332050ac265SRobert Watson * allowing select() statements and notification on data availability to be 333050ac265SRobert Watson * implemented. 334df8bae1dSRodney W. Grimes * 335050ac265SRobert Watson * Data stored in a socket buffer is maintained as a list of records. Each 336050ac265SRobert Watson * record is a list of mbufs chained together with the m_next field. Records 337050ac265SRobert Watson * are chained together with the m_nextpkt field. The upper level routine 338050ac265SRobert Watson * soreceive() expects the following conventions to be observed when placing 339050ac265SRobert Watson * information in the receive buffer: 340df8bae1dSRodney W. Grimes * 341050ac265SRobert Watson * 1. If the protocol requires each message be preceded by the sender's name, 342050ac265SRobert Watson * then a record containing that name must be present before any 343050ac265SRobert Watson * associated data (mbuf's must be of type MT_SONAME). 344050ac265SRobert Watson * 2. If the protocol supports the exchange of ``access rights'' (really just 345050ac265SRobert Watson * additional data associated with the message), and there are ``rights'' 346050ac265SRobert Watson * to be received, then a record containing this data should be present 347050ac265SRobert Watson * (mbuf's must be of type MT_RIGHTS). 348050ac265SRobert Watson * 3. If a name or rights record exists, then it must be followed by a data 349050ac265SRobert Watson * record, perhaps of zero length. 350df8bae1dSRodney W. Grimes * 351df8bae1dSRodney W. Grimes * Before using a new socket structure it is first necessary to reserve 352df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit 353df8bae1dSRodney W. 
Grimes * some of the available buffer space in the system buffer pool for the 354050ac265SRobert Watson * socket (currently, it does nothing but enforce limits). The space should 355050ac265SRobert Watson * be released by calling sbrelease() when the socket is destroyed. 356df8bae1dSRodney W. Grimes */ 35726f9a767SRodney W. Grimes int 358050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 359df8bae1dSRodney W. Grimes { 360b40ce416SJulian Elischer struct thread *td = curthread; 361df8bae1dSRodney W. Grimes 3623f11a2f3SRobert Watson SOCKBUF_LOCK(&so->so_snd); 3639535efc0SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 3643f11a2f3SRobert Watson if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) 3653f11a2f3SRobert Watson goto bad; 3663f11a2f3SRobert Watson if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) 3673f11a2f3SRobert Watson goto bad2; 368df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0) 369df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1; 370df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0) 371df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES; 372df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 373df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 3743f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 3759535efc0SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 376df8bae1dSRodney W. Grimes return (0); 377df8bae1dSRodney W. Grimes bad2: 3783f11a2f3SRobert Watson sbrelease_locked(&so->so_snd, so); 379df8bae1dSRodney W. Grimes bad: 3803f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 3813f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 382df8bae1dSRodney W. Grimes return (ENOBUFS); 383df8bae1dSRodney W. Grimes } 384df8bae1dSRodney W. 
Grimes 38579cb7eb4SDavid Greenman static int 38679cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 38779cb7eb4SDavid Greenman { 38879cb7eb4SDavid Greenman int error = 0; 38986a93d51SJohn Baldwin u_long tmp_sb_max = sb_max; 39079cb7eb4SDavid Greenman 39186a93d51SJohn Baldwin error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); 39279cb7eb4SDavid Greenman if (error || !req->newptr) 39379cb7eb4SDavid Greenman return (error); 39486a93d51SJohn Baldwin if (tmp_sb_max < MSIZE + MCLBYTES) 39579cb7eb4SDavid Greenman return (EINVAL); 39686a93d51SJohn Baldwin sb_max = tmp_sb_max; 39779cb7eb4SDavid Greenman sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 39879cb7eb4SDavid Greenman return (0); 39979cb7eb4SDavid Greenman } 40079cb7eb4SDavid Greenman 401df8bae1dSRodney W. Grimes /* 402050ac265SRobert Watson * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't 403050ac265SRobert Watson * become limiting if buffering efficiency is near the normal case. 404df8bae1dSRodney W. Grimes */ 40526f9a767SRodney W. Grimes int 406050ac265SRobert Watson sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, 407050ac265SRobert Watson struct thread *td) 408df8bae1dSRodney W. Grimes { 40991d5354aSJohn Baldwin rlim_t sbsize_limit; 410ecf72308SBrian Feldman 4113f11a2f3SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 4123f11a2f3SRobert Watson 413ecf72308SBrian Feldman /* 4147978014dSRobert Watson * When a thread is passed, we take into account the thread's socket 4157978014dSRobert Watson * buffer size limit. The caller will generally pass curthread, but 4167978014dSRobert Watson * in the TCP input path, NULL will be passed to indicate that no 4177978014dSRobert Watson * appropriate thread resource limits are available. In that case, 4187978014dSRobert Watson * we don't apply a process limit. 419ecf72308SBrian Feldman */ 42079cb7eb4SDavid Greenman if (cc > sb_max_adj) 421df8bae1dSRodney W. 
Grimes return (0); 42291d5354aSJohn Baldwin if (td != NULL) { 42391d5354aSJohn Baldwin PROC_LOCK(td->td_proc); 42491d5354aSJohn Baldwin sbsize_limit = lim_cur(td->td_proc, RLIMIT_SBSIZE); 42591d5354aSJohn Baldwin PROC_UNLOCK(td->td_proc); 42691d5354aSJohn Baldwin } else 42791d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY; 428f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 42991d5354aSJohn Baldwin sbsize_limit)) 430ecf72308SBrian Feldman return (0); 4314b29bc4fSGarrett Wollman sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 432df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat) 433df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat; 434df8bae1dSRodney W. Grimes return (1); 435df8bae1dSRodney W. Grimes } 436df8bae1dSRodney W. Grimes 4373f11a2f3SRobert Watson int 438050ac265SRobert Watson sbreserve(struct sockbuf *sb, u_long cc, struct socket *so, 439050ac265SRobert Watson struct thread *td) 4403f11a2f3SRobert Watson { 4413f11a2f3SRobert Watson int error; 4423f11a2f3SRobert Watson 4433f11a2f3SRobert Watson SOCKBUF_LOCK(sb); 4443f11a2f3SRobert Watson error = sbreserve_locked(sb, cc, so, td); 4453f11a2f3SRobert Watson SOCKBUF_UNLOCK(sb); 4463f11a2f3SRobert Watson return (error); 4473f11a2f3SRobert Watson } 4483f11a2f3SRobert Watson 449df8bae1dSRodney W. Grimes /* 450df8bae1dSRodney W. Grimes * Free mbufs held by a socket, and reserved mbuf space. 451df8bae1dSRodney W. Grimes */ 4523f0bfcccSRobert Watson void 453050ac265SRobert Watson sbrelease_internal(struct sockbuf *sb, struct socket *so) 454eaa6dfbcSRobert Watson { 455eaa6dfbcSRobert Watson 456eaa6dfbcSRobert Watson sbflush_internal(sb); 457eaa6dfbcSRobert Watson (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 458eaa6dfbcSRobert Watson RLIM_INFINITY); 459eaa6dfbcSRobert Watson sb->sb_mbmax = 0; 460eaa6dfbcSRobert Watson } 461eaa6dfbcSRobert Watson 46226f9a767SRodney W. 
/*
 * Release the buffer's mbufs and reservation; caller holds the socket
 * buffer lock.
 */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbrelease_internal(sb, so);
}

/*
 * Release the buffer's mbufs and reservation, taking the socket buffer
 * lock around the operation.
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Release the buffer's mbufs and reservation without taking or asserting
 * the socket buffer lock.
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
}

/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and then removing the data from
 * the socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
 */
#ifdef SOCKBUF_DEBUG
/*
 * Debug check: verify that sb_lastrecord points at the last record of the
 * buffer (found by following m_nextpkt from sb_mb).  On mismatch the record
 * chain is printed and the kernel panics, naming the caller's file:line.
 */
void
sblastrecordchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	if (m != sb->sb_lastrecord) {
		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
			__func__, sb->sb_mb, sb->sb_lastrecord, m);
		printf("packet chain:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
			printf("\t%p\n", m);
		/* "line" is an int, so it is printed with %d. */
		panic("%s from %s:%d", __func__, file, line);
	}
}

/*
 * Debug check: verify that sb_mbtail points at the last mbuf of the last
 * record (found by following m_nextpkt, then m_next).  On mismatch every
 * record's mbuf chain is printed and the kernel panics, naming the caller's
 * file:line.
 */
void
sblastmbufchk(struct sockbuf *sb, const char *file, int line)
{
	struct mbuf *m = sb->sb_mb;
	struct mbuf *n;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m && m->m_nextpkt)
		m = m->m_nextpkt;

	while (m && m->m_next)
		m = m->m_next;

	if (m != sb->sb_mbtail) {
		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
			__func__, sb->sb_mb, sb->sb_mbtail, m);
		printf("packet tree:\n");
		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
			printf("\t");
			for (n = m; n != NULL; n = n->m_next)
				printf("%p ", n);
			printf("\n");
		}
		/* "line" is an int, so it is printed with %d. */
		panic("%s from %s:%d", __func__, file, line);
	}
}
#endif /* SOCKBUF_DEBUG */

/*
 * Link record m0 at the tail of sb's record list: chain it after the
 * current last record, or make it the head (sb_mb) if the list is empty,
 * then update sb_lastrecord.  The socket buffer lock must be held.
 */
#define	SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord != NULL)				\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	else								\
		(sb)->sb_mb = (m0);					\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)

/*
 * Append mbuf chain m to the last record in the socket buffer sb.  The
 * additional space associated with the mbuf chain is recorded in sb.  Empty
 * mbufs are discarded and mbufs are compacted where possible.
 */
Grimes void 574050ac265SRobert Watson sbappend_locked(struct sockbuf *sb, struct mbuf *m) 575df8bae1dSRodney W. Grimes { 576050ac265SRobert Watson struct mbuf *n; 577df8bae1dSRodney W. Grimes 578a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 579a34b7046SRobert Watson 580df8bae1dSRodney W. Grimes if (m == 0) 581df8bae1dSRodney W. Grimes return; 582*53b680caSGleb Smirnoff m_clrprotoflags(m); 583395bb186SSam Leffler SBLASTRECORDCHK(sb); 584797f2d22SPoul-Henning Kamp n = sb->sb_mb; 585797f2d22SPoul-Henning Kamp if (n) { 586df8bae1dSRodney W. Grimes while (n->m_nextpkt) 587df8bae1dSRodney W. Grimes n = n->m_nextpkt; 588df8bae1dSRodney W. Grimes do { 589df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) { 590a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 591df8bae1dSRodney W. Grimes return; 592df8bae1dSRodney W. Grimes } 593df8bae1dSRodney W. Grimes } while (n->m_next && (n = n->m_next)); 594395bb186SSam Leffler } else { 595395bb186SSam Leffler /* 596395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but 597395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that 598395bb186SSam Leffler * XXX way. 599395bb186SSam Leffler */ 600395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) { 601395bb186SSam Leffler do { 602395bb186SSam Leffler if (n->m_flags & M_EOR) { 603a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 604395bb186SSam Leffler return; 605395bb186SSam Leffler } 606395bb186SSam Leffler } while (n->m_next && (n = n->m_next)); 607395bb186SSam Leffler } else { 608395bb186SSam Leffler /* 609395bb186SSam Leffler * If this is the first record in the socket buffer, 610395bb186SSam Leffler * it's also the last record. 611395bb186SSam Leffler */ 612395bb186SSam Leffler sb->sb_lastrecord = m; 613395bb186SSam Leffler } 614df8bae1dSRodney W. Grimes } 615df8bae1dSRodney W. 
Grimes sbcompress(sb, m, n); 616395bb186SSam Leffler SBLASTRECORDCHK(sb); 617395bb186SSam Leffler } 618395bb186SSam Leffler 619395bb186SSam Leffler /* 620050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 621050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 622050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 623a34b7046SRobert Watson */ 624a34b7046SRobert Watson void 625050ac265SRobert Watson sbappend(struct sockbuf *sb, struct mbuf *m) 626a34b7046SRobert Watson { 627a34b7046SRobert Watson 628a34b7046SRobert Watson SOCKBUF_LOCK(sb); 629a34b7046SRobert Watson sbappend_locked(sb, m); 630a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 631a34b7046SRobert Watson } 632a34b7046SRobert Watson 633a34b7046SRobert Watson /* 634050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 635050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 636050ac265SRobert Watson * that is, a stream protocol (such as TCP). 637395bb186SSam Leffler */ 638395bb186SSam Leffler void 639651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) 640395bb186SSam Leffler { 641a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 642395bb186SSam Leffler 643395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); 644395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); 645395bb186SSam Leffler 646395bb186SSam Leffler SBLASTMBUFCHK(sb); 647395bb186SSam Leffler 648844cacd1SGleb Smirnoff /* Remove all packet headers and mbuf tags to get a pure data chain. */ 649651e4e6aSGleb Smirnoff m_demote(m, 1, flags & PRUS_NOTREADY ? 
M_NOTREADY : 0); 650844cacd1SGleb Smirnoff 651395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail); 652395bb186SSam Leffler 653395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb; 654395bb186SSam Leffler SBLASTRECORDCHK(sb); 655df8bae1dSRodney W. Grimes } 656df8bae1dSRodney W. Grimes 657a34b7046SRobert Watson /* 658050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 659050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 660050ac265SRobert Watson * that is, a stream protocol (such as TCP). 661a34b7046SRobert Watson */ 662a34b7046SRobert Watson void 663651e4e6aSGleb Smirnoff sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) 664a34b7046SRobert Watson { 665a34b7046SRobert Watson 666a34b7046SRobert Watson SOCKBUF_LOCK(sb); 667651e4e6aSGleb Smirnoff sbappendstream_locked(sb, m, flags); 668a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 669a34b7046SRobert Watson } 670a34b7046SRobert Watson 671df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG 67226f9a767SRodney W. Grimes void 67357f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line) 674df8bae1dSRodney W. Grimes { 6750f9d0a73SGleb Smirnoff struct mbuf *m, *n, *fnrdy; 6760f9d0a73SGleb Smirnoff u_long acc, ccc, mbcnt; 677df8bae1dSRodney W. 
Grimes 678a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 679a34b7046SRobert Watson 6800f9d0a73SGleb Smirnoff acc = ccc = mbcnt = 0; 6810f9d0a73SGleb Smirnoff fnrdy = NULL; 68257f43a45SGleb Smirnoff 6830931333fSBill Fenner for (m = sb->sb_mb; m; m = n) { 6840931333fSBill Fenner n = m->m_nextpkt; 6850931333fSBill Fenner for (; m; m = m->m_next) { 68657f43a45SGleb Smirnoff if (m->m_len == 0) { 68757f43a45SGleb Smirnoff printf("sb %p empty mbuf %p\n", sb, m); 68857f43a45SGleb Smirnoff goto fail; 68957f43a45SGleb Smirnoff } 6900f9d0a73SGleb Smirnoff if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { 6910f9d0a73SGleb Smirnoff if (m != sb->sb_fnrdy) { 6920f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p != m %p\n", 6930f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 6940f9d0a73SGleb Smirnoff goto fail; 6950f9d0a73SGleb Smirnoff } 6960f9d0a73SGleb Smirnoff fnrdy = m; 6970f9d0a73SGleb Smirnoff } 6980f9d0a73SGleb Smirnoff if (fnrdy) { 6990f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) { 7000f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p, m %p is avail\n", 7010f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 7020f9d0a73SGleb Smirnoff goto fail; 7030f9d0a73SGleb Smirnoff } 7040f9d0a73SGleb Smirnoff } else 7050f9d0a73SGleb Smirnoff acc += m->m_len; 7060f9d0a73SGleb Smirnoff ccc += m->m_len; 707df8bae1dSRodney W. Grimes mbcnt += MSIZE; 708313861b8SJulian Elischer if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 709df8bae1dSRodney W. Grimes mbcnt += m->m_ext.ext_size; 7100931333fSBill Fenner } 711df8bae1dSRodney W. Grimes } 7120f9d0a73SGleb Smirnoff if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { 7130f9d0a73SGleb Smirnoff printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", 7140f9d0a73SGleb Smirnoff acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); 71557f43a45SGleb Smirnoff goto fail; 716df8bae1dSRodney W. 
Grimes } 71757f43a45SGleb Smirnoff return; 71857f43a45SGleb Smirnoff fail: 71957f43a45SGleb Smirnoff panic("%s from %s:%u", __func__, file, line); 720df8bae1dSRodney W. Grimes } 721df8bae1dSRodney W. Grimes #endif 722df8bae1dSRodney W. Grimes 723df8bae1dSRodney W. Grimes /* 724050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 725df8bae1dSRodney W. Grimes */ 72626f9a767SRodney W. Grimes void 727050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) 728df8bae1dSRodney W. Grimes { 729050ac265SRobert Watson struct mbuf *m; 730df8bae1dSRodney W. Grimes 731a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 732a34b7046SRobert Watson 733df8bae1dSRodney W. Grimes if (m0 == 0) 734df8bae1dSRodney W. Grimes return; 735*53b680caSGleb Smirnoff m_clrprotoflags(m0); 736df8bae1dSRodney W. Grimes /* 737050ac265SRobert Watson * Put the first mbuf on the queue. Note this permits zero length 738050ac265SRobert Watson * records. 739df8bae1dSRodney W. Grimes */ 740df8bae1dSRodney W. Grimes sballoc(sb, m0); 741395bb186SSam Leffler SBLASTRECORDCHK(sb); 742395bb186SSam Leffler SBLINKRECORD(sb, m0); 743e72a94adSMaksim Yevmenkin sb->sb_mbtail = m0; 744df8bae1dSRodney W. Grimes m = m0->m_next; 745df8bae1dSRodney W. Grimes m0->m_next = 0; 746df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) { 747df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR; 748df8bae1dSRodney W. Grimes m->m_flags |= M_EOR; 749df8bae1dSRodney W. Grimes } 750e72a94adSMaksim Yevmenkin /* always call sbcompress() so it can do SBLASTMBUFCHK() */ 751df8bae1dSRodney W. Grimes sbcompress(sb, m, m0); 752df8bae1dSRodney W. Grimes } 753df8bae1dSRodney W. Grimes 754df8bae1dSRodney W. Grimes /* 755050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 
756a34b7046SRobert Watson */ 757a34b7046SRobert Watson void 758050ac265SRobert Watson sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 759a34b7046SRobert Watson { 760a34b7046SRobert Watson 761a34b7046SRobert Watson SOCKBUF_LOCK(sb); 762a34b7046SRobert Watson sbappendrecord_locked(sb, m0); 763a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 764a34b7046SRobert Watson } 765a34b7046SRobert Watson 7668de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */ 7678de34a88SAlan Somers static int 7688de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, 7698de34a88SAlan Somers struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) 770df8bae1dSRodney W. Grimes { 771395bb186SSam Leffler struct mbuf *m, *n, *nlast; 772c43cad1aSScott Long #if MSIZE <= 256 773df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN) 774df8bae1dSRodney W. Grimes return (0); 775c43cad1aSScott Long #endif 776c8b59ea7SGleb Smirnoff m = m_get(M_NOWAIT, MT_SONAME); 777c8b59ea7SGleb Smirnoff if (m == NULL) 778df8bae1dSRodney W. Grimes return (0); 779df8bae1dSRodney W. Grimes m->m_len = asa->sa_len; 78080208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len); 781*53b680caSGleb Smirnoff if (m0) 782*53b680caSGleb Smirnoff m_clrprotoflags(m0); 7838de34a88SAlan Somers if (ctrl_last) 7848de34a88SAlan Somers ctrl_last->m_next = m0; /* concatenate data to control */ 785df8bae1dSRodney W. Grimes else 786df8bae1dSRodney W. Grimes control = m0; 787df8bae1dSRodney W. Grimes m->m_next = control; 788395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next) 789df8bae1dSRodney W. Grimes sballoc(sb, n); 790395bb186SSam Leffler sballoc(sb, n); 791395bb186SSam Leffler nlast = n; 792395bb186SSam Leffler SBLINKRECORD(sb, m); 793395bb186SSam Leffler 794395bb186SSam Leffler sb->sb_mbtail = nlast; 795395bb186SSam Leffler SBLASTMBUFCHK(sb); 796395bb186SSam Leffler 797395bb186SSam Leffler SBLASTRECORDCHK(sb); 798df8bae1dSRodney W. 
Grimes return (1); 799df8bae1dSRodney W. Grimes } 800df8bae1dSRodney W. Grimes 801a34b7046SRobert Watson /* 802050ac265SRobert Watson * Append address and data, and optionally, control (ancillary) data to the 803050ac265SRobert Watson * receive queue of a socket. If present, m0 must include a packet header 804050ac265SRobert Watson * with total length. Returns 0 if no space in sockbuf or insufficient 805050ac265SRobert Watson * mbufs. 806a34b7046SRobert Watson */ 80726f9a767SRodney W. Grimes int 8088de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, 8098de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 8108de34a88SAlan Somers { 8118de34a88SAlan Somers struct mbuf *ctrl_last; 8128de34a88SAlan Somers int space = asa->sa_len; 8138de34a88SAlan Somers 8148de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 8158de34a88SAlan Somers 8168de34a88SAlan Somers if (m0 && (m0->m_flags & M_PKTHDR) == 0) 8178de34a88SAlan Somers panic("sbappendaddr_locked"); 8188de34a88SAlan Somers if (m0) 8198de34a88SAlan Somers space += m0->m_pkthdr.len; 8208de34a88SAlan Somers space += m_length(control, &ctrl_last); 8218de34a88SAlan Somers 8228de34a88SAlan Somers if (space > sbspace(sb)) 8238de34a88SAlan Somers return (0); 8248de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 8258de34a88SAlan Somers } 8268de34a88SAlan Somers 8278de34a88SAlan Somers /* 8288de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 8298de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 8308de34a88SAlan Somers * with total length. Returns 0 if insufficient mbufs. Does not validate space 8318de34a88SAlan Somers * on the receiving sockbuf. 
8328de34a88SAlan Somers */ 8338de34a88SAlan Somers int 8348de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, 8358de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 8368de34a88SAlan Somers { 8378de34a88SAlan Somers struct mbuf *ctrl_last; 8388de34a88SAlan Somers 8398de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 8408de34a88SAlan Somers 8418de34a88SAlan Somers ctrl_last = (control == NULL) ? NULL : m_last(control); 8428de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 8438de34a88SAlan Somers } 8448de34a88SAlan Somers 8458de34a88SAlan Somers /* 8468de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 8478de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 8488de34a88SAlan Somers * with total length. Returns 0 if no space in sockbuf or insufficient 8498de34a88SAlan Somers * mbufs. 8508de34a88SAlan Somers */ 8518de34a88SAlan Somers int 852050ac265SRobert Watson sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, 853050ac265SRobert Watson struct mbuf *m0, struct mbuf *control) 854a34b7046SRobert Watson { 855a34b7046SRobert Watson int retval; 856a34b7046SRobert Watson 857a34b7046SRobert Watson SOCKBUF_LOCK(sb); 858a34b7046SRobert Watson retval = sbappendaddr_locked(sb, asa, m0, control); 859a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 860a34b7046SRobert Watson return (retval); 861a34b7046SRobert Watson } 862a34b7046SRobert Watson 863a34b7046SRobert Watson int 864050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, 865050ac265SRobert Watson struct mbuf *control) 866df8bae1dSRodney W. Grimes { 867395bb186SSam Leffler struct mbuf *m, *n, *mlast; 8687ed60de8SPoul-Henning Kamp int space; 869df8bae1dSRodney W. Grimes 870a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 871a34b7046SRobert Watson 872df8bae1dSRodney W. 
Grimes if (control == 0) 873a34b7046SRobert Watson panic("sbappendcontrol_locked"); 8747ed60de8SPoul-Henning Kamp space = m_length(control, &n) + m_length(m0, NULL); 875a34b7046SRobert Watson 876df8bae1dSRodney W. Grimes if (space > sbspace(sb)) 877df8bae1dSRodney W. Grimes return (0); 878*53b680caSGleb Smirnoff m_clrprotoflags(m0); 879df8bae1dSRodney W. Grimes n->m_next = m0; /* concatenate data to control */ 880395bb186SSam Leffler 881395bb186SSam Leffler SBLASTRECORDCHK(sb); 882395bb186SSam Leffler 883395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next) 884df8bae1dSRodney W. Grimes sballoc(sb, m); 885395bb186SSam Leffler sballoc(sb, m); 886395bb186SSam Leffler mlast = m; 887395bb186SSam Leffler SBLINKRECORD(sb, control); 888395bb186SSam Leffler 889395bb186SSam Leffler sb->sb_mbtail = mlast; 890395bb186SSam Leffler SBLASTMBUFCHK(sb); 891395bb186SSam Leffler 892395bb186SSam Leffler SBLASTRECORDCHK(sb); 893df8bae1dSRodney W. Grimes return (1); 894df8bae1dSRodney W. Grimes } 895df8bae1dSRodney W. Grimes 896a34b7046SRobert Watson int 897050ac265SRobert Watson sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) 898a34b7046SRobert Watson { 899a34b7046SRobert Watson int retval; 900a34b7046SRobert Watson 901a34b7046SRobert Watson SOCKBUF_LOCK(sb); 902a34b7046SRobert Watson retval = sbappendcontrol_locked(sb, m0, control); 903a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 904a34b7046SRobert Watson return (retval); 905a34b7046SRobert Watson } 906a34b7046SRobert Watson 907df8bae1dSRodney W. Grimes /* 9087da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 9097da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty. 
9107da7362bSRobert Watson * 9117da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of 9127da7362bSRobert Watson * three ways: 9137da7362bSRobert Watson * 9147da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 9157da7362bSRobert Watson * record boundary, and no change in data type). 9167da7362bSRobert Watson * 9177da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 9187da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an 9190f9d0a73SGleb Smirnoff * appropriate mbuf exists, there is room, both mbufs are not marked as 9200f9d0a73SGleb Smirnoff * not ready, and no merging of data types will occur. 9217da7362bSRobert Watson * 9227da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain. 9237da7362bSRobert Watson * 9247da7362bSRobert Watson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as 9257da7362bSRobert Watson * end-of-record. 926df8bae1dSRodney W. Grimes */ 92726f9a767SRodney W. Grimes void 928050ac265SRobert Watson sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 929df8bae1dSRodney W. Grimes { 930050ac265SRobert Watson int eor = 0; 931050ac265SRobert Watson struct mbuf *o; 932df8bae1dSRodney W. Grimes 933a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 934a34b7046SRobert Watson 935df8bae1dSRodney W. Grimes while (m) { 936df8bae1dSRodney W. Grimes eor |= m->m_flags & M_EOR; 937df8bae1dSRodney W. Grimes if (m->m_len == 0 && 938df8bae1dSRodney W. Grimes (eor == 0 || 939df8bae1dSRodney W. Grimes (((o = m->m_next) || (o = n)) && 940df8bae1dSRodney W. Grimes o->m_type == m->m_type))) { 941395bb186SSam Leffler if (sb->sb_lastrecord == m) 942395bb186SSam Leffler sb->sb_lastrecord = m->m_next; 943df8bae1dSRodney W. Grimes m = m_free(m); 944df8bae1dSRodney W. Grimes continue; 945df8bae1dSRodney W. 
Grimes } 94632af0d74SDavid Malone if (n && (n->m_flags & M_EOR) == 0 && 94732af0d74SDavid Malone M_WRITABLE(n) && 9485e0f5cfaSKip Macy ((sb->sb_flags & SB_NOCOALESCE) == 0) && 9490f9d0a73SGleb Smirnoff !(m->m_flags & M_NOTREADY) && 9500f9d0a73SGleb Smirnoff !(n->m_flags & M_NOTREADY) && 95132af0d74SDavid Malone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 95232af0d74SDavid Malone m->m_len <= M_TRAILINGSPACE(n) && 953df8bae1dSRodney W. Grimes n->m_type == m->m_type) { 954df8bae1dSRodney W. Grimes bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 955df8bae1dSRodney W. Grimes (unsigned)m->m_len); 956df8bae1dSRodney W. Grimes n->m_len += m->m_len; 9570f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 9580f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) 9590f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 96034333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 961b3f1af6bSTim J. Robbins /* XXX: Probably don't need.*/ 96204ac9b97SKelly Yancey sb->sb_ctl += m->m_len; 963df8bae1dSRodney W. Grimes m = m_free(m); 964df8bae1dSRodney W. Grimes continue; 965df8bae1dSRodney W. Grimes } 966df8bae1dSRodney W. Grimes if (n) 967df8bae1dSRodney W. Grimes n->m_next = m; 968df8bae1dSRodney W. Grimes else 969df8bae1dSRodney W. Grimes sb->sb_mb = m; 970395bb186SSam Leffler sb->sb_mbtail = m; 971df8bae1dSRodney W. Grimes sballoc(sb, m); 972df8bae1dSRodney W. Grimes n = m; 973df8bae1dSRodney W. Grimes m->m_flags &= ~M_EOR; 974df8bae1dSRodney W. Grimes m = m->m_next; 975df8bae1dSRodney W. Grimes n->m_next = 0; 976df8bae1dSRodney W. Grimes } 977df8bae1dSRodney W. Grimes if (eor) { 9787da7362bSRobert Watson KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); 979df8bae1dSRodney W. Grimes n->m_flags |= eor; 980df8bae1dSRodney W. Grimes } 981395bb186SSam Leffler SBLASTMBUFCHK(sb); 982df8bae1dSRodney W. Grimes } 983df8bae1dSRodney W. Grimes 984df8bae1dSRodney W. Grimes /* 985050ac265SRobert Watson * Free all mbufs in a sockbuf. 
Check that all resources are reclaimed. 986df8bae1dSRodney W. Grimes */ 987eaa6dfbcSRobert Watson static void 988050ac265SRobert Watson sbflush_internal(struct sockbuf *sb) 989df8bae1dSRodney W. Grimes { 990df8bae1dSRodney W. Grimes 99123f84772SPierre Beyssac while (sb->sb_mbcnt) { 99223f84772SPierre Beyssac /* 993761a9a1fSGleb Smirnoff * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: 99423f84772SPierre Beyssac * we would loop forever. Panic instead. 99523f84772SPierre Beyssac */ 9960f9d0a73SGleb Smirnoff if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) 99723f84772SPierre Beyssac break; 9980f9d0a73SGleb Smirnoff m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); 99923f84772SPierre Beyssac } 10000f9d0a73SGleb Smirnoff KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, 10010f9d0a73SGleb Smirnoff ("%s: ccc %u mb %p mbcnt %u", __func__, 10020f9d0a73SGleb Smirnoff sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); 1003a34b7046SRobert Watson } 1004a34b7046SRobert Watson 1005a34b7046SRobert Watson void 1006050ac265SRobert Watson sbflush_locked(struct sockbuf *sb) 1007eaa6dfbcSRobert Watson { 1008eaa6dfbcSRobert Watson 1009eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1010eaa6dfbcSRobert Watson sbflush_internal(sb); 1011eaa6dfbcSRobert Watson } 1012eaa6dfbcSRobert Watson 1013eaa6dfbcSRobert Watson void 1014050ac265SRobert Watson sbflush(struct sockbuf *sb) 1015a34b7046SRobert Watson { 1016a34b7046SRobert Watson 1017a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1018a34b7046SRobert Watson sbflush_locked(sb); 1019a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1020df8bae1dSRodney W. Grimes } 1021df8bae1dSRodney W. Grimes 1022df8bae1dSRodney W. Grimes /* 10231d2df300SGleb Smirnoff * Cut data from (the front of) a sockbuf. 1024df8bae1dSRodney W. Grimes */ 10251d2df300SGleb Smirnoff static struct mbuf * 10261d2df300SGleb Smirnoff sbcut_internal(struct sockbuf *sb, int len) 1027df8bae1dSRodney W. 
Grimes { 10280f9d0a73SGleb Smirnoff struct mbuf *m, *next, *mfree; 1029df8bae1dSRodney W. Grimes 1030df8bae1dSRodney W. Grimes next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 10311d2df300SGleb Smirnoff mfree = NULL; 10321d2df300SGleb Smirnoff 1033df8bae1dSRodney W. Grimes while (len > 0) { 10348146bcfeSGleb Smirnoff if (m == NULL) { 10358146bcfeSGleb Smirnoff KASSERT(next, ("%s: no next, len %d", __func__, len)); 1036df8bae1dSRodney W. Grimes m = next; 1037df8bae1dSRodney W. Grimes next = m->m_nextpkt; 1038df8bae1dSRodney W. Grimes } 1039df8bae1dSRodney W. Grimes if (m->m_len > len) { 10400f9d0a73SGleb Smirnoff KASSERT(!(m->m_flags & M_NOTAVAIL), 10410f9d0a73SGleb Smirnoff ("%s: m %p M_NOTAVAIL", __func__, m)); 1042df8bae1dSRodney W. Grimes m->m_len -= len; 1043df8bae1dSRodney W. Grimes m->m_data += len; 10440f9d0a73SGleb Smirnoff sb->sb_ccc -= len; 10450f9d0a73SGleb Smirnoff sb->sb_acc -= len; 10464e023759SAndre Oppermann if (sb->sb_sndptroff != 0) 10474e023759SAndre Oppermann sb->sb_sndptroff -= len; 104834333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 104904ac9b97SKelly Yancey sb->sb_ctl -= len; 1050df8bae1dSRodney W. Grimes break; 1051df8bae1dSRodney W. Grimes } 1052df8bae1dSRodney W. Grimes len -= m->m_len; 1053df8bae1dSRodney W. Grimes sbfree(sb, m); 10540f9d0a73SGleb Smirnoff /* 10550f9d0a73SGleb Smirnoff * Do not put M_NOTREADY buffers to the free list, they 10560f9d0a73SGleb Smirnoff * are referenced from outside. 10570f9d0a73SGleb Smirnoff */ 10580f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY) 10590f9d0a73SGleb Smirnoff m = m->m_next; 10600f9d0a73SGleb Smirnoff else { 10610f9d0a73SGleb Smirnoff struct mbuf *n; 10620f9d0a73SGleb Smirnoff 10631d2df300SGleb Smirnoff n = m->m_next; 10641d2df300SGleb Smirnoff m->m_next = mfree; 10651d2df300SGleb Smirnoff mfree = m; 10661d2df300SGleb Smirnoff m = n; 1067df8bae1dSRodney W. 
Grimes } 10680f9d0a73SGleb Smirnoff } 1069e834a840SGleb Smirnoff /* 1070e834a840SGleb Smirnoff * Free any zero-length mbufs from the buffer. 1071e834a840SGleb Smirnoff * For SOCK_DGRAM sockets such mbufs represent empty records. 1072e834a840SGleb Smirnoff * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, 1073e834a840SGleb Smirnoff * when sosend_generic() needs to send only control data. 1074e834a840SGleb Smirnoff */ 1075e834a840SGleb Smirnoff while (m && m->m_len == 0) { 1076e834a840SGleb Smirnoff struct mbuf *n; 1077e834a840SGleb Smirnoff 1078e834a840SGleb Smirnoff sbfree(sb, m); 1079e834a840SGleb Smirnoff n = m->m_next; 1080e834a840SGleb Smirnoff m->m_next = mfree; 1081e834a840SGleb Smirnoff mfree = m; 1082e834a840SGleb Smirnoff m = n; 1083e834a840SGleb Smirnoff } 1084df8bae1dSRodney W. Grimes if (m) { 1085df8bae1dSRodney W. Grimes sb->sb_mb = m; 1086df8bae1dSRodney W. Grimes m->m_nextpkt = next; 1087df8bae1dSRodney W. Grimes } else 1088df8bae1dSRodney W. Grimes sb->sb_mb = next; 1089395bb186SSam Leffler /* 1090050ac265SRobert Watson * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure 1091050ac265SRobert Watson * sb_lastrecord is up-to-date if we dropped part of the last record. 1092395bb186SSam Leffler */ 1093395bb186SSam Leffler m = sb->sb_mb; 1094395bb186SSam Leffler if (m == NULL) { 1095395bb186SSam Leffler sb->sb_mbtail = NULL; 1096395bb186SSam Leffler sb->sb_lastrecord = NULL; 1097395bb186SSam Leffler } else if (m->m_nextpkt == NULL) { 1098395bb186SSam Leffler sb->sb_lastrecord = m; 1099395bb186SSam Leffler } 11001d2df300SGleb Smirnoff 11011d2df300SGleb Smirnoff return (mfree); 1102df8bae1dSRodney W. Grimes } 1103df8bae1dSRodney W. Grimes 1104df8bae1dSRodney W. Grimes /* 1105a34b7046SRobert Watson * Drop data from (the front of) a sockbuf. 
1106a34b7046SRobert Watson */ 1107a34b7046SRobert Watson void 1108050ac265SRobert Watson sbdrop_locked(struct sockbuf *sb, int len) 1109eaa6dfbcSRobert Watson { 1110eaa6dfbcSRobert Watson 1111eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 11121d2df300SGleb Smirnoff m_freem(sbcut_internal(sb, len)); 11131d2df300SGleb Smirnoff } 1114eaa6dfbcSRobert Watson 11151d2df300SGleb Smirnoff /* 11161d2df300SGleb Smirnoff * Drop data from (the front of) a sockbuf, 11171d2df300SGleb Smirnoff * and return it to caller. 11181d2df300SGleb Smirnoff */ 11191d2df300SGleb Smirnoff struct mbuf * 11201d2df300SGleb Smirnoff sbcut_locked(struct sockbuf *sb, int len) 11211d2df300SGleb Smirnoff { 11221d2df300SGleb Smirnoff 11231d2df300SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 11241d2df300SGleb Smirnoff return (sbcut_internal(sb, len)); 1125eaa6dfbcSRobert Watson } 1126eaa6dfbcSRobert Watson 1127eaa6dfbcSRobert Watson void 1128050ac265SRobert Watson sbdrop(struct sockbuf *sb, int len) 1129a34b7046SRobert Watson { 11301d2df300SGleb Smirnoff struct mbuf *mfree; 1131a34b7046SRobert Watson 1132a34b7046SRobert Watson SOCKBUF_LOCK(sb); 11331d2df300SGleb Smirnoff mfree = sbcut_internal(sb, len); 1134a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 11351d2df300SGleb Smirnoff 11361d2df300SGleb Smirnoff m_freem(mfree); 1137a34b7046SRobert Watson } 1138a34b7046SRobert Watson 11394e023759SAndre Oppermann /* 11404e023759SAndre Oppermann * Maintain a pointer and offset pair into the socket buffer mbuf chain to 11414e023759SAndre Oppermann * avoid traversal of the entire socket buffer for larger offsets. 
11424e023759SAndre Oppermann */ 11434e023759SAndre Oppermann struct mbuf * 11444e023759SAndre Oppermann sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff) 11454e023759SAndre Oppermann { 11464e023759SAndre Oppermann struct mbuf *m, *ret; 11474e023759SAndre Oppermann 11484e023759SAndre Oppermann KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 11490f9d0a73SGleb Smirnoff KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__)); 11500f9d0a73SGleb Smirnoff KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__)); 11514e023759SAndre Oppermann 11524e023759SAndre Oppermann /* 11534e023759SAndre Oppermann * Is off below stored offset? Happens on retransmits. 11544e023759SAndre Oppermann * Just return, we can't help here. 11554e023759SAndre Oppermann */ 11564e023759SAndre Oppermann if (sb->sb_sndptroff > off) { 11574e023759SAndre Oppermann *moff = off; 11584e023759SAndre Oppermann return (sb->sb_mb); 11594e023759SAndre Oppermann } 11604e023759SAndre Oppermann 11614e023759SAndre Oppermann /* Return closest mbuf in chain for current offset. */ 11624e023759SAndre Oppermann *moff = off - sb->sb_sndptroff; 11634e023759SAndre Oppermann m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb; 11640963c8e4SLawrence Stewart if (*moff == m->m_len) { 11650963c8e4SLawrence Stewart *moff = 0; 11660963c8e4SLawrence Stewart sb->sb_sndptroff += m->m_len; 11670963c8e4SLawrence Stewart m = ret = m->m_next; 11680963c8e4SLawrence Stewart KASSERT(ret->m_len > 0, 11690963c8e4SLawrence Stewart ("mbuf %p in sockbuf %p chain has no valid data", ret, sb)); 11700963c8e4SLawrence Stewart } 11714e023759SAndre Oppermann 11724e023759SAndre Oppermann /* Advance by len to be as close as possible for the next transmit. */ 11734e023759SAndre Oppermann for (off = off - sb->sb_sndptroff + len - 1; 11746f4745d5SBjoern A. 
Zeeb off > 0 && m != NULL && off >= m->m_len; 11754e023759SAndre Oppermann m = m->m_next) { 11764e023759SAndre Oppermann sb->sb_sndptroff += m->m_len; 11774e023759SAndre Oppermann off -= m->m_len; 11784e023759SAndre Oppermann } 11796f4745d5SBjoern A. Zeeb if (off > 0 && m == NULL) 11806f4745d5SBjoern A. Zeeb panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret); 11814e023759SAndre Oppermann sb->sb_sndptr = m; 11824e023759SAndre Oppermann 11834e023759SAndre Oppermann return (ret); 11844e023759SAndre Oppermann } 11854e023759SAndre Oppermann 1186a34b7046SRobert Watson /* 11879fd573c3SHans Petter Selasky * Return the first mbuf and the mbuf data offset for the provided 11889fd573c3SHans Petter Selasky * send offset without changing the "sb_sndptroff" field. 11899fd573c3SHans Petter Selasky */ 11909fd573c3SHans Petter Selasky struct mbuf * 11919fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) 11929fd573c3SHans Petter Selasky { 11939fd573c3SHans Petter Selasky struct mbuf *m; 11949fd573c3SHans Petter Selasky 11959fd573c3SHans Petter Selasky KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 11969fd573c3SHans Petter Selasky 11979fd573c3SHans Petter Selasky /* 11989fd573c3SHans Petter Selasky * If the "off" is below the stored offset, which happens on 11999fd573c3SHans Petter Selasky * retransmits, just use "sb_mb": 12009fd573c3SHans Petter Selasky */ 12019fd573c3SHans Petter Selasky if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 12029fd573c3SHans Petter Selasky m = sb->sb_mb; 12039fd573c3SHans Petter Selasky } else { 12049fd573c3SHans Petter Selasky m = sb->sb_sndptr; 12059fd573c3SHans Petter Selasky off -= sb->sb_sndptroff; 12069fd573c3SHans Petter Selasky } 12079fd573c3SHans Petter Selasky while (off > 0 && m != NULL) { 12089fd573c3SHans Petter Selasky if (off < m->m_len) 12099fd573c3SHans Petter Selasky break; 12109fd573c3SHans Petter Selasky off -= m->m_len; 12119fd573c3SHans Petter Selasky m = m->m_next; 
12129fd573c3SHans Petter Selasky } 12139fd573c3SHans Petter Selasky *moff = off; 12149fd573c3SHans Petter Selasky return (m); 12159fd573c3SHans Petter Selasky } 12169fd573c3SHans Petter Selasky 12179fd573c3SHans Petter Selasky /* 1218050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1219050ac265SRobert Watson * front. 1220df8bae1dSRodney W. Grimes */ 122126f9a767SRodney W. Grimes void 1222050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb) 1223df8bae1dSRodney W. Grimes { 1224050ac265SRobert Watson struct mbuf *m; 1225df8bae1dSRodney W. Grimes 1226a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1227a34b7046SRobert Watson 1228df8bae1dSRodney W. Grimes m = sb->sb_mb; 1229df8bae1dSRodney W. Grimes if (m) { 1230df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt; 1231df8bae1dSRodney W. Grimes do { 1232df8bae1dSRodney W. Grimes sbfree(sb, m); 1233ecde8f7cSMatthew Dillon m = m_free(m); 1234797f2d22SPoul-Henning Kamp } while (m); 1235df8bae1dSRodney W. Grimes } 1236395bb186SSam Leffler SB_EMPTY_FIXUP(sb); 1237df8bae1dSRodney W. Grimes } 12381e4ad9ceSGarrett Wollman 123982c23ebaSBill Fenner /* 1240050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1241050ac265SRobert Watson * front. 1242a34b7046SRobert Watson */ 1243a34b7046SRobert Watson void 1244050ac265SRobert Watson sbdroprecord(struct sockbuf *sb) 1245a34b7046SRobert Watson { 1246a34b7046SRobert Watson 1247a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1248a34b7046SRobert Watson sbdroprecord_locked(sb); 1249a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1250a34b7046SRobert Watson } 1251a34b7046SRobert Watson 125220d9e5e8SRobert Watson /* 12538c799760SRobert Watson * Create a "control" mbuf containing the specified data with the specified 12548c799760SRobert Watson * type for presentation on a socket buffer. 
125520d9e5e8SRobert Watson */ 125620d9e5e8SRobert Watson struct mbuf * 1257d19e16a7SRobert Watson sbcreatecontrol(caddr_t p, int size, int type, int level) 125820d9e5e8SRobert Watson { 1259d19e16a7SRobert Watson struct cmsghdr *cp; 126020d9e5e8SRobert Watson struct mbuf *m; 126120d9e5e8SRobert Watson 126220d9e5e8SRobert Watson if (CMSG_SPACE((u_int)size) > MCLBYTES) 126320d9e5e8SRobert Watson return ((struct mbuf *) NULL); 126420d9e5e8SRobert Watson if (CMSG_SPACE((u_int)size) > MLEN) 1265eb1b1807SGleb Smirnoff m = m_getcl(M_NOWAIT, MT_CONTROL, 0); 126620d9e5e8SRobert Watson else 1267eb1b1807SGleb Smirnoff m = m_get(M_NOWAIT, MT_CONTROL); 126820d9e5e8SRobert Watson if (m == NULL) 126920d9e5e8SRobert Watson return ((struct mbuf *) NULL); 127020d9e5e8SRobert Watson cp = mtod(m, struct cmsghdr *); 127120d9e5e8SRobert Watson m->m_len = 0; 127220d9e5e8SRobert Watson KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), 127320d9e5e8SRobert Watson ("sbcreatecontrol: short mbuf")); 12742827952eSXin LI /* 12752827952eSXin LI * Don't leave the padding between the msg header and the 12762827952eSXin LI * cmsg data and the padding after the cmsg data un-initialized. 12772827952eSXin LI */ 12782827952eSXin LI bzero(cp, CMSG_SPACE((u_int)size)); 127920d9e5e8SRobert Watson if (p != NULL) 128020d9e5e8SRobert Watson (void)memcpy(CMSG_DATA(cp), p, size); 128120d9e5e8SRobert Watson m->m_len = CMSG_SPACE(size); 128220d9e5e8SRobert Watson cp->cmsg_len = CMSG_LEN(size); 128320d9e5e8SRobert Watson cp->cmsg_level = level; 128420d9e5e8SRobert Watson cp->cmsg_type = type; 128520d9e5e8SRobert Watson return (m); 128620d9e5e8SRobert Watson } 128720d9e5e8SRobert Watson 128820d9e5e8SRobert Watson /* 12898c799760SRobert Watson * This does the same for socket buffers that sotoxsocket does for sockets: 12908c799760SRobert Watson * generate an user-format data structure describing the socket buffer. 
Note 12918c799760SRobert Watson * that the xsockbuf structure, since it is always embedded in a socket, does 12928c799760SRobert Watson * not include a self pointer nor a length. We make this entry point public 12938c799760SRobert Watson * in case some other mechanism needs it. 129420d9e5e8SRobert Watson */ 129520d9e5e8SRobert Watson void 129620d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) 129720d9e5e8SRobert Watson { 1298d19e16a7SRobert Watson 12990f9d0a73SGleb Smirnoff xsb->sb_cc = sb->sb_ccc; 130020d9e5e8SRobert Watson xsb->sb_hiwat = sb->sb_hiwat; 130120d9e5e8SRobert Watson xsb->sb_mbcnt = sb->sb_mbcnt; 130249f287f8SGeorge V. Neville-Neil xsb->sb_mcnt = sb->sb_mcnt; 130349f287f8SGeorge V. Neville-Neil xsb->sb_ccnt = sb->sb_ccnt; 130420d9e5e8SRobert Watson xsb->sb_mbmax = sb->sb_mbmax; 130520d9e5e8SRobert Watson xsb->sb_lowat = sb->sb_lowat; 130620d9e5e8SRobert Watson xsb->sb_flags = sb->sb_flags; 130720d9e5e8SRobert Watson xsb->sb_timeo = sb->sb_timeo; 130820d9e5e8SRobert Watson } 130920d9e5e8SRobert Watson 1310639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ 1311639acc13SGarrett Wollman static int dummy; 1312639acc13SGarrett Wollman SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); 13131b978d45SHartmut Brandt SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW, 13141b978d45SHartmut Brandt &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size"); 13151b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 13163eb9ab52SEitan Adler &sb_efficiency, 0, "Socket buffer size waste factor"); 1317