xref: /freebsd/sys/kern/uipc_sockbuf.c (revision 7029da5c36f2d3cf6bb6c81bf551229f416399e8)
19454b2d8SWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1990, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1569a28758SEd Maste  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  *
31df8bae1dSRodney W. Grimes  *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
32df8bae1dSRodney W. Grimes  */
33df8bae1dSRodney W. Grimes 
34677b542eSDavid E. O'Brien #include <sys/cdefs.h>
35677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
36677b542eSDavid E. O'Brien 
37b2e60773SJohn Baldwin #include "opt_kern_tls.h"
385b86eac4SJesper Skriver #include "opt_param.h"
39335654d7SRobert Watson 
40df8bae1dSRodney W. Grimes #include <sys/param.h>
41960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */
42ff5c09daSGarrett Wollman #include <sys/kernel.h>
43b2e60773SJohn Baldwin #include <sys/ktls.h>
44fb919e4dSMark Murray #include <sys/lock.h>
458ec07310SGleb Smirnoff #include <sys/malloc.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47960ed29cSSeigo Tanimura #include <sys/mutex.h>
48fb919e4dSMark Murray #include <sys/proc.h>
49df8bae1dSRodney W. Grimes #include <sys/protosw.h>
502f9a2132SBrian Feldman #include <sys/resourcevar.h>
51960ed29cSSeigo Tanimura #include <sys/signalvar.h>
52df8bae1dSRodney W. Grimes #include <sys/socket.h>
53df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
547abab911SRobert Watson #include <sys/sx.h>
55ff5c09daSGarrett Wollman #include <sys/sysctl.h>
5626f9a767SRodney W. Grimes 
57f14cce87SRobert Watson /*
58f14cce87SRobert Watson  * Function pointer set by the AIO routines so that the socket buffer code
59f14cce87SRobert Watson  * can call back into the AIO module if it is loaded.
60f14cce87SRobert Watson  */
6121d56e9cSAlfred Perlstein void	(*aio_swake)(struct socket *, struct sockbuf *);
6221d56e9cSAlfred Perlstein 
63df8bae1dSRodney W. Grimes /*
64f14cce87SRobert Watson  * Primitive routines for operating on socket buffers
65df8bae1dSRodney W. Grimes  */
66df8bae1dSRodney W. Grimes 
6779cb7eb4SDavid Greenman u_long	sb_max = SB_MAX;
6858d14daeSMohan Srinivasan u_long sb_max_adj =
69b233773bSBjoern A. Zeeb        (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
70df8bae1dSRodney W. Grimes 
714b29bc4fSGarrett Wollman static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
724b29bc4fSGarrett Wollman 
731d2df300SGleb Smirnoff static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
74050ac265SRobert Watson static void	sbflush_internal(struct sockbuf *sb);
75eaa6dfbcSRobert Watson 
76df8bae1dSRodney W. Grimes /*
77829fae90SGleb Smirnoff  * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
78829fae90SGleb Smirnoff  */
79829fae90SGleb Smirnoff static void
80829fae90SGleb Smirnoff sbm_clrprotoflags(struct mbuf *m, int flags)
81829fae90SGleb Smirnoff {
82829fae90SGleb Smirnoff 	int mask;
83829fae90SGleb Smirnoff 
84829fae90SGleb Smirnoff 	mask = ~M_PROTOFLAGS;
85829fae90SGleb Smirnoff 	if (flags & PRUS_NOTREADY)
86829fae90SGleb Smirnoff 		mask |= M_NOTREADY;
87829fae90SGleb Smirnoff 	while (m) {
88829fae90SGleb Smirnoff 		m->m_flags &= mask;
89829fae90SGleb Smirnoff 		m = m->m_next;
90829fae90SGleb Smirnoff 	}
91829fae90SGleb Smirnoff }
92829fae90SGleb Smirnoff 
93829fae90SGleb Smirnoff /*
943807631bSJohn Baldwin  * Compress M_NOTREADY mbufs after they have been readied by sbready().
953807631bSJohn Baldwin  *
963807631bSJohn Baldwin  * sbcompress() skips M_NOTREADY mbufs since the data is not available to
973807631bSJohn Baldwin  * be copied at the time of sbcompress().  This function combines small
983807631bSJohn Baldwin  * mbufs similar to sbcompress() once mbufs are ready.  'm0' is the first
993807631bSJohn Baldwin  * mbuf sbready() marked ready, and 'end' is the first mbuf still not
1003807631bSJohn Baldwin  * ready.
1013807631bSJohn Baldwin  */
1023807631bSJohn Baldwin static void
1033807631bSJohn Baldwin sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
1043807631bSJohn Baldwin {
1053807631bSJohn Baldwin 	struct mbuf *m, *n;
1063807631bSJohn Baldwin 	int ext_size;
1073807631bSJohn Baldwin 
1083807631bSJohn Baldwin 	SOCKBUF_LOCK_ASSERT(sb);
1093807631bSJohn Baldwin 
1103807631bSJohn Baldwin 	if ((sb->sb_flags & SB_NOCOALESCE) != 0)
1113807631bSJohn Baldwin 		return;
1123807631bSJohn Baldwin 
1133807631bSJohn Baldwin 	for (m = m0; m != end; m = m->m_next) {
1143807631bSJohn Baldwin 		MPASS((m->m_flags & M_NOTREADY) == 0);
1153807631bSJohn Baldwin 
1163807631bSJohn Baldwin 		/* Compress small unmapped mbufs into plain mbufs. */
117b2e60773SJohn Baldwin 		if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN &&
118b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m)) {
1193807631bSJohn Baldwin 			MPASS(m->m_flags & M_EXT);
1203807631bSJohn Baldwin 			ext_size = m->m_ext.ext_size;
1213807631bSJohn Baldwin 			if (mb_unmapped_compress(m) == 0) {
1223807631bSJohn Baldwin 				sb->sb_mbcnt -= ext_size;
1233807631bSJohn Baldwin 				sb->sb_ccnt -= 1;
1243807631bSJohn Baldwin 			}
1253807631bSJohn Baldwin 		}
1263807631bSJohn Baldwin 
1273807631bSJohn Baldwin 		/*
1283807631bSJohn Baldwin 		 * NB: In sbcompress(), 'n' is the last mbuf in the
1293807631bSJohn Baldwin 		 * socket buffer and 'm' is the new mbuf being copied
1303807631bSJohn Baldwin 		 * into the trailing space of 'n'.  Here, the roles
1313807631bSJohn Baldwin 		 * are reversed and 'n' is the next mbuf after 'm'
1323807631bSJohn Baldwin 		 * that is being copied into the trailing space of
1333807631bSJohn Baldwin 		 * 'm'.
1343807631bSJohn Baldwin 		 */
1353807631bSJohn Baldwin 		n = m->m_next;
1363807631bSJohn Baldwin 		while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
1373807631bSJohn Baldwin 		    M_WRITABLE(m) &&
1383807631bSJohn Baldwin 		    (m->m_flags & M_NOMAP) == 0 &&
139b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(n) &&
140b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m) &&
1413807631bSJohn Baldwin 		    n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1423807631bSJohn Baldwin 		    n->m_len <= M_TRAILINGSPACE(m) &&
1433807631bSJohn Baldwin 		    m->m_type == n->m_type) {
1443807631bSJohn Baldwin 			KASSERT(sb->sb_lastrecord != n,
1453807631bSJohn Baldwin 		    ("%s: merging start of record (%p) into previous mbuf (%p)",
1463807631bSJohn Baldwin 			    __func__, n, m));
1473807631bSJohn Baldwin 			m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
1483807631bSJohn Baldwin 			m->m_len += n->m_len;
1493807631bSJohn Baldwin 			m->m_next = n->m_next;
1503807631bSJohn Baldwin 			m->m_flags |= n->m_flags & M_EOR;
1513807631bSJohn Baldwin 			if (sb->sb_mbtail == n)
1523807631bSJohn Baldwin 				sb->sb_mbtail = m;
1533807631bSJohn Baldwin 
1543807631bSJohn Baldwin 			sb->sb_mbcnt -= MSIZE;
1553807631bSJohn Baldwin 			sb->sb_mcnt -= 1;
1563807631bSJohn Baldwin 			if (n->m_flags & M_EXT) {
1573807631bSJohn Baldwin 				sb->sb_mbcnt -= n->m_ext.ext_size;
1583807631bSJohn Baldwin 				sb->sb_ccnt -= 1;
1593807631bSJohn Baldwin 			}
1603807631bSJohn Baldwin 			m_free(n);
1613807631bSJohn Baldwin 			n = m->m_next;
1623807631bSJohn Baldwin 		}
1633807631bSJohn Baldwin 	}
1643807631bSJohn Baldwin 	SBLASTRECORDCHK(sb);
1653807631bSJohn Baldwin 	SBLASTMBUFCHK(sb);
1663807631bSJohn Baldwin }
1673807631bSJohn Baldwin 
1683807631bSJohn Baldwin /*
16982334850SJohn Baldwin  * Mark ready "count" units of I/O starting with "m".  Most mbufs
17082334850SJohn Baldwin  * count as a single unit of I/O except for EXT_PGS-backed mbufs which
17182334850SJohn Baldwin  * can be backed by multiple pages.
1720f9d0a73SGleb Smirnoff  */
1730f9d0a73SGleb Smirnoff int
17482334850SJohn Baldwin sbready(struct sockbuf *sb, struct mbuf *m0, int count)
1750f9d0a73SGleb Smirnoff {
17682334850SJohn Baldwin 	struct mbuf *m;
1770f9d0a73SGleb Smirnoff 	u_int blocker;
1780f9d0a73SGleb Smirnoff 
1790f9d0a73SGleb Smirnoff 	SOCKBUF_LOCK_ASSERT(sb);
1800f9d0a73SGleb Smirnoff 	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
18182334850SJohn Baldwin 	KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
1820f9d0a73SGleb Smirnoff 
18382334850SJohn Baldwin 	m = m0;
1840f9d0a73SGleb Smirnoff 	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
1850f9d0a73SGleb Smirnoff 
18682334850SJohn Baldwin 	while (count > 0) {
1870f9d0a73SGleb Smirnoff 		KASSERT(m->m_flags & M_NOTREADY,
1880f9d0a73SGleb Smirnoff 		    ("%s: m %p !M_NOTREADY", __func__, m));
18982334850SJohn Baldwin 		if ((m->m_flags & M_EXT) != 0 &&
19082334850SJohn Baldwin 		    m->m_ext.ext_type == EXT_PGS) {
19182334850SJohn Baldwin 			if (count < m->m_ext.ext_pgs->nrdy) {
19282334850SJohn Baldwin 				m->m_ext.ext_pgs->nrdy -= count;
19382334850SJohn Baldwin 				count = 0;
19482334850SJohn Baldwin 				break;
19582334850SJohn Baldwin 			}
19682334850SJohn Baldwin 			count -= m->m_ext.ext_pgs->nrdy;
19782334850SJohn Baldwin 			m->m_ext.ext_pgs->nrdy = 0;
19882334850SJohn Baldwin 		} else
19982334850SJohn Baldwin 			count--;
20082334850SJohn Baldwin 
2010f9d0a73SGleb Smirnoff 		m->m_flags &= ~(M_NOTREADY | blocker);
2020f9d0a73SGleb Smirnoff 		if (blocker)
2030f9d0a73SGleb Smirnoff 			sb->sb_acc += m->m_len;
20482334850SJohn Baldwin 		m = m->m_next;
2050f9d0a73SGleb Smirnoff 	}
2060f9d0a73SGleb Smirnoff 
20782334850SJohn Baldwin 	/*
20882334850SJohn Baldwin 	 * If the first mbuf is still not fully ready because only
20982334850SJohn Baldwin 	 * some of its backing pages were readied, no further progress
21082334850SJohn Baldwin 	 * can be made.
21182334850SJohn Baldwin 	 */
21282334850SJohn Baldwin 	if (m0 == m) {
21382334850SJohn Baldwin 		MPASS(m->m_flags & M_NOTREADY);
2140f9d0a73SGleb Smirnoff 		return (EINPROGRESS);
21582334850SJohn Baldwin 	}
21682334850SJohn Baldwin 
21782334850SJohn Baldwin 	if (!blocker) {
2183807631bSJohn Baldwin 		sbready_compress(sb, m0, m);
21982334850SJohn Baldwin 		return (EINPROGRESS);
22082334850SJohn Baldwin 	}
2210f9d0a73SGleb Smirnoff 
2220f9d0a73SGleb Smirnoff 	/* This one was blocking all the queue. */
2230f9d0a73SGleb Smirnoff 	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
2240f9d0a73SGleb Smirnoff 		KASSERT(m->m_flags & M_BLOCKED,
2250f9d0a73SGleb Smirnoff 		    ("%s: m %p !M_BLOCKED", __func__, m));
2260f9d0a73SGleb Smirnoff 		m->m_flags &= ~M_BLOCKED;
2270f9d0a73SGleb Smirnoff 		sb->sb_acc += m->m_len;
2280f9d0a73SGleb Smirnoff 	}
2290f9d0a73SGleb Smirnoff 
2300f9d0a73SGleb Smirnoff 	sb->sb_fnrdy = m;
2313807631bSJohn Baldwin 	sbready_compress(sb, m0, m);
2320f9d0a73SGleb Smirnoff 
2330f9d0a73SGleb Smirnoff 	return (0);
2340f9d0a73SGleb Smirnoff }
2350f9d0a73SGleb Smirnoff 
2360f9d0a73SGleb Smirnoff /*
2378967b220SGleb Smirnoff  * Adjust sockbuf state reflecting allocation of m.
2388967b220SGleb Smirnoff  */
2398967b220SGleb Smirnoff void
2408967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m)
2418967b220SGleb Smirnoff {
2428967b220SGleb Smirnoff 
2438967b220SGleb Smirnoff 	SOCKBUF_LOCK_ASSERT(sb);
2448967b220SGleb Smirnoff 
2450f9d0a73SGleb Smirnoff 	sb->sb_ccc += m->m_len;
2460f9d0a73SGleb Smirnoff 
2470f9d0a73SGleb Smirnoff 	if (sb->sb_fnrdy == NULL) {
2480f9d0a73SGleb Smirnoff 		if (m->m_flags & M_NOTREADY)
2490f9d0a73SGleb Smirnoff 			sb->sb_fnrdy = m;
2500f9d0a73SGleb Smirnoff 		else
2510f9d0a73SGleb Smirnoff 			sb->sb_acc += m->m_len;
2520f9d0a73SGleb Smirnoff 	} else
2530f9d0a73SGleb Smirnoff 		m->m_flags |= M_BLOCKED;
2548967b220SGleb Smirnoff 
2558967b220SGleb Smirnoff 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
2568967b220SGleb Smirnoff 		sb->sb_ctl += m->m_len;
2578967b220SGleb Smirnoff 
2588967b220SGleb Smirnoff 	sb->sb_mbcnt += MSIZE;
2598967b220SGleb Smirnoff 	sb->sb_mcnt += 1;
2608967b220SGleb Smirnoff 
2618967b220SGleb Smirnoff 	if (m->m_flags & M_EXT) {
2628967b220SGleb Smirnoff 		sb->sb_mbcnt += m->m_ext.ext_size;
2638967b220SGleb Smirnoff 		sb->sb_ccnt += 1;
2648967b220SGleb Smirnoff 	}
2658967b220SGleb Smirnoff }
2668967b220SGleb Smirnoff 
2678967b220SGleb Smirnoff /*
2688967b220SGleb Smirnoff  * Adjust sockbuf state reflecting freeing of m.
2698967b220SGleb Smirnoff  */
2708967b220SGleb Smirnoff void
2718967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m)
2728967b220SGleb Smirnoff {
2738967b220SGleb Smirnoff 
2748967b220SGleb Smirnoff #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
2758967b220SGleb Smirnoff 	SOCKBUF_LOCK_ASSERT(sb);
2768967b220SGleb Smirnoff #endif
2778967b220SGleb Smirnoff 
2780f9d0a73SGleb Smirnoff 	sb->sb_ccc -= m->m_len;
2790f9d0a73SGleb Smirnoff 
2800f9d0a73SGleb Smirnoff 	if (!(m->m_flags & M_NOTAVAIL))
2810f9d0a73SGleb Smirnoff 		sb->sb_acc -= m->m_len;
2820f9d0a73SGleb Smirnoff 
2830f9d0a73SGleb Smirnoff 	if (m == sb->sb_fnrdy) {
2840f9d0a73SGleb Smirnoff 		struct mbuf *n;
2850f9d0a73SGleb Smirnoff 
2860f9d0a73SGleb Smirnoff 		KASSERT(m->m_flags & M_NOTREADY,
2870f9d0a73SGleb Smirnoff 		    ("%s: m %p !M_NOTREADY", __func__, m));
2880f9d0a73SGleb Smirnoff 
2890f9d0a73SGleb Smirnoff 		n = m->m_next;
2900f9d0a73SGleb Smirnoff 		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
2910f9d0a73SGleb Smirnoff 			n->m_flags &= ~M_BLOCKED;
2920f9d0a73SGleb Smirnoff 			sb->sb_acc += n->m_len;
2930f9d0a73SGleb Smirnoff 			n = n->m_next;
2940f9d0a73SGleb Smirnoff 		}
2950f9d0a73SGleb Smirnoff 		sb->sb_fnrdy = n;
2960f9d0a73SGleb Smirnoff 	}
2978967b220SGleb Smirnoff 
2988967b220SGleb Smirnoff 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
2998967b220SGleb Smirnoff 		sb->sb_ctl -= m->m_len;
3008967b220SGleb Smirnoff 
3018967b220SGleb Smirnoff 	sb->sb_mbcnt -= MSIZE;
3028967b220SGleb Smirnoff 	sb->sb_mcnt -= 1;
3038967b220SGleb Smirnoff 	if (m->m_flags & M_EXT) {
3048967b220SGleb Smirnoff 		sb->sb_mbcnt -= m->m_ext.ext_size;
3058967b220SGleb Smirnoff 		sb->sb_ccnt -= 1;
3068967b220SGleb Smirnoff 	}
3078967b220SGleb Smirnoff 
3088967b220SGleb Smirnoff 	if (sb->sb_sndptr == m) {
3098967b220SGleb Smirnoff 		sb->sb_sndptr = NULL;
3108967b220SGleb Smirnoff 		sb->sb_sndptroff = 0;
3118967b220SGleb Smirnoff 	}
3128967b220SGleb Smirnoff 	if (sb->sb_sndptroff != 0)
3138967b220SGleb Smirnoff 		sb->sb_sndptroff -= m->m_len;
3148967b220SGleb Smirnoff }
3158967b220SGleb Smirnoff 
3168967b220SGleb Smirnoff /*
317050ac265SRobert Watson  * Socantsendmore indicates that no more data will be sent on the socket; it
318050ac265SRobert Watson  * would normally be applied to a socket when the user informs the system
319050ac265SRobert Watson  * that no more data is to be sent, by the protocol code (in case
320050ac265SRobert Watson  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
321050ac265SRobert Watson  * received, and will normally be applied to the socket by a protocol when it
322050ac265SRobert Watson  * detects that the peer will send no more data.  Data queued for reading in
323050ac265SRobert Watson  * the socket may yet be read.
324df8bae1dSRodney W. Grimes  */
325a34b7046SRobert Watson void
326050ac265SRobert Watson socantsendmore_locked(struct socket *so)
327a34b7046SRobert Watson {
328a34b7046SRobert Watson 
329a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
330a34b7046SRobert Watson 
331a34b7046SRobert Watson 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
332a34b7046SRobert Watson 	sowwakeup_locked(so);
333a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
334a34b7046SRobert Watson }
335df8bae1dSRodney W. Grimes 
33626f9a767SRodney W. Grimes void
337050ac265SRobert Watson socantsendmore(struct socket *so)
338df8bae1dSRodney W. Grimes {
339df8bae1dSRodney W. Grimes 
340a34b7046SRobert Watson 	SOCKBUF_LOCK(&so->so_snd);
341a34b7046SRobert Watson 	socantsendmore_locked(so);
342a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
343a34b7046SRobert Watson }
344a34b7046SRobert Watson 
345a34b7046SRobert Watson void
346050ac265SRobert Watson socantrcvmore_locked(struct socket *so)
347a34b7046SRobert Watson {
348a34b7046SRobert Watson 
349a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
350a34b7046SRobert Watson 
351a34b7046SRobert Watson 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
352a34b7046SRobert Watson 	sorwakeup_locked(so);
353a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
354df8bae1dSRodney W. Grimes }
355df8bae1dSRodney W. Grimes 
35626f9a767SRodney W. Grimes void
357050ac265SRobert Watson socantrcvmore(struct socket *so)
358df8bae1dSRodney W. Grimes {
359df8bae1dSRodney W. Grimes 
360a34b7046SRobert Watson 	SOCKBUF_LOCK(&so->so_rcv);
361a34b7046SRobert Watson 	socantrcvmore_locked(so);
362a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
363df8bae1dSRodney W. Grimes }
364df8bae1dSRodney W. Grimes 
365df8bae1dSRodney W. Grimes /*
366df8bae1dSRodney W. Grimes  * Wait for data to arrive at/drain from a socket buffer.
367df8bae1dSRodney W. Grimes  */
36826f9a767SRodney W. Grimes int
369050ac265SRobert Watson sbwait(struct sockbuf *sb)
370df8bae1dSRodney W. Grimes {
371df8bae1dSRodney W. Grimes 
37231f555a1SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
37331f555a1SRobert Watson 
374df8bae1dSRodney W. Grimes 	sb->sb_flags |= SB_WAIT;
3750f9d0a73SGleb Smirnoff 	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
37647daf5d5SBruce Evans 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
3777729cbf1SDavide Italiano 	    sb->sb_timeo, 0, 0));
378df8bae1dSRodney W. Grimes }
379df8bae1dSRodney W. Grimes 
38026f9a767SRodney W. Grimes int
3817abab911SRobert Watson sblock(struct sockbuf *sb, int flags)
382df8bae1dSRodney W. Grimes {
383df8bae1dSRodney W. Grimes 
384265de5bbSRobert Watson 	KASSERT((flags & SBL_VALID) == flags,
385265de5bbSRobert Watson 	    ("sblock: flags invalid (0x%x)", flags));
386265de5bbSRobert Watson 
387265de5bbSRobert Watson 	if (flags & SBL_WAIT) {
388265de5bbSRobert Watson 		if ((sb->sb_flags & SB_NOINTR) ||
389265de5bbSRobert Watson 		    (flags & SBL_NOINTR)) {
3907abab911SRobert Watson 			sx_xlock(&sb->sb_sx);
391df8bae1dSRodney W. Grimes 			return (0);
392049c3b6cSRobert Watson 		}
393049c3b6cSRobert Watson 		return (sx_xlock_sig(&sb->sb_sx));
3947abab911SRobert Watson 	} else {
3957abab911SRobert Watson 		if (sx_try_xlock(&sb->sb_sx) == 0)
3967abab911SRobert Watson 			return (EWOULDBLOCK);
3977abab911SRobert Watson 		return (0);
3987abab911SRobert Watson 	}
3997abab911SRobert Watson }
4007abab911SRobert Watson 
4017abab911SRobert Watson void
4027abab911SRobert Watson sbunlock(struct sockbuf *sb)
4037abab911SRobert Watson {
4047abab911SRobert Watson 
4057abab911SRobert Watson 	sx_xunlock(&sb->sb_sx);
406df8bae1dSRodney W. Grimes }
407df8bae1dSRodney W. Grimes 
408df8bae1dSRodney W. Grimes /*
409050ac265SRobert Watson  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
410050ac265SRobert Watson  * via SIGIO if the socket has the SS_ASYNC flag set.
411a34b7046SRobert Watson  *
412a34b7046SRobert Watson  * Called with the socket buffer lock held; will release the lock by the end
413a34b7046SRobert Watson  * of the function.  This allows the caller to acquire the socket buffer lock
414a34b7046SRobert Watson  * while testing for the need for various sorts of wakeup and hold it through
415a34b7046SRobert Watson  * to the point where it's no longer required.  We currently hold the lock
416a34b7046SRobert Watson  * through calls out to other subsystems (with the exception of kqueue), and
417a34b7046SRobert Watson  * then release it to avoid lock order issues.  It's not clear that's
418a34b7046SRobert Watson  * correct.
419df8bae1dSRodney W. Grimes  */
42026f9a767SRodney W. Grimes void
421050ac265SRobert Watson sowakeup(struct socket *so, struct sockbuf *sb)
422df8bae1dSRodney W. Grimes {
42374fb0ba7SJohn Baldwin 	int ret;
424d48d4b25SSeigo Tanimura 
425a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
426a34b7046SRobert Watson 
427779f106aSGleb Smirnoff 	selwakeuppri(sb->sb_sel, PSOCK);
428779f106aSGleb Smirnoff 	if (!SEL_WAITING(sb->sb_sel))
429df8bae1dSRodney W. Grimes 		sb->sb_flags &= ~SB_SEL;
430df8bae1dSRodney W. Grimes 	if (sb->sb_flags & SB_WAIT) {
431df8bae1dSRodney W. Grimes 		sb->sb_flags &= ~SB_WAIT;
4320f9d0a73SGleb Smirnoff 		wakeup(&sb->sb_acc);
433df8bae1dSRodney W. Grimes 	}
434779f106aSGleb Smirnoff 	KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
43598c92369SNavdeep Parhar 	if (sb->sb_upcall != NULL) {
436eb1b1807SGleb Smirnoff 		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
43774fb0ba7SJohn Baldwin 		if (ret == SU_ISCONNECTED) {
43874fb0ba7SJohn Baldwin 			KASSERT(sb == &so->so_rcv,
43974fb0ba7SJohn Baldwin 			    ("SO_SND upcall returned SU_ISCONNECTED"));
44074fb0ba7SJohn Baldwin 			soupcall_clear(so, SO_RCV);
44174fb0ba7SJohn Baldwin 		}
44274fb0ba7SJohn Baldwin 	} else
44374fb0ba7SJohn Baldwin 		ret = SU_OK;
4444cc20ab1SSeigo Tanimura 	if (sb->sb_flags & SB_AIO)
445f3215338SJohn Baldwin 		sowakeup_aio(so, sb);
44674fb0ba7SJohn Baldwin 	SOCKBUF_UNLOCK(sb);
447555b3e2fSGleb Smirnoff 	if (ret == SU_ISCONNECTED)
44874fb0ba7SJohn Baldwin 		soisconnected(so);
44974fb0ba7SJohn Baldwin 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
45074fb0ba7SJohn Baldwin 		pgsigio(&so->so_sigio, SIGIO, 0);
451a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
452df8bae1dSRodney W. Grimes }
453df8bae1dSRodney W. Grimes 
/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and one for
 * receiving data.  Each buffer contains a queue of mbufs, information about
 * the number of mbufs and amount of data in the queue, and other fields
 * allowing select() statements and notification on data availability to be
 * implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.  Each
 * record is a list of mbufs chained together with the m_next field.  Records
 * are chained together with the m_nextpkt field.  The upper level routine
 * soreceive() expects the following conventions to be observed when placing
 * information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's name,
 *    then a record containing that name must be present before any
 *    associated data (mbufs must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really just
 *    additional data associated with the message), and there are ``rights''
 *    to be received, then a record containing this data should be present
 *    (mbufs must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by a data
 *    record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space should
 * be released by calling sbrelease() when the socket is destroyed.
 */
48526f9a767SRodney W. Grimes int
486050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
487df8bae1dSRodney W. Grimes {
488b40ce416SJulian Elischer 	struct thread *td = curthread;
489df8bae1dSRodney W. Grimes 
4903f11a2f3SRobert Watson 	SOCKBUF_LOCK(&so->so_snd);
4919535efc0SRobert Watson 	SOCKBUF_LOCK(&so->so_rcv);
4923f11a2f3SRobert Watson 	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
4933f11a2f3SRobert Watson 		goto bad;
4943f11a2f3SRobert Watson 	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
4953f11a2f3SRobert Watson 		goto bad2;
496df8bae1dSRodney W. Grimes 	if (so->so_rcv.sb_lowat == 0)
497df8bae1dSRodney W. Grimes 		so->so_rcv.sb_lowat = 1;
498df8bae1dSRodney W. Grimes 	if (so->so_snd.sb_lowat == 0)
499df8bae1dSRodney W. Grimes 		so->so_snd.sb_lowat = MCLBYTES;
500df8bae1dSRodney W. Grimes 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
501df8bae1dSRodney W. Grimes 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
5023f11a2f3SRobert Watson 	SOCKBUF_UNLOCK(&so->so_rcv);
5039535efc0SRobert Watson 	SOCKBUF_UNLOCK(&so->so_snd);
504df8bae1dSRodney W. Grimes 	return (0);
505df8bae1dSRodney W. Grimes bad2:
5063f11a2f3SRobert Watson 	sbrelease_locked(&so->so_snd, so);
507df8bae1dSRodney W. Grimes bad:
5083f11a2f3SRobert Watson 	SOCKBUF_UNLOCK(&so->so_rcv);
5093f11a2f3SRobert Watson 	SOCKBUF_UNLOCK(&so->so_snd);
510df8bae1dSRodney W. Grimes 	return (ENOBUFS);
511df8bae1dSRodney W. Grimes }
512df8bae1dSRodney W. Grimes 
51379cb7eb4SDavid Greenman static int
51479cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
51579cb7eb4SDavid Greenman {
51679cb7eb4SDavid Greenman 	int error = 0;
51786a93d51SJohn Baldwin 	u_long tmp_sb_max = sb_max;
51879cb7eb4SDavid Greenman 
51986a93d51SJohn Baldwin 	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
52079cb7eb4SDavid Greenman 	if (error || !req->newptr)
52179cb7eb4SDavid Greenman 		return (error);
52286a93d51SJohn Baldwin 	if (tmp_sb_max < MSIZE + MCLBYTES)
52379cb7eb4SDavid Greenman 		return (EINVAL);
52486a93d51SJohn Baldwin 	sb_max = tmp_sb_max;
52579cb7eb4SDavid Greenman 	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
52679cb7eb4SDavid Greenman 	return (0);
52779cb7eb4SDavid Greenman }
52879cb7eb4SDavid Greenman 
529df8bae1dSRodney W. Grimes /*
530050ac265SRobert Watson  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
531050ac265SRobert Watson  * become limiting if buffering efficiency is near the normal case.
532df8bae1dSRodney W. Grimes  */
53326f9a767SRodney W. Grimes int
534050ac265SRobert Watson sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
535050ac265SRobert Watson     struct thread *td)
536df8bae1dSRodney W. Grimes {
53791d5354aSJohn Baldwin 	rlim_t sbsize_limit;
538ecf72308SBrian Feldman 
5393f11a2f3SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
5403f11a2f3SRobert Watson 
541ecf72308SBrian Feldman 	/*
5427978014dSRobert Watson 	 * When a thread is passed, we take into account the thread's socket
5437978014dSRobert Watson 	 * buffer size limit.  The caller will generally pass curthread, but
5447978014dSRobert Watson 	 * in the TCP input path, NULL will be passed to indicate that no
5457978014dSRobert Watson 	 * appropriate thread resource limits are available.  In that case,
5467978014dSRobert Watson 	 * we don't apply a process limit.
547ecf72308SBrian Feldman 	 */
54879cb7eb4SDavid Greenman 	if (cc > sb_max_adj)
549df8bae1dSRodney W. Grimes 		return (0);
55091d5354aSJohn Baldwin 	if (td != NULL) {
551f6f6d240SMateusz Guzik 		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
55291d5354aSJohn Baldwin 	} else
55391d5354aSJohn Baldwin 		sbsize_limit = RLIM_INFINITY;
554f535380cSDon Lewis 	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
55591d5354aSJohn Baldwin 	    sbsize_limit))
556ecf72308SBrian Feldman 		return (0);
5574b29bc4fSGarrett Wollman 	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
558df8bae1dSRodney W. Grimes 	if (sb->sb_lowat > sb->sb_hiwat)
559df8bae1dSRodney W. Grimes 		sb->sb_lowat = sb->sb_hiwat;
560df8bae1dSRodney W. Grimes 	return (1);
561df8bae1dSRodney W. Grimes }
562df8bae1dSRodney W. Grimes 
5633f11a2f3SRobert Watson int
56464290befSGleb Smirnoff sbsetopt(struct socket *so, int cmd, u_long cc)
5653f11a2f3SRobert Watson {
56664290befSGleb Smirnoff 	struct sockbuf *sb;
56764290befSGleb Smirnoff 	short *flags;
56864290befSGleb Smirnoff 	u_int *hiwat, *lowat;
5693f11a2f3SRobert Watson 	int error;
5703f11a2f3SRobert Watson 
571b2037136SMatt Macy 	sb = NULL;
57264290befSGleb Smirnoff 	SOCK_LOCK(so);
57364290befSGleb Smirnoff 	if (SOLISTENING(so)) {
57464290befSGleb Smirnoff 		switch (cmd) {
57564290befSGleb Smirnoff 			case SO_SNDLOWAT:
57664290befSGleb Smirnoff 			case SO_SNDBUF:
57764290befSGleb Smirnoff 				lowat = &so->sol_sbsnd_lowat;
57864290befSGleb Smirnoff 				hiwat = &so->sol_sbsnd_hiwat;
57964290befSGleb Smirnoff 				flags = &so->sol_sbsnd_flags;
58064290befSGleb Smirnoff 				break;
58164290befSGleb Smirnoff 			case SO_RCVLOWAT:
58264290befSGleb Smirnoff 			case SO_RCVBUF:
58364290befSGleb Smirnoff 				lowat = &so->sol_sbrcv_lowat;
58464290befSGleb Smirnoff 				hiwat = &so->sol_sbrcv_hiwat;
58564290befSGleb Smirnoff 				flags = &so->sol_sbrcv_flags;
58664290befSGleb Smirnoff 				break;
58764290befSGleb Smirnoff 		}
58864290befSGleb Smirnoff 	} else {
58964290befSGleb Smirnoff 		switch (cmd) {
59064290befSGleb Smirnoff 			case SO_SNDLOWAT:
59164290befSGleb Smirnoff 			case SO_SNDBUF:
59264290befSGleb Smirnoff 				sb = &so->so_snd;
59364290befSGleb Smirnoff 				break;
59464290befSGleb Smirnoff 			case SO_RCVLOWAT:
59564290befSGleb Smirnoff 			case SO_RCVBUF:
59664290befSGleb Smirnoff 				sb = &so->so_rcv;
59764290befSGleb Smirnoff 				break;
59864290befSGleb Smirnoff 		}
59964290befSGleb Smirnoff 		flags = &sb->sb_flags;
60064290befSGleb Smirnoff 		hiwat = &sb->sb_hiwat;
60164290befSGleb Smirnoff 		lowat = &sb->sb_lowat;
6023f11a2f3SRobert Watson 		SOCKBUF_LOCK(sb);
60364290befSGleb Smirnoff 	}
60464290befSGleb Smirnoff 
60564290befSGleb Smirnoff 	error = 0;
60664290befSGleb Smirnoff 	switch (cmd) {
60764290befSGleb Smirnoff 	case SO_SNDBUF:
60864290befSGleb Smirnoff 	case SO_RCVBUF:
60964290befSGleb Smirnoff 		if (SOLISTENING(so)) {
61064290befSGleb Smirnoff 			if (cc > sb_max_adj) {
61164290befSGleb Smirnoff 				error = ENOBUFS;
61264290befSGleb Smirnoff 				break;
61364290befSGleb Smirnoff 			}
61464290befSGleb Smirnoff 			*hiwat = cc;
61564290befSGleb Smirnoff 			if (*lowat > *hiwat)
61664290befSGleb Smirnoff 				*lowat = *hiwat;
61764290befSGleb Smirnoff 		} else {
61864290befSGleb Smirnoff 			if (!sbreserve_locked(sb, cc, so, curthread))
61964290befSGleb Smirnoff 				error = ENOBUFS;
62064290befSGleb Smirnoff 		}
62164290befSGleb Smirnoff 		if (error == 0)
62264290befSGleb Smirnoff 			*flags &= ~SB_AUTOSIZE;
62364290befSGleb Smirnoff 		break;
62464290befSGleb Smirnoff 	case SO_SNDLOWAT:
62564290befSGleb Smirnoff 	case SO_RCVLOWAT:
62664290befSGleb Smirnoff 		/*
62764290befSGleb Smirnoff 		 * Make sure the low-water is never greater than the
62864290befSGleb Smirnoff 		 * high-water.
62964290befSGleb Smirnoff 		 */
63064290befSGleb Smirnoff 		*lowat = (cc > *hiwat) ? *hiwat : cc;
63164290befSGleb Smirnoff 		break;
63264290befSGleb Smirnoff 	}
63364290befSGleb Smirnoff 
63464290befSGleb Smirnoff 	if (!SOLISTENING(so))
6353f11a2f3SRobert Watson 		SOCKBUF_UNLOCK(sb);
63664290befSGleb Smirnoff 	SOCK_UNLOCK(so);
6373f11a2f3SRobert Watson 	return (error);
6383f11a2f3SRobert Watson }
6393f11a2f3SRobert Watson 
640df8bae1dSRodney W. Grimes /*
641df8bae1dSRodney W. Grimes  * Free mbufs held by a socket, and reserved mbuf space.
642df8bae1dSRodney W. Grimes  */
6433f0bfcccSRobert Watson void
644050ac265SRobert Watson sbrelease_internal(struct sockbuf *sb, struct socket *so)
645eaa6dfbcSRobert Watson {
646eaa6dfbcSRobert Watson 
647eaa6dfbcSRobert Watson 	sbflush_internal(sb);
648eaa6dfbcSRobert Watson 	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
649eaa6dfbcSRobert Watson 	    RLIM_INFINITY);
650eaa6dfbcSRobert Watson 	sb->sb_mbmax = 0;
651eaa6dfbcSRobert Watson }
652eaa6dfbcSRobert Watson 
/*
 * Release the contents and reserved space of sb on behalf of socket so.
 * Asserts that the sockbuf lock is held, then defers to
 * sbrelease_internal().
 */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbrelease_internal(sb, so);
}
661df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point for sbrelease_locked(): takes the sockbuf lock around
 * the release of sb's contents and reserved space.
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}
670eaa6dfbcSRobert Watson 
/*
 * Tear down a socket buffer: release its mbufs and reserved space and, when
 * the kernel is built with KERN_TLS, free any attached TLS session info and
 * clear the pointer.
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
#ifdef KERN_TLS
	if (sb->sb_tls_info != NULL) {
		ktls_free(sb->sb_tls_info);
		sb->sb_tls_info = NULL;
	}
#endif
}
682eaa6dfbcSRobert Watson 
/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check on behalf of the caller that there
 * will be enough space to store the data.  Each fails if there is not enough
 * space, or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  A protocol normally copies data from the socket send
 * buffer with m_copy for output to a peer, and then removes the data from
 * the socket buffer with sbdrop() or sbdroprecord() once the peer has
 * acknowledged it.
 */
705395bb186SSam Leffler #ifdef SOCKBUF_DEBUG
706395bb186SSam Leffler void
707395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line)
708395bb186SSam Leffler {
709395bb186SSam Leffler 	struct mbuf *m = sb->sb_mb;
710395bb186SSam Leffler 
711a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
712a34b7046SRobert Watson 
713395bb186SSam Leffler 	while (m && m->m_nextpkt)
714395bb186SSam Leffler 		m = m->m_nextpkt;
715395bb186SSam Leffler 
716395bb186SSam Leffler 	if (m != sb->sb_lastrecord) {
717395bb186SSam Leffler 		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
718395bb186SSam Leffler 			__func__, sb->sb_mb, sb->sb_lastrecord, m);
719395bb186SSam Leffler 		printf("packet chain:\n");
720395bb186SSam Leffler 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
721395bb186SSam Leffler 			printf("\t%p\n", m);
722395bb186SSam Leffler 		panic("%s from %s:%u", __func__, file, line);
723395bb186SSam Leffler 	}
724395bb186SSam Leffler }
725395bb186SSam Leffler 
726395bb186SSam Leffler void
727395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line)
728395bb186SSam Leffler {
729395bb186SSam Leffler 	struct mbuf *m = sb->sb_mb;
730395bb186SSam Leffler 	struct mbuf *n;
731395bb186SSam Leffler 
732a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
733a34b7046SRobert Watson 
734395bb186SSam Leffler 	while (m && m->m_nextpkt)
735395bb186SSam Leffler 		m = m->m_nextpkt;
736395bb186SSam Leffler 
737395bb186SSam Leffler 	while (m && m->m_next)
738395bb186SSam Leffler 		m = m->m_next;
739395bb186SSam Leffler 
740395bb186SSam Leffler 	if (m != sb->sb_mbtail) {
741395bb186SSam Leffler 		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
742395bb186SSam Leffler 			__func__, sb->sb_mb, sb->sb_mbtail, m);
743395bb186SSam Leffler 		printf("packet tree:\n");
744395bb186SSam Leffler 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
745395bb186SSam Leffler 			printf("\t");
746395bb186SSam Leffler 			for (n = m; n != NULL; n = n->m_next)
747395bb186SSam Leffler 				printf("%p ", n);
748395bb186SSam Leffler 			printf("\n");
749395bb186SSam Leffler 		}
750395bb186SSam Leffler 		panic("%s from %s:%u", __func__, file, line);
751395bb186SSam Leffler 	}
752395bb186SSam Leffler }
753395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */
754395bb186SSam Leffler 
/*
 * Link record m0 at the end of sb's record chain and make it the last
 * record.  If the buffer has no last record, m0 also becomes sb_mb.
 * Asserts the sockbuf lock.  Both arguments are evaluated more than once.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord == NULL)				\
		(sb)->sb_mb = (m0);					\
	else								\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
763395bb186SSam Leffler 
764df8bae1dSRodney W. Grimes /*
765050ac265SRobert Watson  * Append mbuf chain m to the last record in the socket buffer sb.  The
766050ac265SRobert Watson  * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
767050ac265SRobert Watson  * are discarded and mbufs are compacted where possible.
768df8bae1dSRodney W. Grimes  */
76926f9a767SRodney W. Grimes void
770829fae90SGleb Smirnoff sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
771df8bae1dSRodney W. Grimes {
772050ac265SRobert Watson 	struct mbuf *n;
773df8bae1dSRodney W. Grimes 
774a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
775a34b7046SRobert Watson 
776b85f65afSPedro F. Giffuni 	if (m == NULL)
777df8bae1dSRodney W. Grimes 		return;
778829fae90SGleb Smirnoff 	sbm_clrprotoflags(m, flags);
779395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
780797f2d22SPoul-Henning Kamp 	n = sb->sb_mb;
781797f2d22SPoul-Henning Kamp 	if (n) {
782df8bae1dSRodney W. Grimes 		while (n->m_nextpkt)
783df8bae1dSRodney W. Grimes 			n = n->m_nextpkt;
784df8bae1dSRodney W. Grimes 		do {
785df8bae1dSRodney W. Grimes 			if (n->m_flags & M_EOR) {
786a34b7046SRobert Watson 				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
787df8bae1dSRodney W. Grimes 				return;
788df8bae1dSRodney W. Grimes 			}
789df8bae1dSRodney W. Grimes 		} while (n->m_next && (n = n->m_next));
790395bb186SSam Leffler 	} else {
791395bb186SSam Leffler 		/*
792395bb186SSam Leffler 		 * XXX Would like to simply use sb_mbtail here, but
793395bb186SSam Leffler 		 * XXX I need to verify that I won't miss an EOR that
794395bb186SSam Leffler 		 * XXX way.
795395bb186SSam Leffler 		 */
796395bb186SSam Leffler 		if ((n = sb->sb_lastrecord) != NULL) {
797395bb186SSam Leffler 			do {
798395bb186SSam Leffler 				if (n->m_flags & M_EOR) {
799a34b7046SRobert Watson 					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
800395bb186SSam Leffler 					return;
801395bb186SSam Leffler 				}
802395bb186SSam Leffler 			} while (n->m_next && (n = n->m_next));
803395bb186SSam Leffler 		} else {
804395bb186SSam Leffler 			/*
805395bb186SSam Leffler 			 * If this is the first record in the socket buffer,
806395bb186SSam Leffler 			 * it's also the last record.
807395bb186SSam Leffler 			 */
808395bb186SSam Leffler 			sb->sb_lastrecord = m;
809395bb186SSam Leffler 		}
810df8bae1dSRodney W. Grimes 	}
811df8bae1dSRodney W. Grimes 	sbcompress(sb, m, n);
812395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
813395bb186SSam Leffler }
814395bb186SSam Leffler 
/*
 * Unlocked entry point for sbappend_locked(): append mbuf chain m to the
 * last record in the socket buffer sb while holding the sockbuf lock.  The
 * additional space associated with the mbuf chain is recorded in sb.  Empty
 * mbufs are discarded and mbufs are compacted where possible.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
828a34b7046SRobert Watson 
829a34b7046SRobert Watson /*
830050ac265SRobert Watson  * This version of sbappend() should only be used when the caller absolutely
831050ac265SRobert Watson  * knows that there will never be more than one record in the socket buffer,
832050ac265SRobert Watson  * that is, a stream protocol (such as TCP).
833395bb186SSam Leffler  */
834395bb186SSam Leffler void
835651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
836395bb186SSam Leffler {
837a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
838395bb186SSam Leffler 
839395bb186SSam Leffler 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
840395bb186SSam Leffler 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
841395bb186SSam Leffler 
842395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
843395bb186SSam Leffler 
844b2e60773SJohn Baldwin #ifdef KERN_TLS
845b2e60773SJohn Baldwin 	if (sb->sb_tls_info != NULL)
846b2e60773SJohn Baldwin 		ktls_seq(sb, m);
847b2e60773SJohn Baldwin #endif
848b2e60773SJohn Baldwin 
849844cacd1SGleb Smirnoff 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
850651e4e6aSGleb Smirnoff 	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
851844cacd1SGleb Smirnoff 
852395bb186SSam Leffler 	sbcompress(sb, m, sb->sb_mbtail);
853395bb186SSam Leffler 
854395bb186SSam Leffler 	sb->sb_lastrecord = sb->sb_mb;
855395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
856df8bae1dSRodney W. Grimes }
857df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point for sbappendstream_locked().  As with the locked
 * variant, it should only be used when the caller absolutely knows that
 * there will never be more than one record in the socket buffer, that is,
 * for a stream protocol (such as TCP).
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
871a34b7046SRobert Watson 
872df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG
87326f9a767SRodney W. Grimes void
87457f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line)
875df8bae1dSRodney W. Grimes {
8760f9d0a73SGleb Smirnoff 	struct mbuf *m, *n, *fnrdy;
8770f9d0a73SGleb Smirnoff 	u_long acc, ccc, mbcnt;
878df8bae1dSRodney W. Grimes 
879a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
880a34b7046SRobert Watson 
8810f9d0a73SGleb Smirnoff 	acc = ccc = mbcnt = 0;
8820f9d0a73SGleb Smirnoff 	fnrdy = NULL;
88357f43a45SGleb Smirnoff 
8840931333fSBill Fenner 	for (m = sb->sb_mb; m; m = n) {
8850931333fSBill Fenner 	    n = m->m_nextpkt;
8860931333fSBill Fenner 	    for (; m; m = m->m_next) {
88757f43a45SGleb Smirnoff 		if (m->m_len == 0) {
88857f43a45SGleb Smirnoff 			printf("sb %p empty mbuf %p\n", sb, m);
88957f43a45SGleb Smirnoff 			goto fail;
89057f43a45SGleb Smirnoff 		}
8910f9d0a73SGleb Smirnoff 		if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
8920f9d0a73SGleb Smirnoff 			if (m != sb->sb_fnrdy) {
8930f9d0a73SGleb Smirnoff 				printf("sb %p: fnrdy %p != m %p\n",
8940f9d0a73SGleb Smirnoff 				    sb, sb->sb_fnrdy, m);
8950f9d0a73SGleb Smirnoff 				goto fail;
8960f9d0a73SGleb Smirnoff 			}
8970f9d0a73SGleb Smirnoff 			fnrdy = m;
8980f9d0a73SGleb Smirnoff 		}
8990f9d0a73SGleb Smirnoff 		if (fnrdy) {
9000f9d0a73SGleb Smirnoff 			if (!(m->m_flags & M_NOTAVAIL)) {
9010f9d0a73SGleb Smirnoff 				printf("sb %p: fnrdy %p, m %p is avail\n",
9020f9d0a73SGleb Smirnoff 				    sb, sb->sb_fnrdy, m);
9030f9d0a73SGleb Smirnoff 				goto fail;
9040f9d0a73SGleb Smirnoff 			}
9050f9d0a73SGleb Smirnoff 		} else
9060f9d0a73SGleb Smirnoff 			acc += m->m_len;
9070f9d0a73SGleb Smirnoff 		ccc += m->m_len;
908df8bae1dSRodney W. Grimes 		mbcnt += MSIZE;
909313861b8SJulian Elischer 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
910df8bae1dSRodney W. Grimes 			mbcnt += m->m_ext.ext_size;
9110931333fSBill Fenner 	    }
912df8bae1dSRodney W. Grimes 	}
9130f9d0a73SGleb Smirnoff 	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
9140f9d0a73SGleb Smirnoff 		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
9150f9d0a73SGleb Smirnoff 		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
91657f43a45SGleb Smirnoff 		goto fail;
917df8bae1dSRodney W. Grimes 	}
91857f43a45SGleb Smirnoff 	return;
91957f43a45SGleb Smirnoff fail:
92057f43a45SGleb Smirnoff 	panic("%s from %s:%u", __func__, file, line);
921df8bae1dSRodney W. Grimes }
922df8bae1dSRodney W. Grimes #endif
923df8bae1dSRodney W. Grimes 
924df8bae1dSRodney W. Grimes /*
925050ac265SRobert Watson  * As above, except the mbuf chain begins a new record.
926df8bae1dSRodney W. Grimes  */
92726f9a767SRodney W. Grimes void
928050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
929df8bae1dSRodney W. Grimes {
930050ac265SRobert Watson 	struct mbuf *m;
931df8bae1dSRodney W. Grimes 
932a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
933a34b7046SRobert Watson 
934b85f65afSPedro F. Giffuni 	if (m0 == NULL)
935df8bae1dSRodney W. Grimes 		return;
93653b680caSGleb Smirnoff 	m_clrprotoflags(m0);
937df8bae1dSRodney W. Grimes 	/*
938050ac265SRobert Watson 	 * Put the first mbuf on the queue.  Note this permits zero length
939050ac265SRobert Watson 	 * records.
940df8bae1dSRodney W. Grimes 	 */
941df8bae1dSRodney W. Grimes 	sballoc(sb, m0);
942395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
943395bb186SSam Leffler 	SBLINKRECORD(sb, m0);
944e72a94adSMaksim Yevmenkin 	sb->sb_mbtail = m0;
945df8bae1dSRodney W. Grimes 	m = m0->m_next;
946df8bae1dSRodney W. Grimes 	m0->m_next = 0;
947df8bae1dSRodney W. Grimes 	if (m && (m0->m_flags & M_EOR)) {
948df8bae1dSRodney W. Grimes 		m0->m_flags &= ~M_EOR;
949df8bae1dSRodney W. Grimes 		m->m_flags |= M_EOR;
950df8bae1dSRodney W. Grimes 	}
951e72a94adSMaksim Yevmenkin 	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
952df8bae1dSRodney W. Grimes 	sbcompress(sb, m, m0);
953df8bae1dSRodney W. Grimes }
954df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point for sbappendrecord_locked(): append mbuf chain m0 to
 * sb as the beginning of a new record while holding the sockbuf lock.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
966a34b7046SRobert Watson 
9678de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */
9688de34a88SAlan Somers static int
9698de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
9708de34a88SAlan Somers     struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
971df8bae1dSRodney W. Grimes {
972395bb186SSam Leffler 	struct mbuf *m, *n, *nlast;
973c43cad1aSScott Long #if MSIZE <= 256
974df8bae1dSRodney W. Grimes 	if (asa->sa_len > MLEN)
975df8bae1dSRodney W. Grimes 		return (0);
976c43cad1aSScott Long #endif
977c8b59ea7SGleb Smirnoff 	m = m_get(M_NOWAIT, MT_SONAME);
978c8b59ea7SGleb Smirnoff 	if (m == NULL)
979df8bae1dSRodney W. Grimes 		return (0);
980df8bae1dSRodney W. Grimes 	m->m_len = asa->sa_len;
98180208239SAlfred Perlstein 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
982c33a2313SAndrey V. Elsukov 	if (m0) {
98353b680caSGleb Smirnoff 		m_clrprotoflags(m0);
98457386f5dSAndrey V. Elsukov 		m_tag_delete_chain(m0, NULL);
985c33a2313SAndrey V. Elsukov 		/*
986c33a2313SAndrey V. Elsukov 		 * Clear some persistent info from pkthdr.
987c33a2313SAndrey V. Elsukov 		 * We don't use m_demote(), because some netgraph consumers
988c33a2313SAndrey V. Elsukov 		 * expect M_PKTHDR presence.
989c33a2313SAndrey V. Elsukov 		 */
990c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.rcvif = NULL;
991c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.flowid = 0;
992c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.csum_flags = 0;
993c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.fibnum = 0;
994c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.rsstype = 0;
995c33a2313SAndrey V. Elsukov 	}
9968de34a88SAlan Somers 	if (ctrl_last)
9978de34a88SAlan Somers 		ctrl_last->m_next = m0;	/* concatenate data to control */
998df8bae1dSRodney W. Grimes 	else
999df8bae1dSRodney W. Grimes 		control = m0;
1000df8bae1dSRodney W. Grimes 	m->m_next = control;
1001395bb186SSam Leffler 	for (n = m; n->m_next != NULL; n = n->m_next)
1002df8bae1dSRodney W. Grimes 		sballoc(sb, n);
1003395bb186SSam Leffler 	sballoc(sb, n);
1004395bb186SSam Leffler 	nlast = n;
1005395bb186SSam Leffler 	SBLINKRECORD(sb, m);
1006395bb186SSam Leffler 
1007395bb186SSam Leffler 	sb->sb_mbtail = nlast;
1008395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
1009395bb186SSam Leffler 
1010395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1011df8bae1dSRodney W. Grimes 	return (1);
1012df8bae1dSRodney W. Grimes }
1013df8bae1dSRodney W. Grimes 
1014a34b7046SRobert Watson /*
1015050ac265SRobert Watson  * Append address and data, and optionally, control (ancillary) data to the
1016050ac265SRobert Watson  * receive queue of a socket.  If present, m0 must include a packet header
1017050ac265SRobert Watson  * with total length.  Returns 0 if no space in sockbuf or insufficient
1018050ac265SRobert Watson  * mbufs.
1019a34b7046SRobert Watson  */
102026f9a767SRodney W. Grimes int
10218de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
10228de34a88SAlan Somers     struct mbuf *m0, struct mbuf *control)
10238de34a88SAlan Somers {
10248de34a88SAlan Somers 	struct mbuf *ctrl_last;
10258de34a88SAlan Somers 	int space = asa->sa_len;
10268de34a88SAlan Somers 
10278de34a88SAlan Somers 	SOCKBUF_LOCK_ASSERT(sb);
10288de34a88SAlan Somers 
10298de34a88SAlan Somers 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
10308de34a88SAlan Somers 		panic("sbappendaddr_locked");
10318de34a88SAlan Somers 	if (m0)
10328de34a88SAlan Somers 		space += m0->m_pkthdr.len;
10338de34a88SAlan Somers 	space += m_length(control, &ctrl_last);
10348de34a88SAlan Somers 
10358de34a88SAlan Somers 	if (space > sbspace(sb))
10368de34a88SAlan Somers 		return (0);
10378de34a88SAlan Somers 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
10388de34a88SAlan Somers }
10398de34a88SAlan Somers 
10408de34a88SAlan Somers /*
10418de34a88SAlan Somers  * Append address and data, and optionally, control (ancillary) data to the
10428de34a88SAlan Somers  * receive queue of a socket.  If present, m0 must include a packet header
10438de34a88SAlan Somers  * with total length.  Returns 0 if insufficient mbufs.  Does not validate space
10448de34a88SAlan Somers  * on the receiving sockbuf.
10458de34a88SAlan Somers  */
10468de34a88SAlan Somers int
10478de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
10488de34a88SAlan Somers     struct mbuf *m0, struct mbuf *control)
10498de34a88SAlan Somers {
10508de34a88SAlan Somers 	struct mbuf *ctrl_last;
10518de34a88SAlan Somers 
10528de34a88SAlan Somers 	SOCKBUF_LOCK_ASSERT(sb);
10538de34a88SAlan Somers 
10548de34a88SAlan Somers 	ctrl_last = (control == NULL) ? NULL : m_last(control);
10558de34a88SAlan Somers 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
10568de34a88SAlan Somers }
10578de34a88SAlan Somers 
/*
 * Unlocked entry point for sbappendaddr_locked(): append address and data,
 * and optionally, control (ancillary) data to the receive queue of a socket
 * while holding the sockbuf lock.  If present, m0 must include a packet
 * header with total length.  Returns 0 if there is no space in the sockbuf
 * or there are insufficient mbufs.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int appended;

	SOCKBUF_LOCK(sb);
	appended = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);

	return (appended);
}
1075a34b7046SRobert Watson 
10765b0480f2SMark Johnston void
1077050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
1078050ac265SRobert Watson     struct mbuf *control)
1079df8bae1dSRodney W. Grimes {
10805b0480f2SMark Johnston 	struct mbuf *m, *mlast;
1081df8bae1dSRodney W. Grimes 
108253b680caSGleb Smirnoff 	m_clrprotoflags(m0);
10835b0480f2SMark Johnston 	m_last(control)->m_next = m0;
1084395bb186SSam Leffler 
1085395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1086395bb186SSam Leffler 
1087395bb186SSam Leffler 	for (m = control; m->m_next; m = m->m_next)
1088df8bae1dSRodney W. Grimes 		sballoc(sb, m);
1089395bb186SSam Leffler 	sballoc(sb, m);
1090395bb186SSam Leffler 	mlast = m;
1091395bb186SSam Leffler 	SBLINKRECORD(sb, control);
1092395bb186SSam Leffler 
1093395bb186SSam Leffler 	sb->sb_mbtail = mlast;
1094395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
1095395bb186SSam Leffler 
1096395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1097df8bae1dSRodney W. Grimes }
1098df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point for sbappendcontrol_locked(): append control data and
 * the data chain m0 to sb as a new record while holding the sockbuf lock.
 */
void
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{

	SOCKBUF_LOCK(sb);
	sbappendcontrol_locked(sb, m0, control);
	SOCKBUF_UNLOCK(sb);
}
1107a34b7046SRobert Watson 
1108df8bae1dSRodney W. Grimes /*
11097da7362bSRobert Watson  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
11107da7362bSRobert Watson  * (n).  If (n) is NULL, the buffer is presumed empty.
11117da7362bSRobert Watson  *
11127da7362bSRobert Watson  * When the data is compressed, mbufs in the chain may be handled in one of
11137da7362bSRobert Watson  * three ways:
11147da7362bSRobert Watson  *
11157da7362bSRobert Watson  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
11167da7362bSRobert Watson  *     record boundary, and no change in data type).
11177da7362bSRobert Watson  *
11187da7362bSRobert Watson  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
11197da7362bSRobert Watson  *     an mbuf already in the socket buffer.  This can occur if an
11200f9d0a73SGleb Smirnoff  *     appropriate mbuf exists, there is room, both mbufs are not marked as
11210f9d0a73SGleb Smirnoff  *     not ready, and no merging of data types will occur.
11227da7362bSRobert Watson  *
11237da7362bSRobert Watson  * (3) The mbuf may be appended to the end of the existing mbuf chain.
11247da7362bSRobert Watson  *
11257da7362bSRobert Watson  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
11267da7362bSRobert Watson  * end-of-record.
1127df8bae1dSRodney W. Grimes  */
112826f9a767SRodney W. Grimes void
1129050ac265SRobert Watson sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1130df8bae1dSRodney W. Grimes {
1131050ac265SRobert Watson 	int eor = 0;
1132050ac265SRobert Watson 	struct mbuf *o;
1133df8bae1dSRodney W. Grimes 
1134a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
1135a34b7046SRobert Watson 
1136df8bae1dSRodney W. Grimes 	while (m) {
1137df8bae1dSRodney W. Grimes 		eor |= m->m_flags & M_EOR;
1138df8bae1dSRodney W. Grimes 		if (m->m_len == 0 &&
1139df8bae1dSRodney W. Grimes 		    (eor == 0 ||
1140df8bae1dSRodney W. Grimes 		     (((o = m->m_next) || (o = n)) &&
1141df8bae1dSRodney W. Grimes 		      o->m_type == m->m_type))) {
1142395bb186SSam Leffler 			if (sb->sb_lastrecord == m)
1143395bb186SSam Leffler 				sb->sb_lastrecord = m->m_next;
1144df8bae1dSRodney W. Grimes 			m = m_free(m);
1145df8bae1dSRodney W. Grimes 			continue;
1146df8bae1dSRodney W. Grimes 		}
114732af0d74SDavid Malone 		if (n && (n->m_flags & M_EOR) == 0 &&
114832af0d74SDavid Malone 		    M_WRITABLE(n) &&
11495e0f5cfaSKip Macy 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
11500f9d0a73SGleb Smirnoff 		    !(m->m_flags & M_NOTREADY) &&
115182334850SJohn Baldwin 		    !(n->m_flags & (M_NOTREADY | M_NOMAP)) &&
1152b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m) &&
1153b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(n) &&
115432af0d74SDavid Malone 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
115532af0d74SDavid Malone 		    m->m_len <= M_TRAILINGSPACE(n) &&
1156df8bae1dSRodney W. Grimes 		    n->m_type == m->m_type) {
115782334850SJohn Baldwin 			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
1158df8bae1dSRodney W. Grimes 			n->m_len += m->m_len;
11590f9d0a73SGleb Smirnoff 			sb->sb_ccc += m->m_len;
11600f9d0a73SGleb Smirnoff 			if (sb->sb_fnrdy == NULL)
11610f9d0a73SGleb Smirnoff 				sb->sb_acc += m->m_len;
116234333b16SAndre Oppermann 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
1163b3f1af6bSTim J. Robbins 				/* XXX: Probably don't need.*/
116404ac9b97SKelly Yancey 				sb->sb_ctl += m->m_len;
1165df8bae1dSRodney W. Grimes 			m = m_free(m);
1166df8bae1dSRodney W. Grimes 			continue;
1167df8bae1dSRodney W. Grimes 		}
116882334850SJohn Baldwin 		if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) &&
1169b2e60773SJohn Baldwin 		    (m->m_flags & M_NOTREADY) == 0 &&
1170b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m))
117182334850SJohn Baldwin 			(void)mb_unmapped_compress(m);
1172df8bae1dSRodney W. Grimes 		if (n)
1173df8bae1dSRodney W. Grimes 			n->m_next = m;
1174df8bae1dSRodney W. Grimes 		else
1175df8bae1dSRodney W. Grimes 			sb->sb_mb = m;
1176395bb186SSam Leffler 		sb->sb_mbtail = m;
1177df8bae1dSRodney W. Grimes 		sballoc(sb, m);
1178df8bae1dSRodney W. Grimes 		n = m;
1179df8bae1dSRodney W. Grimes 		m->m_flags &= ~M_EOR;
1180df8bae1dSRodney W. Grimes 		m = m->m_next;
1181df8bae1dSRodney W. Grimes 		n->m_next = 0;
1182df8bae1dSRodney W. Grimes 	}
1183df8bae1dSRodney W. Grimes 	if (eor) {
11847da7362bSRobert Watson 		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
1185df8bae1dSRodney W. Grimes 		n->m_flags |= eor;
1186df8bae1dSRodney W. Grimes 	}
1187395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
1188df8bae1dSRodney W. Grimes }
1189df8bae1dSRodney W. Grimes 
1190df8bae1dSRodney W. Grimes /*
1191050ac265SRobert Watson  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
1192df8bae1dSRodney W. Grimes  */
1193eaa6dfbcSRobert Watson static void
1194050ac265SRobert Watson sbflush_internal(struct sockbuf *sb)
1195df8bae1dSRodney W. Grimes {
1196df8bae1dSRodney W. Grimes 
119723f84772SPierre Beyssac 	while (sb->sb_mbcnt) {
119823f84772SPierre Beyssac 		/*
1199761a9a1fSGleb Smirnoff 		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
120023f84772SPierre Beyssac 		 * we would loop forever. Panic instead.
120123f84772SPierre Beyssac 		 */
12020f9d0a73SGleb Smirnoff 		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
120323f84772SPierre Beyssac 			break;
12040f9d0a73SGleb Smirnoff 		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
120523f84772SPierre Beyssac 	}
12060f9d0a73SGleb Smirnoff 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
12070f9d0a73SGleb Smirnoff 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
12080f9d0a73SGleb Smirnoff 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
1209a34b7046SRobert Watson }
1210a34b7046SRobert Watson 
/*
 * Free all mbufs in sb.  Asserts that the sockbuf lock is held, then defers
 * to sbflush_internal().
 */
void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbflush_internal(sb);
}
1218eaa6dfbcSRobert Watson 
/*
 * Unlocked entry point for sbflush_locked(): free all mbufs in sb while
 * holding the sockbuf lock.
 */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1227df8bae1dSRodney W. Grimes 
1228df8bae1dSRodney W. Grimes /*
12291d2df300SGleb Smirnoff  * Cut data from (the front of) a sockbuf.
1230df8bae1dSRodney W. Grimes  */
12311d2df300SGleb Smirnoff static struct mbuf *
12321d2df300SGleb Smirnoff sbcut_internal(struct sockbuf *sb, int len)
1233df8bae1dSRodney W. Grimes {
12340f9d0a73SGleb Smirnoff 	struct mbuf *m, *next, *mfree;
1235df8bae1dSRodney W. Grimes 
1236f41b2de7SHiren Panchasara 	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
1237b5b023b9SHiren Panchasara 	    __func__, len));
1238b5b023b9SHiren Panchasara 	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
1239b5b023b9SHiren Panchasara 	    __func__, len, sb->sb_ccc));
1240b5b023b9SHiren Panchasara 
1241df8bae1dSRodney W. Grimes 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
12421d2df300SGleb Smirnoff 	mfree = NULL;
12431d2df300SGleb Smirnoff 
1244df8bae1dSRodney W. Grimes 	while (len > 0) {
12458146bcfeSGleb Smirnoff 		if (m == NULL) {
12468146bcfeSGleb Smirnoff 			KASSERT(next, ("%s: no next, len %d", __func__, len));
1247df8bae1dSRodney W. Grimes 			m = next;
1248df8bae1dSRodney W. Grimes 			next = m->m_nextpkt;
1249df8bae1dSRodney W. Grimes 		}
1250df8bae1dSRodney W. Grimes 		if (m->m_len > len) {
12510f9d0a73SGleb Smirnoff 			KASSERT(!(m->m_flags & M_NOTAVAIL),
12520f9d0a73SGleb Smirnoff 			    ("%s: m %p M_NOTAVAIL", __func__, m));
1253df8bae1dSRodney W. Grimes 			m->m_len -= len;
1254df8bae1dSRodney W. Grimes 			m->m_data += len;
12550f9d0a73SGleb Smirnoff 			sb->sb_ccc -= len;
12560f9d0a73SGleb Smirnoff 			sb->sb_acc -= len;
12574e023759SAndre Oppermann 			if (sb->sb_sndptroff != 0)
12584e023759SAndre Oppermann 				sb->sb_sndptroff -= len;
125934333b16SAndre Oppermann 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
126004ac9b97SKelly Yancey 				sb->sb_ctl -= len;
1261df8bae1dSRodney W. Grimes 			break;
1262df8bae1dSRodney W. Grimes 		}
1263df8bae1dSRodney W. Grimes 		len -= m->m_len;
1264df8bae1dSRodney W. Grimes 		sbfree(sb, m);
12650f9d0a73SGleb Smirnoff 		/*
12660f9d0a73SGleb Smirnoff 		 * Do not put M_NOTREADY buffers to the free list, they
12670f9d0a73SGleb Smirnoff 		 * are referenced from outside.
12680f9d0a73SGleb Smirnoff 		 */
12690f9d0a73SGleb Smirnoff 		if (m->m_flags & M_NOTREADY)
12700f9d0a73SGleb Smirnoff 			m = m->m_next;
12710f9d0a73SGleb Smirnoff 		else {
12720f9d0a73SGleb Smirnoff 			struct mbuf *n;
12730f9d0a73SGleb Smirnoff 
12741d2df300SGleb Smirnoff 			n = m->m_next;
12751d2df300SGleb Smirnoff 			m->m_next = mfree;
12761d2df300SGleb Smirnoff 			mfree = m;
12771d2df300SGleb Smirnoff 			m = n;
1278df8bae1dSRodney W. Grimes 		}
12790f9d0a73SGleb Smirnoff 	}
1280e834a840SGleb Smirnoff 	/*
1281e834a840SGleb Smirnoff 	 * Free any zero-length mbufs from the buffer.
1282e834a840SGleb Smirnoff 	 * For SOCK_DGRAM sockets such mbufs represent empty records.
1283e834a840SGleb Smirnoff 	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
1284e834a840SGleb Smirnoff 	 * when sosend_generic() needs to send only control data.
1285e834a840SGleb Smirnoff 	 */
1286e834a840SGleb Smirnoff 	while (m && m->m_len == 0) {
1287e834a840SGleb Smirnoff 		struct mbuf *n;
1288e834a840SGleb Smirnoff 
1289e834a840SGleb Smirnoff 		sbfree(sb, m);
1290e834a840SGleb Smirnoff 		n = m->m_next;
1291e834a840SGleb Smirnoff 		m->m_next = mfree;
1292e834a840SGleb Smirnoff 		mfree = m;
1293e834a840SGleb Smirnoff 		m = n;
1294e834a840SGleb Smirnoff 	}
1295df8bae1dSRodney W. Grimes 	if (m) {
1296df8bae1dSRodney W. Grimes 		sb->sb_mb = m;
1297df8bae1dSRodney W. Grimes 		m->m_nextpkt = next;
1298df8bae1dSRodney W. Grimes 	} else
1299df8bae1dSRodney W. Grimes 		sb->sb_mb = next;
1300395bb186SSam Leffler 	/*
1301050ac265SRobert Watson 	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
1302050ac265SRobert Watson 	 * sb_lastrecord is up-to-date if we dropped part of the last record.
1303395bb186SSam Leffler 	 */
1304395bb186SSam Leffler 	m = sb->sb_mb;
1305395bb186SSam Leffler 	if (m == NULL) {
1306395bb186SSam Leffler 		sb->sb_mbtail = NULL;
1307395bb186SSam Leffler 		sb->sb_lastrecord = NULL;
1308395bb186SSam Leffler 	} else if (m->m_nextpkt == NULL) {
1309395bb186SSam Leffler 		sb->sb_lastrecord = m;
1310395bb186SSam Leffler 	}
13111d2df300SGleb Smirnoff 
13121d2df300SGleb Smirnoff 	return (mfree);
1313df8bae1dSRodney W. Grimes }
1314df8bae1dSRodney W. Grimes 
/*
 * Drop len bytes from (the front of) a sockbuf and free them.  The caller
 * is expected to hold the sockbuf lock, which SOCKBUF_LOCK_ASSERT() checks.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{
	struct mbuf *cut;

	SOCKBUF_LOCK_ASSERT(sb);
	cut = sbcut_internal(sb, len);
	m_freem(cut);
}
1325eaa6dfbcSRobert Watson 
/*
 * Cut len bytes from (the front of) a sockbuf and hand the detached mbuf
 * chain back to the caller instead of freeing it.  The caller is expected
 * to hold the sockbuf lock, which SOCKBUF_LOCK_ASSERT() checks.
 */
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);
	return (sbcut_internal(sb, len));
}
1337eaa6dfbcSRobert Watson 
/*
 * Drop len bytes from (the front of) a sockbuf, taking the sockbuf lock
 * for the duration of the cut only.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *cut;

	SOCKBUF_LOCK(sb);
	cut = sbcut_internal(sb, len);
	SOCKBUF_UNLOCK(sb);

	/* The detached chain is released only after the lock is dropped. */
	m_freem(cut);
}
1349a34b7046SRobert Watson 
135089e560f4SRandall Stewart struct mbuf *
135189e560f4SRandall Stewart sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
135289e560f4SRandall Stewart {
135389e560f4SRandall Stewart 	struct mbuf *m;
135489e560f4SRandall Stewart 
135589e560f4SRandall Stewart 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
135689e560f4SRandall Stewart 	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
135789e560f4SRandall Stewart 		*moff = off;
135889e560f4SRandall Stewart 		if (sb->sb_sndptr == NULL) {
135989e560f4SRandall Stewart 			sb->sb_sndptr = sb->sb_mb;
136089e560f4SRandall Stewart 			sb->sb_sndptroff = 0;
136189e560f4SRandall Stewart 		}
136289e560f4SRandall Stewart 		return (sb->sb_mb);
136389e560f4SRandall Stewart 	} else {
136489e560f4SRandall Stewart 		m = sb->sb_sndptr;
136589e560f4SRandall Stewart 		off -= sb->sb_sndptroff;
136689e560f4SRandall Stewart 	}
136789e560f4SRandall Stewart 	*moff = off;
136889e560f4SRandall Stewart 	return (m);
136989e560f4SRandall Stewart }
137089e560f4SRandall Stewart 
137189e560f4SRandall Stewart void
137289e560f4SRandall Stewart sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
137389e560f4SRandall Stewart {
137489e560f4SRandall Stewart 	/*
137589e560f4SRandall Stewart 	 * A small copy was done, advance forward the sb_sbsndptr to cover
137689e560f4SRandall Stewart 	 * it.
137789e560f4SRandall Stewart 	 */
137889e560f4SRandall Stewart 	struct mbuf *m;
137989e560f4SRandall Stewart 
138089e560f4SRandall Stewart 	if (mb != sb->sb_sndptr) {
138189e560f4SRandall Stewart 		/* Did not copyout at the same mbuf */
138289e560f4SRandall Stewart 		return;
138389e560f4SRandall Stewart 	}
138489e560f4SRandall Stewart 	m = mb;
138589e560f4SRandall Stewart 	while (m && (len > 0)) {
138689e560f4SRandall Stewart 		if (len >= m->m_len) {
138789e560f4SRandall Stewart 			len -= m->m_len;
138889e560f4SRandall Stewart 			if (m->m_next) {
138989e560f4SRandall Stewart 				sb->sb_sndptroff += m->m_len;
139089e560f4SRandall Stewart 				sb->sb_sndptr = m->m_next;
139189e560f4SRandall Stewart 			}
139289e560f4SRandall Stewart 			m = m->m_next;
139389e560f4SRandall Stewart 		} else {
139489e560f4SRandall Stewart 			len = 0;
139589e560f4SRandall Stewart 		}
139689e560f4SRandall Stewart 	}
139789e560f4SRandall Stewart }
139889e560f4SRandall Stewart 
1399a34b7046SRobert Watson /*
14009fd573c3SHans Petter Selasky  * Return the first mbuf and the mbuf data offset for the provided
14019fd573c3SHans Petter Selasky  * send offset without changing the "sb_sndptroff" field.
14029fd573c3SHans Petter Selasky  */
14039fd573c3SHans Petter Selasky struct mbuf *
14049fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
14059fd573c3SHans Petter Selasky {
14069fd573c3SHans Petter Selasky 	struct mbuf *m;
14079fd573c3SHans Petter Selasky 
14089fd573c3SHans Petter Selasky 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
14099fd573c3SHans Petter Selasky 
14109fd573c3SHans Petter Selasky 	/*
14119fd573c3SHans Petter Selasky 	 * If the "off" is below the stored offset, which happens on
14129fd573c3SHans Petter Selasky 	 * retransmits, just use "sb_mb":
14139fd573c3SHans Petter Selasky 	 */
14149fd573c3SHans Petter Selasky 	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
14159fd573c3SHans Petter Selasky 		m = sb->sb_mb;
14169fd573c3SHans Petter Selasky 	} else {
14179fd573c3SHans Petter Selasky 		m = sb->sb_sndptr;
14189fd573c3SHans Petter Selasky 		off -= sb->sb_sndptroff;
14199fd573c3SHans Petter Selasky 	}
14209fd573c3SHans Petter Selasky 	while (off > 0 && m != NULL) {
14219fd573c3SHans Petter Selasky 		if (off < m->m_len)
14229fd573c3SHans Petter Selasky 			break;
14239fd573c3SHans Petter Selasky 		off -= m->m_len;
14249fd573c3SHans Petter Selasky 		m = m->m_next;
14259fd573c3SHans Petter Selasky 	}
14269fd573c3SHans Petter Selasky 	*moff = off;
14279fd573c3SHans Petter Selasky 	return (m);
14289fd573c3SHans Petter Selasky }
14299fd573c3SHans Petter Selasky 
14309fd573c3SHans Petter Selasky /*
1431050ac265SRobert Watson  * Drop a record off the front of a sockbuf and move the next record to the
1432050ac265SRobert Watson  * front.
1433df8bae1dSRodney W. Grimes  */
143426f9a767SRodney W. Grimes void
1435050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb)
1436df8bae1dSRodney W. Grimes {
1437050ac265SRobert Watson 	struct mbuf *m;
1438df8bae1dSRodney W. Grimes 
1439a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
1440a34b7046SRobert Watson 
1441df8bae1dSRodney W. Grimes 	m = sb->sb_mb;
1442df8bae1dSRodney W. Grimes 	if (m) {
1443df8bae1dSRodney W. Grimes 		sb->sb_mb = m->m_nextpkt;
1444df8bae1dSRodney W. Grimes 		do {
1445df8bae1dSRodney W. Grimes 			sbfree(sb, m);
1446ecde8f7cSMatthew Dillon 			m = m_free(m);
1447797f2d22SPoul-Henning Kamp 		} while (m);
1448df8bae1dSRodney W. Grimes 	}
1449395bb186SSam Leffler 	SB_EMPTY_FIXUP(sb);
1450df8bae1dSRodney W. Grimes }
14511e4ad9ceSGarrett Wollman 
/*
 * Drop the record at the front of a sockbuf and move the next record to
 * the front, taking and releasing the sockbuf lock around the operation.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1464a34b7046SRobert Watson 
146520d9e5e8SRobert Watson /*
14668c799760SRobert Watson  * Create a "control" mbuf containing the specified data with the specified
14678c799760SRobert Watson  * type for presentation on a socket buffer.
146820d9e5e8SRobert Watson  */
146920d9e5e8SRobert Watson struct mbuf *
1470d19e16a7SRobert Watson sbcreatecontrol(caddr_t p, int size, int type, int level)
147120d9e5e8SRobert Watson {
1472d19e16a7SRobert Watson 	struct cmsghdr *cp;
147320d9e5e8SRobert Watson 	struct mbuf *m;
147420d9e5e8SRobert Watson 
147520d9e5e8SRobert Watson 	if (CMSG_SPACE((u_int)size) > MCLBYTES)
147620d9e5e8SRobert Watson 		return ((struct mbuf *) NULL);
147720d9e5e8SRobert Watson 	if (CMSG_SPACE((u_int)size) > MLEN)
1478eb1b1807SGleb Smirnoff 		m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
147920d9e5e8SRobert Watson 	else
1480eb1b1807SGleb Smirnoff 		m = m_get(M_NOWAIT, MT_CONTROL);
148120d9e5e8SRobert Watson 	if (m == NULL)
148220d9e5e8SRobert Watson 		return ((struct mbuf *) NULL);
148320d9e5e8SRobert Watson 	cp = mtod(m, struct cmsghdr *);
148420d9e5e8SRobert Watson 	m->m_len = 0;
148520d9e5e8SRobert Watson 	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
148620d9e5e8SRobert Watson 	    ("sbcreatecontrol: short mbuf"));
14872827952eSXin LI 	/*
14882827952eSXin LI 	 * Don't leave the padding between the msg header and the
14892827952eSXin LI 	 * cmsg data and the padding after the cmsg data un-initialized.
14902827952eSXin LI 	 */
14912827952eSXin LI 	bzero(cp, CMSG_SPACE((u_int)size));
149220d9e5e8SRobert Watson 	if (p != NULL)
149320d9e5e8SRobert Watson 		(void)memcpy(CMSG_DATA(cp), p, size);
149420d9e5e8SRobert Watson 	m->m_len = CMSG_SPACE(size);
149520d9e5e8SRobert Watson 	cp->cmsg_len = CMSG_LEN(size);
149620d9e5e8SRobert Watson 	cp->cmsg_level = level;
149720d9e5e8SRobert Watson 	cp->cmsg_type = type;
149820d9e5e8SRobert Watson 	return (m);
149920d9e5e8SRobert Watson }
150020d9e5e8SRobert Watson 
150120d9e5e8SRobert Watson /*
15028c799760SRobert Watson  * This does the same for socket buffers that sotoxsocket does for sockets:
15038c799760SRobert Watson  * generate an user-format data structure describing the socket buffer.  Note
15048c799760SRobert Watson  * that the xsockbuf structure, since it is always embedded in a socket, does
15058c799760SRobert Watson  * not include a self pointer nor a length.  We make this entry point public
15068c799760SRobert Watson  * in case some other mechanism needs it.
150720d9e5e8SRobert Watson  */
150820d9e5e8SRobert Watson void
150920d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
151020d9e5e8SRobert Watson {
1511d19e16a7SRobert Watson 
15120f9d0a73SGleb Smirnoff 	xsb->sb_cc = sb->sb_ccc;
151320d9e5e8SRobert Watson 	xsb->sb_hiwat = sb->sb_hiwat;
151420d9e5e8SRobert Watson 	xsb->sb_mbcnt = sb->sb_mbcnt;
151549f287f8SGeorge V. Neville-Neil 	xsb->sb_mcnt = sb->sb_mcnt;
151649f287f8SGeorge V. Neville-Neil 	xsb->sb_ccnt = sb->sb_ccnt;
151720d9e5e8SRobert Watson 	xsb->sb_mbmax = sb->sb_mbmax;
151820d9e5e8SRobert Watson 	xsb->sb_lowat = sb->sb_lowat;
151920d9e5e8SRobert Watson 	xsb->sb_flags = sb->sb_flags;
152020d9e5e8SRobert Watson 	xsb->sb_timeo = sb->sb_timeo;
152120d9e5e8SRobert Watson }
152220d9e5e8SRobert Watson 
1523639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1524639acc13SGarrett Wollman static int dummy;
1525e8cdbb48SPawel Biernacki SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
1526*7029da5cSPawel Biernacki SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
1527*7029da5cSPawel Biernacki     CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &sb_max, 0,
1528*7029da5cSPawel Biernacki     sysctl_handle_sb_max, "LU",
1529*7029da5cSPawel Biernacki     "Maximum socket buffer size");
15301b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
15313eb9ab52SEitan Adler     &sb_efficiency, 0, "Socket buffer size waste factor");
1532