xref: /freebsd/sys/kern/uipc_sockbuf.c (revision 3c0e5685051142f4125bf93d40a3e0b570e2327c)
19454b2d8SWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1990, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1569a28758SEd Maste  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  *
31df8bae1dSRodney W. Grimes  *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
32df8bae1dSRodney W. Grimes  */
33df8bae1dSRodney W. Grimes 
34677b542eSDavid E. O'Brien #include <sys/cdefs.h>
35677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
36677b542eSDavid E. O'Brien 
37b2e60773SJohn Baldwin #include "opt_kern_tls.h"
385b86eac4SJesper Skriver #include "opt_param.h"
39335654d7SRobert Watson 
40df8bae1dSRodney W. Grimes #include <sys/param.h>
41960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */
42ff5c09daSGarrett Wollman #include <sys/kernel.h>
43b2e60773SJohn Baldwin #include <sys/ktls.h>
44fb919e4dSMark Murray #include <sys/lock.h>
458ec07310SGleb Smirnoff #include <sys/malloc.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47960ed29cSSeigo Tanimura #include <sys/mutex.h>
48fb919e4dSMark Murray #include <sys/proc.h>
49df8bae1dSRodney W. Grimes #include <sys/protosw.h>
502f9a2132SBrian Feldman #include <sys/resourcevar.h>
51960ed29cSSeigo Tanimura #include <sys/signalvar.h>
52df8bae1dSRodney W. Grimes #include <sys/socket.h>
53df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
547abab911SRobert Watson #include <sys/sx.h>
55ff5c09daSGarrett Wollman #include <sys/sysctl.h>
5626f9a767SRodney W. Grimes 
/*
 * Callback hook for the AIO module.  The AIO routines store a function
 * pointer here so that the socket buffer code is able to call back into
 * the AIO module when that module is loaded.
 */
void	(*aio_swake)(struct socket *so, struct sockbuf *sb);
6221d56e9cSAlfred Perlstein 
63df8bae1dSRodney W. Grimes /*
64f14cce87SRobert Watson  * Primitive routines for operating on socket buffers
65df8bae1dSRodney W. Grimes  */
66df8bae1dSRodney W. Grimes 
6779cb7eb4SDavid Greenman u_long	sb_max = SB_MAX;
6858d14daeSMohan Srinivasan u_long sb_max_adj =
69b233773bSBjoern A. Zeeb        (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
70df8bae1dSRodney W. Grimes 
714b29bc4fSGarrett Wollman static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
724b29bc4fSGarrett Wollman 
/* Prototypes for file-local helpers that are used before their definition. */
static void		 sbcompress_ktls_rx(struct sockbuf *sb,
			    struct mbuf *m, struct mbuf *n);
static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
static void		 sbflush_internal(struct sockbuf *sb);
77eaa6dfbcSRobert Watson 
78df8bae1dSRodney W. Grimes /*
79829fae90SGleb Smirnoff  * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
80829fae90SGleb Smirnoff  */
81829fae90SGleb Smirnoff static void
82829fae90SGleb Smirnoff sbm_clrprotoflags(struct mbuf *m, int flags)
83829fae90SGleb Smirnoff {
84829fae90SGleb Smirnoff 	int mask;
85829fae90SGleb Smirnoff 
86829fae90SGleb Smirnoff 	mask = ~M_PROTOFLAGS;
87829fae90SGleb Smirnoff 	if (flags & PRUS_NOTREADY)
88829fae90SGleb Smirnoff 		mask |= M_NOTREADY;
89829fae90SGleb Smirnoff 	while (m) {
90829fae90SGleb Smirnoff 		m->m_flags &= mask;
91829fae90SGleb Smirnoff 		m = m->m_next;
92829fae90SGleb Smirnoff 	}
93829fae90SGleb Smirnoff }
94829fae90SGleb Smirnoff 
95829fae90SGleb Smirnoff /*
963807631bSJohn Baldwin  * Compress M_NOTREADY mbufs after they have been readied by sbready().
973807631bSJohn Baldwin  *
983807631bSJohn Baldwin  * sbcompress() skips M_NOTREADY mbufs since the data is not available to
993807631bSJohn Baldwin  * be copied at the time of sbcompress().  This function combines small
1003807631bSJohn Baldwin  * mbufs similar to sbcompress() once mbufs are ready.  'm0' is the first
1013807631bSJohn Baldwin  * mbuf sbready() marked ready, and 'end' is the first mbuf still not
1023807631bSJohn Baldwin  * ready.
1033807631bSJohn Baldwin  */
1043807631bSJohn Baldwin static void
1053807631bSJohn Baldwin sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
1063807631bSJohn Baldwin {
1073807631bSJohn Baldwin 	struct mbuf *m, *n;
1083807631bSJohn Baldwin 	int ext_size;
1093807631bSJohn Baldwin 
1103807631bSJohn Baldwin 	SOCKBUF_LOCK_ASSERT(sb);
1113807631bSJohn Baldwin 
1123807631bSJohn Baldwin 	if ((sb->sb_flags & SB_NOCOALESCE) != 0)
1133807631bSJohn Baldwin 		return;
1143807631bSJohn Baldwin 
1153807631bSJohn Baldwin 	for (m = m0; m != end; m = m->m_next) {
1163807631bSJohn Baldwin 		MPASS((m->m_flags & M_NOTREADY) == 0);
117c4ad247bSAndrew Gallatin 		/*
118c4ad247bSAndrew Gallatin 		 * NB: In sbcompress(), 'n' is the last mbuf in the
119c4ad247bSAndrew Gallatin 		 * socket buffer and 'm' is the new mbuf being copied
120c4ad247bSAndrew Gallatin 		 * into the trailing space of 'n'.  Here, the roles
121c4ad247bSAndrew Gallatin 		 * are reversed and 'n' is the next mbuf after 'm'
122c4ad247bSAndrew Gallatin 		 * that is being copied into the trailing space of
123c4ad247bSAndrew Gallatin 		 * 'm'.
124c4ad247bSAndrew Gallatin 		 */
125c4ad247bSAndrew Gallatin 		n = m->m_next;
126c4ad247bSAndrew Gallatin #ifdef KERN_TLS
127c4ad247bSAndrew Gallatin 		/* Try to coalesce adjacent ktls mbuf hdr/trailers. */
128c4ad247bSAndrew Gallatin 		if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
1296edfd179SGleb Smirnoff 		    (m->m_flags & M_EXTPG) &&
1306edfd179SGleb Smirnoff 		    (n->m_flags & M_EXTPG) &&
131c4ad247bSAndrew Gallatin 		    !mbuf_has_tls_session(m) &&
132c4ad247bSAndrew Gallatin 		    !mbuf_has_tls_session(n)) {
133c4ad247bSAndrew Gallatin 			int hdr_len, trail_len;
134c4ad247bSAndrew Gallatin 
1357b6c99d0SGleb Smirnoff 			hdr_len = n->m_epg_hdrlen;
1367b6c99d0SGleb Smirnoff 			trail_len = m->m_epg_trllen;
137c4ad247bSAndrew Gallatin 			if (trail_len != 0 && hdr_len != 0 &&
138c4ad247bSAndrew Gallatin 			    trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) {
139c4ad247bSAndrew Gallatin 				/* copy n's header to m's trailer */
14023feb563SAndrew Gallatin 				memcpy(&m->m_epg_trail[trail_len],
14123feb563SAndrew Gallatin 				    n->m_epg_hdr, hdr_len);
1427b6c99d0SGleb Smirnoff 				m->m_epg_trllen += hdr_len;
143c4ad247bSAndrew Gallatin 				m->m_len += hdr_len;
1447b6c99d0SGleb Smirnoff 				n->m_epg_hdrlen = 0;
145c4ad247bSAndrew Gallatin 				n->m_len -= hdr_len;
146c4ad247bSAndrew Gallatin 			}
147c4ad247bSAndrew Gallatin 		}
148c4ad247bSAndrew Gallatin #endif
1493807631bSJohn Baldwin 
1503807631bSJohn Baldwin 		/* Compress small unmapped mbufs into plain mbufs. */
1516edfd179SGleb Smirnoff 		if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN &&
152b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m)) {
1533807631bSJohn Baldwin 			ext_size = m->m_ext.ext_size;
1543807631bSJohn Baldwin 			if (mb_unmapped_compress(m) == 0) {
1553807631bSJohn Baldwin 				sb->sb_mbcnt -= ext_size;
1563807631bSJohn Baldwin 				sb->sb_ccnt -= 1;
1573807631bSJohn Baldwin 			}
1583807631bSJohn Baldwin 		}
1593807631bSJohn Baldwin 
1603807631bSJohn Baldwin 		while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
1613807631bSJohn Baldwin 		    M_WRITABLE(m) &&
1626edfd179SGleb Smirnoff 		    (m->m_flags & M_EXTPG) == 0 &&
163b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(n) &&
164b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m) &&
1653807631bSJohn Baldwin 		    n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1663807631bSJohn Baldwin 		    n->m_len <= M_TRAILINGSPACE(m) &&
1673807631bSJohn Baldwin 		    m->m_type == n->m_type) {
1683807631bSJohn Baldwin 			KASSERT(sb->sb_lastrecord != n,
1693807631bSJohn Baldwin 		    ("%s: merging start of record (%p) into previous mbuf (%p)",
1703807631bSJohn Baldwin 			    __func__, n, m));
1713807631bSJohn Baldwin 			m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
1723807631bSJohn Baldwin 			m->m_len += n->m_len;
1733807631bSJohn Baldwin 			m->m_next = n->m_next;
1743807631bSJohn Baldwin 			m->m_flags |= n->m_flags & M_EOR;
1753807631bSJohn Baldwin 			if (sb->sb_mbtail == n)
1763807631bSJohn Baldwin 				sb->sb_mbtail = m;
1773807631bSJohn Baldwin 
1783807631bSJohn Baldwin 			sb->sb_mbcnt -= MSIZE;
1793807631bSJohn Baldwin 			sb->sb_mcnt -= 1;
1803807631bSJohn Baldwin 			if (n->m_flags & M_EXT) {
1813807631bSJohn Baldwin 				sb->sb_mbcnt -= n->m_ext.ext_size;
1823807631bSJohn Baldwin 				sb->sb_ccnt -= 1;
1833807631bSJohn Baldwin 			}
1843807631bSJohn Baldwin 			m_free(n);
1853807631bSJohn Baldwin 			n = m->m_next;
1863807631bSJohn Baldwin 		}
1873807631bSJohn Baldwin 	}
1883807631bSJohn Baldwin 	SBLASTRECORDCHK(sb);
1893807631bSJohn Baldwin 	SBLASTMBUFCHK(sb);
1903807631bSJohn Baldwin }
1913807631bSJohn Baldwin 
1923807631bSJohn Baldwin /*
19382334850SJohn Baldwin  * Mark ready "count" units of I/O starting with "m".  Most mbufs
19461664ee7SGleb Smirnoff  * count as a single unit of I/O except for M_EXTPG mbufs which
19561664ee7SGleb Smirnoff  * are backed by multiple pages.
1960f9d0a73SGleb Smirnoff  */
1970f9d0a73SGleb Smirnoff int
19882334850SJohn Baldwin sbready(struct sockbuf *sb, struct mbuf *m0, int count)
1990f9d0a73SGleb Smirnoff {
20082334850SJohn Baldwin 	struct mbuf *m;
2010f9d0a73SGleb Smirnoff 	u_int blocker;
2020f9d0a73SGleb Smirnoff 
2030f9d0a73SGleb Smirnoff 	SOCKBUF_LOCK_ASSERT(sb);
2040f9d0a73SGleb Smirnoff 	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
20582334850SJohn Baldwin 	KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
2060f9d0a73SGleb Smirnoff 
20782334850SJohn Baldwin 	m = m0;
2080f9d0a73SGleb Smirnoff 	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
2090f9d0a73SGleb Smirnoff 
21082334850SJohn Baldwin 	while (count > 0) {
2110f9d0a73SGleb Smirnoff 		KASSERT(m->m_flags & M_NOTREADY,
2120f9d0a73SGleb Smirnoff 		    ("%s: m %p !M_NOTREADY", __func__, m));
21361664ee7SGleb Smirnoff 		if ((m->m_flags & M_EXTPG) != 0) {
2147b6c99d0SGleb Smirnoff 			if (count < m->m_epg_nrdy) {
2157b6c99d0SGleb Smirnoff 				m->m_epg_nrdy -= count;
21682334850SJohn Baldwin 				count = 0;
21782334850SJohn Baldwin 				break;
21882334850SJohn Baldwin 			}
2197b6c99d0SGleb Smirnoff 			count -= m->m_epg_nrdy;
2207b6c99d0SGleb Smirnoff 			m->m_epg_nrdy = 0;
22182334850SJohn Baldwin 		} else
22282334850SJohn Baldwin 			count--;
22382334850SJohn Baldwin 
2240f9d0a73SGleb Smirnoff 		m->m_flags &= ~(M_NOTREADY | blocker);
2250f9d0a73SGleb Smirnoff 		if (blocker)
2260f9d0a73SGleb Smirnoff 			sb->sb_acc += m->m_len;
22782334850SJohn Baldwin 		m = m->m_next;
2280f9d0a73SGleb Smirnoff 	}
2290f9d0a73SGleb Smirnoff 
23082334850SJohn Baldwin 	/*
23182334850SJohn Baldwin 	 * If the first mbuf is still not fully ready because only
23282334850SJohn Baldwin 	 * some of its backing pages were readied, no further progress
23382334850SJohn Baldwin 	 * can be made.
23482334850SJohn Baldwin 	 */
23582334850SJohn Baldwin 	if (m0 == m) {
23682334850SJohn Baldwin 		MPASS(m->m_flags & M_NOTREADY);
2370f9d0a73SGleb Smirnoff 		return (EINPROGRESS);
23882334850SJohn Baldwin 	}
23982334850SJohn Baldwin 
24082334850SJohn Baldwin 	if (!blocker) {
2413807631bSJohn Baldwin 		sbready_compress(sb, m0, m);
24282334850SJohn Baldwin 		return (EINPROGRESS);
24382334850SJohn Baldwin 	}
2440f9d0a73SGleb Smirnoff 
2450f9d0a73SGleb Smirnoff 	/* This one was blocking all the queue. */
2460f9d0a73SGleb Smirnoff 	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
2470f9d0a73SGleb Smirnoff 		KASSERT(m->m_flags & M_BLOCKED,
2480f9d0a73SGleb Smirnoff 		    ("%s: m %p !M_BLOCKED", __func__, m));
2490f9d0a73SGleb Smirnoff 		m->m_flags &= ~M_BLOCKED;
2500f9d0a73SGleb Smirnoff 		sb->sb_acc += m->m_len;
2510f9d0a73SGleb Smirnoff 	}
2520f9d0a73SGleb Smirnoff 
2530f9d0a73SGleb Smirnoff 	sb->sb_fnrdy = m;
2543807631bSJohn Baldwin 	sbready_compress(sb, m0, m);
2550f9d0a73SGleb Smirnoff 
2560f9d0a73SGleb Smirnoff 	return (0);
2570f9d0a73SGleb Smirnoff }
2580f9d0a73SGleb Smirnoff 
2590f9d0a73SGleb Smirnoff /*
2608967b220SGleb Smirnoff  * Adjust sockbuf state reflecting allocation of m.
2618967b220SGleb Smirnoff  */
2628967b220SGleb Smirnoff void
2638967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m)
2648967b220SGleb Smirnoff {
2658967b220SGleb Smirnoff 
2668967b220SGleb Smirnoff 	SOCKBUF_LOCK_ASSERT(sb);
2678967b220SGleb Smirnoff 
2680f9d0a73SGleb Smirnoff 	sb->sb_ccc += m->m_len;
2690f9d0a73SGleb Smirnoff 
2700f9d0a73SGleb Smirnoff 	if (sb->sb_fnrdy == NULL) {
2710f9d0a73SGleb Smirnoff 		if (m->m_flags & M_NOTREADY)
2720f9d0a73SGleb Smirnoff 			sb->sb_fnrdy = m;
2730f9d0a73SGleb Smirnoff 		else
2740f9d0a73SGleb Smirnoff 			sb->sb_acc += m->m_len;
2750f9d0a73SGleb Smirnoff 	} else
2760f9d0a73SGleb Smirnoff 		m->m_flags |= M_BLOCKED;
2778967b220SGleb Smirnoff 
2788967b220SGleb Smirnoff 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
2798967b220SGleb Smirnoff 		sb->sb_ctl += m->m_len;
2808967b220SGleb Smirnoff 
2818967b220SGleb Smirnoff 	sb->sb_mbcnt += MSIZE;
2828967b220SGleb Smirnoff 	sb->sb_mcnt += 1;
2838967b220SGleb Smirnoff 
2848967b220SGleb Smirnoff 	if (m->m_flags & M_EXT) {
2858967b220SGleb Smirnoff 		sb->sb_mbcnt += m->m_ext.ext_size;
2868967b220SGleb Smirnoff 		sb->sb_ccnt += 1;
2878967b220SGleb Smirnoff 	}
2888967b220SGleb Smirnoff }
2898967b220SGleb Smirnoff 
2908967b220SGleb Smirnoff /*
2918967b220SGleb Smirnoff  * Adjust sockbuf state reflecting freeing of m.
2928967b220SGleb Smirnoff  */
2938967b220SGleb Smirnoff void
2948967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m)
2958967b220SGleb Smirnoff {
2968967b220SGleb Smirnoff 
2978967b220SGleb Smirnoff #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
2988967b220SGleb Smirnoff 	SOCKBUF_LOCK_ASSERT(sb);
2998967b220SGleb Smirnoff #endif
3008967b220SGleb Smirnoff 
3010f9d0a73SGleb Smirnoff 	sb->sb_ccc -= m->m_len;
3020f9d0a73SGleb Smirnoff 
3030f9d0a73SGleb Smirnoff 	if (!(m->m_flags & M_NOTAVAIL))
3040f9d0a73SGleb Smirnoff 		sb->sb_acc -= m->m_len;
3050f9d0a73SGleb Smirnoff 
3060f9d0a73SGleb Smirnoff 	if (m == sb->sb_fnrdy) {
3070f9d0a73SGleb Smirnoff 		struct mbuf *n;
3080f9d0a73SGleb Smirnoff 
3090f9d0a73SGleb Smirnoff 		KASSERT(m->m_flags & M_NOTREADY,
3100f9d0a73SGleb Smirnoff 		    ("%s: m %p !M_NOTREADY", __func__, m));
3110f9d0a73SGleb Smirnoff 
3120f9d0a73SGleb Smirnoff 		n = m->m_next;
3130f9d0a73SGleb Smirnoff 		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
3140f9d0a73SGleb Smirnoff 			n->m_flags &= ~M_BLOCKED;
3150f9d0a73SGleb Smirnoff 			sb->sb_acc += n->m_len;
3160f9d0a73SGleb Smirnoff 			n = n->m_next;
3170f9d0a73SGleb Smirnoff 		}
3180f9d0a73SGleb Smirnoff 		sb->sb_fnrdy = n;
3190f9d0a73SGleb Smirnoff 	}
3208967b220SGleb Smirnoff 
3218967b220SGleb Smirnoff 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
3228967b220SGleb Smirnoff 		sb->sb_ctl -= m->m_len;
3238967b220SGleb Smirnoff 
3248967b220SGleb Smirnoff 	sb->sb_mbcnt -= MSIZE;
3258967b220SGleb Smirnoff 	sb->sb_mcnt -= 1;
3268967b220SGleb Smirnoff 	if (m->m_flags & M_EXT) {
3278967b220SGleb Smirnoff 		sb->sb_mbcnt -= m->m_ext.ext_size;
3288967b220SGleb Smirnoff 		sb->sb_ccnt -= 1;
3298967b220SGleb Smirnoff 	}
3308967b220SGleb Smirnoff 
3318967b220SGleb Smirnoff 	if (sb->sb_sndptr == m) {
3328967b220SGleb Smirnoff 		sb->sb_sndptr = NULL;
3338967b220SGleb Smirnoff 		sb->sb_sndptroff = 0;
3348967b220SGleb Smirnoff 	}
3358967b220SGleb Smirnoff 	if (sb->sb_sndptroff != 0)
3368967b220SGleb Smirnoff 		sb->sb_sndptroff -= m->m_len;
3378967b220SGleb Smirnoff }
3388967b220SGleb Smirnoff 
339*3c0e5685SJohn Baldwin #ifdef KERN_TLS
340*3c0e5685SJohn Baldwin /*
341*3c0e5685SJohn Baldwin  * Similar to sballoc/sbfree but does not adjust state associated with
342*3c0e5685SJohn Baldwin  * the sb_mb chain such as sb_fnrdy or sb_sndptr*.  Also assumes mbufs
343*3c0e5685SJohn Baldwin  * are not ready.
344*3c0e5685SJohn Baldwin  */
345*3c0e5685SJohn Baldwin void
346*3c0e5685SJohn Baldwin sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
347*3c0e5685SJohn Baldwin {
348*3c0e5685SJohn Baldwin 
349*3c0e5685SJohn Baldwin 	SOCKBUF_LOCK_ASSERT(sb);
350*3c0e5685SJohn Baldwin 
351*3c0e5685SJohn Baldwin 	sb->sb_ccc += m->m_len;
352*3c0e5685SJohn Baldwin 	sb->sb_tlscc += m->m_len;
353*3c0e5685SJohn Baldwin 
354*3c0e5685SJohn Baldwin 	sb->sb_mbcnt += MSIZE;
355*3c0e5685SJohn Baldwin 	sb->sb_mcnt += 1;
356*3c0e5685SJohn Baldwin 
357*3c0e5685SJohn Baldwin 	if (m->m_flags & M_EXT) {
358*3c0e5685SJohn Baldwin 		sb->sb_mbcnt += m->m_ext.ext_size;
359*3c0e5685SJohn Baldwin 		sb->sb_ccnt += 1;
360*3c0e5685SJohn Baldwin 	}
361*3c0e5685SJohn Baldwin }
362*3c0e5685SJohn Baldwin 
363*3c0e5685SJohn Baldwin void
364*3c0e5685SJohn Baldwin sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
365*3c0e5685SJohn Baldwin {
366*3c0e5685SJohn Baldwin 
367*3c0e5685SJohn Baldwin #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
368*3c0e5685SJohn Baldwin 	SOCKBUF_LOCK_ASSERT(sb);
369*3c0e5685SJohn Baldwin #endif
370*3c0e5685SJohn Baldwin 
371*3c0e5685SJohn Baldwin 	sb->sb_ccc -= m->m_len;
372*3c0e5685SJohn Baldwin 	sb->sb_tlscc -= m->m_len;
373*3c0e5685SJohn Baldwin 
374*3c0e5685SJohn Baldwin 	sb->sb_mbcnt -= MSIZE;
375*3c0e5685SJohn Baldwin 	sb->sb_mcnt -= 1;
376*3c0e5685SJohn Baldwin 
377*3c0e5685SJohn Baldwin 	if (m->m_flags & M_EXT) {
378*3c0e5685SJohn Baldwin 		sb->sb_mbcnt -= m->m_ext.ext_size;
379*3c0e5685SJohn Baldwin 		sb->sb_ccnt -= 1;
380*3c0e5685SJohn Baldwin 	}
381*3c0e5685SJohn Baldwin }
382*3c0e5685SJohn Baldwin #endif
383*3c0e5685SJohn Baldwin 
3848967b220SGleb Smirnoff /*
385050ac265SRobert Watson  * Socantsendmore indicates that no more data will be sent on the socket; it
386050ac265SRobert Watson  * would normally be applied to a socket when the user informs the system
387050ac265SRobert Watson  * that no more data is to be sent, by the protocol code (in case
388050ac265SRobert Watson  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
389050ac265SRobert Watson  * received, and will normally be applied to the socket by a protocol when it
390050ac265SRobert Watson  * detects that the peer will send no more data.  Data queued for reading in
391050ac265SRobert Watson  * the socket may yet be read.
392df8bae1dSRodney W. Grimes  */
393a34b7046SRobert Watson void
394050ac265SRobert Watson socantsendmore_locked(struct socket *so)
395a34b7046SRobert Watson {
396a34b7046SRobert Watson 
397a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
398a34b7046SRobert Watson 
399a34b7046SRobert Watson 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
400a34b7046SRobert Watson 	sowwakeup_locked(so);
401a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
402a34b7046SRobert Watson }
403df8bae1dSRodney W. Grimes 
40426f9a767SRodney W. Grimes void
405050ac265SRobert Watson socantsendmore(struct socket *so)
406df8bae1dSRodney W. Grimes {
407df8bae1dSRodney W. Grimes 
408a34b7046SRobert Watson 	SOCKBUF_LOCK(&so->so_snd);
409a34b7046SRobert Watson 	socantsendmore_locked(so);
410a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
411a34b7046SRobert Watson }
412a34b7046SRobert Watson 
413a34b7046SRobert Watson void
414050ac265SRobert Watson socantrcvmore_locked(struct socket *so)
415a34b7046SRobert Watson {
416a34b7046SRobert Watson 
417a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
418a34b7046SRobert Watson 
419a34b7046SRobert Watson 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
420*3c0e5685SJohn Baldwin #ifdef KERN_TLS
421*3c0e5685SJohn Baldwin 	if (so->so_rcv.sb_flags & SB_TLS_RX)
422*3c0e5685SJohn Baldwin 		ktls_check_rx(&so->so_rcv);
423*3c0e5685SJohn Baldwin #endif
424a34b7046SRobert Watson 	sorwakeup_locked(so);
425a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
426df8bae1dSRodney W. Grimes }
427df8bae1dSRodney W. Grimes 
42826f9a767SRodney W. Grimes void
429050ac265SRobert Watson socantrcvmore(struct socket *so)
430df8bae1dSRodney W. Grimes {
431df8bae1dSRodney W. Grimes 
432a34b7046SRobert Watson 	SOCKBUF_LOCK(&so->so_rcv);
433a34b7046SRobert Watson 	socantrcvmore_locked(so);
434a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
435df8bae1dSRodney W. Grimes }
436df8bae1dSRodney W. Grimes 
437df8bae1dSRodney W. Grimes /*
438df8bae1dSRodney W. Grimes  * Wait for data to arrive at/drain from a socket buffer.
439df8bae1dSRodney W. Grimes  */
44026f9a767SRodney W. Grimes int
441050ac265SRobert Watson sbwait(struct sockbuf *sb)
442df8bae1dSRodney W. Grimes {
443df8bae1dSRodney W. Grimes 
44431f555a1SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
44531f555a1SRobert Watson 
446df8bae1dSRodney W. Grimes 	sb->sb_flags |= SB_WAIT;
4470f9d0a73SGleb Smirnoff 	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
44847daf5d5SBruce Evans 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
4497729cbf1SDavide Italiano 	    sb->sb_timeo, 0, 0));
450df8bae1dSRodney W. Grimes }
451df8bae1dSRodney W. Grimes 
45226f9a767SRodney W. Grimes int
4537abab911SRobert Watson sblock(struct sockbuf *sb, int flags)
454df8bae1dSRodney W. Grimes {
455df8bae1dSRodney W. Grimes 
456265de5bbSRobert Watson 	KASSERT((flags & SBL_VALID) == flags,
457265de5bbSRobert Watson 	    ("sblock: flags invalid (0x%x)", flags));
458265de5bbSRobert Watson 
459265de5bbSRobert Watson 	if (flags & SBL_WAIT) {
460265de5bbSRobert Watson 		if ((sb->sb_flags & SB_NOINTR) ||
461265de5bbSRobert Watson 		    (flags & SBL_NOINTR)) {
4627abab911SRobert Watson 			sx_xlock(&sb->sb_sx);
463df8bae1dSRodney W. Grimes 			return (0);
464049c3b6cSRobert Watson 		}
465049c3b6cSRobert Watson 		return (sx_xlock_sig(&sb->sb_sx));
4667abab911SRobert Watson 	} else {
4677abab911SRobert Watson 		if (sx_try_xlock(&sb->sb_sx) == 0)
4687abab911SRobert Watson 			return (EWOULDBLOCK);
4697abab911SRobert Watson 		return (0);
4707abab911SRobert Watson 	}
4717abab911SRobert Watson }
4727abab911SRobert Watson 
4737abab911SRobert Watson void
4747abab911SRobert Watson sbunlock(struct sockbuf *sb)
4757abab911SRobert Watson {
4767abab911SRobert Watson 
4777abab911SRobert Watson 	sx_xunlock(&sb->sb_sx);
478df8bae1dSRodney W. Grimes }
479df8bae1dSRodney W. Grimes 
480df8bae1dSRodney W. Grimes /*
481050ac265SRobert Watson  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
482050ac265SRobert Watson  * via SIGIO if the socket has the SS_ASYNC flag set.
483a34b7046SRobert Watson  *
484a34b7046SRobert Watson  * Called with the socket buffer lock held; will release the lock by the end
485a34b7046SRobert Watson  * of the function.  This allows the caller to acquire the socket buffer lock
486a34b7046SRobert Watson  * while testing for the need for various sorts of wakeup and hold it through
487a34b7046SRobert Watson  * to the point where it's no longer required.  We currently hold the lock
488a34b7046SRobert Watson  * through calls out to other subsystems (with the exception of kqueue), and
489a34b7046SRobert Watson  * then release it to avoid lock order issues.  It's not clear that's
490a34b7046SRobert Watson  * correct.
491df8bae1dSRodney W. Grimes  */
49226f9a767SRodney W. Grimes void
493050ac265SRobert Watson sowakeup(struct socket *so, struct sockbuf *sb)
494df8bae1dSRodney W. Grimes {
49574fb0ba7SJohn Baldwin 	int ret;
496d48d4b25SSeigo Tanimura 
497a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
498a34b7046SRobert Watson 
499779f106aSGleb Smirnoff 	selwakeuppri(sb->sb_sel, PSOCK);
500779f106aSGleb Smirnoff 	if (!SEL_WAITING(sb->sb_sel))
501df8bae1dSRodney W. Grimes 		sb->sb_flags &= ~SB_SEL;
502df8bae1dSRodney W. Grimes 	if (sb->sb_flags & SB_WAIT) {
503df8bae1dSRodney W. Grimes 		sb->sb_flags &= ~SB_WAIT;
5040f9d0a73SGleb Smirnoff 		wakeup(&sb->sb_acc);
505df8bae1dSRodney W. Grimes 	}
506779f106aSGleb Smirnoff 	KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
50798c92369SNavdeep Parhar 	if (sb->sb_upcall != NULL) {
508eb1b1807SGleb Smirnoff 		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
50974fb0ba7SJohn Baldwin 		if (ret == SU_ISCONNECTED) {
51074fb0ba7SJohn Baldwin 			KASSERT(sb == &so->so_rcv,
51174fb0ba7SJohn Baldwin 			    ("SO_SND upcall returned SU_ISCONNECTED"));
51274fb0ba7SJohn Baldwin 			soupcall_clear(so, SO_RCV);
51374fb0ba7SJohn Baldwin 		}
51474fb0ba7SJohn Baldwin 	} else
51574fb0ba7SJohn Baldwin 		ret = SU_OK;
5164cc20ab1SSeigo Tanimura 	if (sb->sb_flags & SB_AIO)
517f3215338SJohn Baldwin 		sowakeup_aio(so, sb);
51874fb0ba7SJohn Baldwin 	SOCKBUF_UNLOCK(sb);
519555b3e2fSGleb Smirnoff 	if (ret == SU_ISCONNECTED)
52074fb0ba7SJohn Baldwin 		soisconnected(so);
52174fb0ba7SJohn Baldwin 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
52274fb0ba7SJohn Baldwin 		pgsigio(&so->so_sigio, SIGIO, 0);
523a34b7046SRobert Watson 	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
524df8bae1dSRodney W. Grimes }
525df8bae1dSRodney W. Grimes 
526df8bae1dSRodney W. Grimes /*
527df8bae1dSRodney W. Grimes  * Socket buffer (struct sockbuf) utility routines.
528df8bae1dSRodney W. Grimes  *
529050ac265SRobert Watson  * Each socket contains two socket buffers: one for sending data and one for
530050ac265SRobert Watson  * receiving data.  Each buffer contains a queue of mbufs, information about
531050ac265SRobert Watson  * the number of mbufs and amount of data in the queue, and other fields
532050ac265SRobert Watson  * allowing select() statements and notification on data availability to be
533050ac265SRobert Watson  * implemented.
534df8bae1dSRodney W. Grimes  *
535050ac265SRobert Watson  * Data stored in a socket buffer is maintained as a list of records.  Each
536050ac265SRobert Watson  * record is a list of mbufs chained together with the m_next field.  Records
537050ac265SRobert Watson  * are chained together with the m_nextpkt field. The upper level routine
538050ac265SRobert Watson  * soreceive() expects the following conventions to be observed when placing
539050ac265SRobert Watson  * information in the receive buffer:
540df8bae1dSRodney W. Grimes  *
541050ac265SRobert Watson  * 1. If the protocol requires each message be preceded by the sender's name,
542050ac265SRobert Watson  *    then a record containing that name must be present before any
543050ac265SRobert Watson  *    associated data (mbuf's must be of type MT_SONAME).
544050ac265SRobert Watson  * 2. If the protocol supports the exchange of ``access rights'' (really just
545050ac265SRobert Watson  *    additional data associated with the message), and there are ``rights''
546050ac265SRobert Watson  *    to be received, then a record containing this data should be present
547050ac265SRobert Watson  *    (mbuf's must be of type MT_RIGHTS).
548050ac265SRobert Watson  * 3. If a name or rights record exists, then it must be followed by a data
549050ac265SRobert Watson  *    record, perhaps of zero length.
550df8bae1dSRodney W. Grimes  *
551df8bae1dSRodney W. Grimes  * Before using a new socket structure it is first necessary to reserve
552df8bae1dSRodney W. Grimes  * buffer space to the socket, by calling sbreserve().  This should commit
553df8bae1dSRodney W. Grimes  * some of the available buffer space in the system buffer pool for the
554050ac265SRobert Watson  * socket (currently, it does nothing but enforce limits).  The space should
555050ac265SRobert Watson  * be released by calling sbrelease() when the socket is destroyed.
556df8bae1dSRodney W. Grimes  */
55726f9a767SRodney W. Grimes int
558050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
559df8bae1dSRodney W. Grimes {
560b40ce416SJulian Elischer 	struct thread *td = curthread;
561df8bae1dSRodney W. Grimes 
5623f11a2f3SRobert Watson 	SOCKBUF_LOCK(&so->so_snd);
5639535efc0SRobert Watson 	SOCKBUF_LOCK(&so->so_rcv);
5643f11a2f3SRobert Watson 	if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0)
5653f11a2f3SRobert Watson 		goto bad;
5663f11a2f3SRobert Watson 	if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0)
5673f11a2f3SRobert Watson 		goto bad2;
568df8bae1dSRodney W. Grimes 	if (so->so_rcv.sb_lowat == 0)
569df8bae1dSRodney W. Grimes 		so->so_rcv.sb_lowat = 1;
570df8bae1dSRodney W. Grimes 	if (so->so_snd.sb_lowat == 0)
571df8bae1dSRodney W. Grimes 		so->so_snd.sb_lowat = MCLBYTES;
572df8bae1dSRodney W. Grimes 	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
573df8bae1dSRodney W. Grimes 		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
5743f11a2f3SRobert Watson 	SOCKBUF_UNLOCK(&so->so_rcv);
5759535efc0SRobert Watson 	SOCKBUF_UNLOCK(&so->so_snd);
576df8bae1dSRodney W. Grimes 	return (0);
577df8bae1dSRodney W. Grimes bad2:
5783f11a2f3SRobert Watson 	sbrelease_locked(&so->so_snd, so);
579df8bae1dSRodney W. Grimes bad:
5803f11a2f3SRobert Watson 	SOCKBUF_UNLOCK(&so->so_rcv);
5813f11a2f3SRobert Watson 	SOCKBUF_UNLOCK(&so->so_snd);
582df8bae1dSRodney W. Grimes 	return (ENOBUFS);
583df8bae1dSRodney W. Grimes }
584df8bae1dSRodney W. Grimes 
58579cb7eb4SDavid Greenman static int
58679cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
58779cb7eb4SDavid Greenman {
58879cb7eb4SDavid Greenman 	int error = 0;
58986a93d51SJohn Baldwin 	u_long tmp_sb_max = sb_max;
59079cb7eb4SDavid Greenman 
59186a93d51SJohn Baldwin 	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
59279cb7eb4SDavid Greenman 	if (error || !req->newptr)
59379cb7eb4SDavid Greenman 		return (error);
59486a93d51SJohn Baldwin 	if (tmp_sb_max < MSIZE + MCLBYTES)
59579cb7eb4SDavid Greenman 		return (EINVAL);
59686a93d51SJohn Baldwin 	sb_max = tmp_sb_max;
59779cb7eb4SDavid Greenman 	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
59879cb7eb4SDavid Greenman 	return (0);
59979cb7eb4SDavid Greenman }
60079cb7eb4SDavid Greenman 
601df8bae1dSRodney W. Grimes /*
602050ac265SRobert Watson  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
603050ac265SRobert Watson  * become limiting if buffering efficiency is near the normal case.
604df8bae1dSRodney W. Grimes  */
60526f9a767SRodney W. Grimes int
606050ac265SRobert Watson sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
607050ac265SRobert Watson     struct thread *td)
608df8bae1dSRodney W. Grimes {
60991d5354aSJohn Baldwin 	rlim_t sbsize_limit;
610ecf72308SBrian Feldman 
6113f11a2f3SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
6123f11a2f3SRobert Watson 
613ecf72308SBrian Feldman 	/*
6147978014dSRobert Watson 	 * When a thread is passed, we take into account the thread's socket
6157978014dSRobert Watson 	 * buffer size limit.  The caller will generally pass curthread, but
6167978014dSRobert Watson 	 * in the TCP input path, NULL will be passed to indicate that no
6177978014dSRobert Watson 	 * appropriate thread resource limits are available.  In that case,
6187978014dSRobert Watson 	 * we don't apply a process limit.
619ecf72308SBrian Feldman 	 */
62079cb7eb4SDavid Greenman 	if (cc > sb_max_adj)
621df8bae1dSRodney W. Grimes 		return (0);
62291d5354aSJohn Baldwin 	if (td != NULL) {
623f6f6d240SMateusz Guzik 		sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
62491d5354aSJohn Baldwin 	} else
62591d5354aSJohn Baldwin 		sbsize_limit = RLIM_INFINITY;
626f535380cSDon Lewis 	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
62791d5354aSJohn Baldwin 	    sbsize_limit))
628ecf72308SBrian Feldman 		return (0);
6294b29bc4fSGarrett Wollman 	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
630df8bae1dSRodney W. Grimes 	if (sb->sb_lowat > sb->sb_hiwat)
631df8bae1dSRodney W. Grimes 		sb->sb_lowat = sb->sb_hiwat;
632df8bae1dSRodney W. Grimes 	return (1);
633df8bae1dSRodney W. Grimes }
634df8bae1dSRodney W. Grimes 
6353f11a2f3SRobert Watson int
63664290befSGleb Smirnoff sbsetopt(struct socket *so, int cmd, u_long cc)
6373f11a2f3SRobert Watson {
63864290befSGleb Smirnoff 	struct sockbuf *sb;
63964290befSGleb Smirnoff 	short *flags;
64064290befSGleb Smirnoff 	u_int *hiwat, *lowat;
6413f11a2f3SRobert Watson 	int error;
6423f11a2f3SRobert Watson 
643b2037136SMatt Macy 	sb = NULL;
64464290befSGleb Smirnoff 	SOCK_LOCK(so);
64564290befSGleb Smirnoff 	if (SOLISTENING(so)) {
64664290befSGleb Smirnoff 		switch (cmd) {
64764290befSGleb Smirnoff 			case SO_SNDLOWAT:
64864290befSGleb Smirnoff 			case SO_SNDBUF:
64964290befSGleb Smirnoff 				lowat = &so->sol_sbsnd_lowat;
65064290befSGleb Smirnoff 				hiwat = &so->sol_sbsnd_hiwat;
65164290befSGleb Smirnoff 				flags = &so->sol_sbsnd_flags;
65264290befSGleb Smirnoff 				break;
65364290befSGleb Smirnoff 			case SO_RCVLOWAT:
65464290befSGleb Smirnoff 			case SO_RCVBUF:
65564290befSGleb Smirnoff 				lowat = &so->sol_sbrcv_lowat;
65664290befSGleb Smirnoff 				hiwat = &so->sol_sbrcv_hiwat;
65764290befSGleb Smirnoff 				flags = &so->sol_sbrcv_flags;
65864290befSGleb Smirnoff 				break;
65964290befSGleb Smirnoff 		}
66064290befSGleb Smirnoff 	} else {
66164290befSGleb Smirnoff 		switch (cmd) {
66264290befSGleb Smirnoff 			case SO_SNDLOWAT:
66364290befSGleb Smirnoff 			case SO_SNDBUF:
66464290befSGleb Smirnoff 				sb = &so->so_snd;
66564290befSGleb Smirnoff 				break;
66664290befSGleb Smirnoff 			case SO_RCVLOWAT:
66764290befSGleb Smirnoff 			case SO_RCVBUF:
66864290befSGleb Smirnoff 				sb = &so->so_rcv;
66964290befSGleb Smirnoff 				break;
67064290befSGleb Smirnoff 		}
67164290befSGleb Smirnoff 		flags = &sb->sb_flags;
67264290befSGleb Smirnoff 		hiwat = &sb->sb_hiwat;
67364290befSGleb Smirnoff 		lowat = &sb->sb_lowat;
6743f11a2f3SRobert Watson 		SOCKBUF_LOCK(sb);
67564290befSGleb Smirnoff 	}
67664290befSGleb Smirnoff 
67764290befSGleb Smirnoff 	error = 0;
67864290befSGleb Smirnoff 	switch (cmd) {
67964290befSGleb Smirnoff 	case SO_SNDBUF:
68064290befSGleb Smirnoff 	case SO_RCVBUF:
68164290befSGleb Smirnoff 		if (SOLISTENING(so)) {
68264290befSGleb Smirnoff 			if (cc > sb_max_adj) {
68364290befSGleb Smirnoff 				error = ENOBUFS;
68464290befSGleb Smirnoff 				break;
68564290befSGleb Smirnoff 			}
68664290befSGleb Smirnoff 			*hiwat = cc;
68764290befSGleb Smirnoff 			if (*lowat > *hiwat)
68864290befSGleb Smirnoff 				*lowat = *hiwat;
68964290befSGleb Smirnoff 		} else {
69064290befSGleb Smirnoff 			if (!sbreserve_locked(sb, cc, so, curthread))
69164290befSGleb Smirnoff 				error = ENOBUFS;
69264290befSGleb Smirnoff 		}
69364290befSGleb Smirnoff 		if (error == 0)
69464290befSGleb Smirnoff 			*flags &= ~SB_AUTOSIZE;
69564290befSGleb Smirnoff 		break;
69664290befSGleb Smirnoff 	case SO_SNDLOWAT:
69764290befSGleb Smirnoff 	case SO_RCVLOWAT:
69864290befSGleb Smirnoff 		/*
69964290befSGleb Smirnoff 		 * Make sure the low-water is never greater than the
70064290befSGleb Smirnoff 		 * high-water.
70164290befSGleb Smirnoff 		 */
70264290befSGleb Smirnoff 		*lowat = (cc > *hiwat) ? *hiwat : cc;
70364290befSGleb Smirnoff 		break;
70464290befSGleb Smirnoff 	}
70564290befSGleb Smirnoff 
70664290befSGleb Smirnoff 	if (!SOLISTENING(so))
7073f11a2f3SRobert Watson 		SOCKBUF_UNLOCK(sb);
70864290befSGleb Smirnoff 	SOCK_UNLOCK(so);
7093f11a2f3SRobert Watson 	return (error);
7103f11a2f3SRobert Watson }
7113f11a2f3SRobert Watson 
712df8bae1dSRodney W. Grimes /*
713df8bae1dSRodney W. Grimes  * Free mbufs held by a socket, and reserved mbuf space.
714df8bae1dSRodney W. Grimes  */
7153f0bfcccSRobert Watson void
716050ac265SRobert Watson sbrelease_internal(struct sockbuf *sb, struct socket *so)
717eaa6dfbcSRobert Watson {
718eaa6dfbcSRobert Watson 
719eaa6dfbcSRobert Watson 	sbflush_internal(sb);
720eaa6dfbcSRobert Watson 	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
721eaa6dfbcSRobert Watson 	    RLIM_INFINITY);
722eaa6dfbcSRobert Watson 	sb->sb_mbmax = 0;
723eaa6dfbcSRobert Watson }
724eaa6dfbcSRobert Watson 
/*
 * Release a socket buffer's contents and reservation on behalf of a caller
 * that already holds the sockbuf lock: assert the lock, then defer to
 * sbrelease_internal().
 */
void
sbrelease_locked(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbrelease_internal(sb, so);
}
733df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point: take the sockbuf lock around sbrelease_locked().
 */
void
sbrelease(struct sockbuf *sb, struct socket *so)
{

	SOCKBUF_LOCK(sb);
	sbrelease_locked(sb, so);
	SOCKBUF_UNLOCK(sb);
}
742eaa6dfbcSRobert Watson 
/*
 * Tear down a socket buffer: release its contents and reservation via
 * sbrelease_internal() and, when the kernel is built with KERN_TLS, free
 * any attached TLS session info and clear the pointer.
 */
void
sbdestroy(struct sockbuf *sb, struct socket *so)
{

	sbrelease_internal(sb, so);
#ifdef KERN_TLS
	if (sb->sb_tls_info != NULL) {
		ktls_free(sb->sb_tls_info);
		sb->sb_tls_info = NULL;
	}
#endif
}
754eaa6dfbcSRobert Watson 
/*
 * Routines to add and remove data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and is then removed from the
 * socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
 */
777395bb186SSam Leffler #ifdef SOCKBUF_DEBUG
778395bb186SSam Leffler void
779395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line)
780395bb186SSam Leffler {
781395bb186SSam Leffler 	struct mbuf *m = sb->sb_mb;
782395bb186SSam Leffler 
783a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
784a34b7046SRobert Watson 
785395bb186SSam Leffler 	while (m && m->m_nextpkt)
786395bb186SSam Leffler 		m = m->m_nextpkt;
787395bb186SSam Leffler 
788395bb186SSam Leffler 	if (m != sb->sb_lastrecord) {
789395bb186SSam Leffler 		printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
790395bb186SSam Leffler 			__func__, sb->sb_mb, sb->sb_lastrecord, m);
791395bb186SSam Leffler 		printf("packet chain:\n");
792395bb186SSam Leffler 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
793395bb186SSam Leffler 			printf("\t%p\n", m);
794395bb186SSam Leffler 		panic("%s from %s:%u", __func__, file, line);
795395bb186SSam Leffler 	}
796395bb186SSam Leffler }
797395bb186SSam Leffler 
798395bb186SSam Leffler void
799395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line)
800395bb186SSam Leffler {
801395bb186SSam Leffler 	struct mbuf *m = sb->sb_mb;
802395bb186SSam Leffler 	struct mbuf *n;
803395bb186SSam Leffler 
804a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
805a34b7046SRobert Watson 
806395bb186SSam Leffler 	while (m && m->m_nextpkt)
807395bb186SSam Leffler 		m = m->m_nextpkt;
808395bb186SSam Leffler 
809395bb186SSam Leffler 	while (m && m->m_next)
810395bb186SSam Leffler 		m = m->m_next;
811395bb186SSam Leffler 
812395bb186SSam Leffler 	if (m != sb->sb_mbtail) {
813395bb186SSam Leffler 		printf("%s: sb_mb %p sb_mbtail %p last %p\n",
814395bb186SSam Leffler 			__func__, sb->sb_mb, sb->sb_mbtail, m);
815395bb186SSam Leffler 		printf("packet tree:\n");
816395bb186SSam Leffler 		for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
817395bb186SSam Leffler 			printf("\t");
818395bb186SSam Leffler 			for (n = m; n != NULL; n = n->m_next)
819395bb186SSam Leffler 				printf("%p ", n);
820395bb186SSam Leffler 			printf("\n");
821395bb186SSam Leffler 		}
822395bb186SSam Leffler 		panic("%s from %s:%u", __func__, file, line);
823395bb186SSam Leffler 	}
824*3c0e5685SJohn Baldwin 
825*3c0e5685SJohn Baldwin #ifdef KERN_TLS
826*3c0e5685SJohn Baldwin 	m = sb->sb_mtls;
827*3c0e5685SJohn Baldwin 	while (m && m->m_next)
828*3c0e5685SJohn Baldwin 		m = m->m_next;
829*3c0e5685SJohn Baldwin 
830*3c0e5685SJohn Baldwin 	if (m != sb->sb_mtlstail) {
831*3c0e5685SJohn Baldwin 		printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
832*3c0e5685SJohn Baldwin 			__func__, sb->sb_mtls, sb->sb_mtlstail, m);
833*3c0e5685SJohn Baldwin 		printf("TLS packet tree:\n");
834*3c0e5685SJohn Baldwin 		printf("\t");
835*3c0e5685SJohn Baldwin 		for (m = sb->sb_mtls; m != NULL; m = m->m_next) {
836*3c0e5685SJohn Baldwin 			printf("%p ", m);
837*3c0e5685SJohn Baldwin 		}
838*3c0e5685SJohn Baldwin 		printf("\n");
839*3c0e5685SJohn Baldwin 		panic("%s from %s:%u", __func__, file, line);
840*3c0e5685SJohn Baldwin 	}
841*3c0e5685SJohn Baldwin #endif
842395bb186SSam Leffler }
843395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */
844395bb186SSam Leffler 
/*
 * Link record m0 at the end of sb's record list: it becomes sb_mb when the
 * list is empty (sb_lastrecord is NULL), otherwise it is chained after the
 * current last record; either way it becomes the new sb_lastrecord.
 * Asserts the sockbuf lock.  Both arguments are evaluated more than once.
 */
#define SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord == NULL)				\
		(sb)->sb_mb = (m0);					\
	else								\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
853395bb186SSam Leffler 
854df8bae1dSRodney W. Grimes /*
855050ac265SRobert Watson  * Append mbuf chain m to the last record in the socket buffer sb.  The
856050ac265SRobert Watson  * additional space associated the mbuf chain is recorded in sb.  Empty mbufs
857050ac265SRobert Watson  * are discarded and mbufs are compacted where possible.
858df8bae1dSRodney W. Grimes  */
85926f9a767SRodney W. Grimes void
860829fae90SGleb Smirnoff sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
861df8bae1dSRodney W. Grimes {
862050ac265SRobert Watson 	struct mbuf *n;
863df8bae1dSRodney W. Grimes 
864a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
865a34b7046SRobert Watson 
866b85f65afSPedro F. Giffuni 	if (m == NULL)
867df8bae1dSRodney W. Grimes 		return;
868829fae90SGleb Smirnoff 	sbm_clrprotoflags(m, flags);
869395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
870797f2d22SPoul-Henning Kamp 	n = sb->sb_mb;
871797f2d22SPoul-Henning Kamp 	if (n) {
872df8bae1dSRodney W. Grimes 		while (n->m_nextpkt)
873df8bae1dSRodney W. Grimes 			n = n->m_nextpkt;
874df8bae1dSRodney W. Grimes 		do {
875df8bae1dSRodney W. Grimes 			if (n->m_flags & M_EOR) {
876a34b7046SRobert Watson 				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
877df8bae1dSRodney W. Grimes 				return;
878df8bae1dSRodney W. Grimes 			}
879df8bae1dSRodney W. Grimes 		} while (n->m_next && (n = n->m_next));
880395bb186SSam Leffler 	} else {
881395bb186SSam Leffler 		/*
882395bb186SSam Leffler 		 * XXX Would like to simply use sb_mbtail here, but
883395bb186SSam Leffler 		 * XXX I need to verify that I won't miss an EOR that
884395bb186SSam Leffler 		 * XXX way.
885395bb186SSam Leffler 		 */
886395bb186SSam Leffler 		if ((n = sb->sb_lastrecord) != NULL) {
887395bb186SSam Leffler 			do {
888395bb186SSam Leffler 				if (n->m_flags & M_EOR) {
889a34b7046SRobert Watson 					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
890395bb186SSam Leffler 					return;
891395bb186SSam Leffler 				}
892395bb186SSam Leffler 			} while (n->m_next && (n = n->m_next));
893395bb186SSam Leffler 		} else {
894395bb186SSam Leffler 			/*
895395bb186SSam Leffler 			 * If this is the first record in the socket buffer,
896395bb186SSam Leffler 			 * it's also the last record.
897395bb186SSam Leffler 			 */
898395bb186SSam Leffler 			sb->sb_lastrecord = m;
899395bb186SSam Leffler 		}
900df8bae1dSRodney W. Grimes 	}
901df8bae1dSRodney W. Grimes 	sbcompress(sb, m, n);
902395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
903395bb186SSam Leffler }
904395bb186SSam Leffler 
/*
 * Unlocked entry point for appending mbuf chain m to the last record of sb:
 * takes the sockbuf lock around sbappend_locked().
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
918a34b7046SRobert Watson 
#ifdef KERN_TLS
/*
 * Append an mbuf chain containing encrypted TLS data.  Every mbuf is
 * marked M_NOTREADY until it has been decrypted and stored as a TLS
 * record.  The chain is added after sb_mtlstail by sbcompress_ktls_rx(),
 * after which ktls_check_rx() is called on the buffer.
 */
static void
sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
{
	struct mbuf *cur;

	SBLASTMBUFCHK(sb);

	/* Remove all packet headers and mbuf tags to get a pure data chain. */
	m_demote(m, 1, 0);

	cur = m;
	while (cur != NULL) {
		cur->m_flags |= M_NOTREADY;
		cur = cur->m_next;
	}

	sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
	ktls_check_rx(sb);
}
#endif
941*3c0e5685SJohn Baldwin 
942a34b7046SRobert Watson /*
943050ac265SRobert Watson  * This version of sbappend() should only be used when the caller absolutely
944050ac265SRobert Watson  * knows that there will never be more than one record in the socket buffer,
945050ac265SRobert Watson  * that is, a stream protocol (such as TCP).
946395bb186SSam Leffler  */
947395bb186SSam Leffler void
948651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
949395bb186SSam Leffler {
950a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
951395bb186SSam Leffler 
952395bb186SSam Leffler 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
953*3c0e5685SJohn Baldwin 
954*3c0e5685SJohn Baldwin #ifdef KERN_TLS
955*3c0e5685SJohn Baldwin 	/*
956*3c0e5685SJohn Baldwin 	 * Decrypted TLS records are appended as records via
957*3c0e5685SJohn Baldwin 	 * sbappendrecord().  TCP passes encrypted TLS records to this
958*3c0e5685SJohn Baldwin 	 * function which must be scheduled for decryption.
959*3c0e5685SJohn Baldwin 	 */
960*3c0e5685SJohn Baldwin 	if (sb->sb_flags & SB_TLS_RX) {
961*3c0e5685SJohn Baldwin 		sbappend_ktls_rx(sb, m);
962*3c0e5685SJohn Baldwin 		return;
963*3c0e5685SJohn Baldwin 	}
964*3c0e5685SJohn Baldwin #endif
965*3c0e5685SJohn Baldwin 
966395bb186SSam Leffler 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
967395bb186SSam Leffler 
968395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
969395bb186SSam Leffler 
970b2e60773SJohn Baldwin #ifdef KERN_TLS
971b2e60773SJohn Baldwin 	if (sb->sb_tls_info != NULL)
972b2e60773SJohn Baldwin 		ktls_seq(sb, m);
973b2e60773SJohn Baldwin #endif
974b2e60773SJohn Baldwin 
975844cacd1SGleb Smirnoff 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
976651e4e6aSGleb Smirnoff 	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
977844cacd1SGleb Smirnoff 
978395bb186SSam Leffler 	sbcompress(sb, m, sb->sb_mbtail);
979395bb186SSam Leffler 
980395bb186SSam Leffler 	sb->sb_lastrecord = sb->sb_mb;
981395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
982df8bae1dSRodney W. Grimes }
983df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point for the single-record (stream) append: takes the
 * sockbuf lock around sbappendstream_locked().  As with that routine, it
 * should only be used when the buffer never holds more than one record.
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
997a34b7046SRobert Watson 
998df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG
99926f9a767SRodney W. Grimes void
100057f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line)
1001df8bae1dSRodney W. Grimes {
10020f9d0a73SGleb Smirnoff 	struct mbuf *m, *n, *fnrdy;
10030f9d0a73SGleb Smirnoff 	u_long acc, ccc, mbcnt;
1004*3c0e5685SJohn Baldwin #ifdef KERN_TLS
1005*3c0e5685SJohn Baldwin 	u_long tlscc;
1006*3c0e5685SJohn Baldwin #endif
1007df8bae1dSRodney W. Grimes 
1008a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
1009a34b7046SRobert Watson 
10100f9d0a73SGleb Smirnoff 	acc = ccc = mbcnt = 0;
10110f9d0a73SGleb Smirnoff 	fnrdy = NULL;
101257f43a45SGleb Smirnoff 
10130931333fSBill Fenner 	for (m = sb->sb_mb; m; m = n) {
10140931333fSBill Fenner 	    n = m->m_nextpkt;
10150931333fSBill Fenner 	    for (; m; m = m->m_next) {
101657f43a45SGleb Smirnoff 		if (m->m_len == 0) {
101757f43a45SGleb Smirnoff 			printf("sb %p empty mbuf %p\n", sb, m);
101857f43a45SGleb Smirnoff 			goto fail;
101957f43a45SGleb Smirnoff 		}
10200f9d0a73SGleb Smirnoff 		if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
10210f9d0a73SGleb Smirnoff 			if (m != sb->sb_fnrdy) {
10220f9d0a73SGleb Smirnoff 				printf("sb %p: fnrdy %p != m %p\n",
10230f9d0a73SGleb Smirnoff 				    sb, sb->sb_fnrdy, m);
10240f9d0a73SGleb Smirnoff 				goto fail;
10250f9d0a73SGleb Smirnoff 			}
10260f9d0a73SGleb Smirnoff 			fnrdy = m;
10270f9d0a73SGleb Smirnoff 		}
10280f9d0a73SGleb Smirnoff 		if (fnrdy) {
10290f9d0a73SGleb Smirnoff 			if (!(m->m_flags & M_NOTAVAIL)) {
10300f9d0a73SGleb Smirnoff 				printf("sb %p: fnrdy %p, m %p is avail\n",
10310f9d0a73SGleb Smirnoff 				    sb, sb->sb_fnrdy, m);
10320f9d0a73SGleb Smirnoff 				goto fail;
10330f9d0a73SGleb Smirnoff 			}
10340f9d0a73SGleb Smirnoff 		} else
10350f9d0a73SGleb Smirnoff 			acc += m->m_len;
10360f9d0a73SGleb Smirnoff 		ccc += m->m_len;
1037df8bae1dSRodney W. Grimes 		mbcnt += MSIZE;
1038313861b8SJulian Elischer 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
1039df8bae1dSRodney W. Grimes 			mbcnt += m->m_ext.ext_size;
10400931333fSBill Fenner 	    }
1041df8bae1dSRodney W. Grimes 	}
1042*3c0e5685SJohn Baldwin #ifdef KERN_TLS
1043*3c0e5685SJohn Baldwin 	/*
1044*3c0e5685SJohn Baldwin 	 * Account for mbufs "detached" by ktls_detach_record() while
1045*3c0e5685SJohn Baldwin 	 * they are decrypted by ktls_decrypt().  tlsdcc gives a count
1046*3c0e5685SJohn Baldwin 	 * of the detached bytes that are included in ccc.  The mbufs
1047*3c0e5685SJohn Baldwin 	 * and clusters are not included in the socket buffer
1048*3c0e5685SJohn Baldwin 	 * accounting.
1049*3c0e5685SJohn Baldwin 	 */
1050*3c0e5685SJohn Baldwin 	ccc += sb->sb_tlsdcc;
1051*3c0e5685SJohn Baldwin 
1052*3c0e5685SJohn Baldwin 	tlscc = 0;
1053*3c0e5685SJohn Baldwin 	for (m = sb->sb_mtls; m; m = m->m_next) {
1054*3c0e5685SJohn Baldwin 		if (m->m_nextpkt != NULL) {
1055*3c0e5685SJohn Baldwin 			printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
1056*3c0e5685SJohn Baldwin 			goto fail;
1057*3c0e5685SJohn Baldwin 		}
1058*3c0e5685SJohn Baldwin 		if ((m->m_flags & M_NOTREADY) == 0) {
1059*3c0e5685SJohn Baldwin 			printf("sb %p TLS mbuf %p ready\n", sb, m);
1060*3c0e5685SJohn Baldwin 			goto fail;
1061*3c0e5685SJohn Baldwin 		}
1062*3c0e5685SJohn Baldwin 		tlscc += m->m_len;
1063*3c0e5685SJohn Baldwin 		ccc += m->m_len;
1064*3c0e5685SJohn Baldwin 		mbcnt += MSIZE;
1065*3c0e5685SJohn Baldwin 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
1066*3c0e5685SJohn Baldwin 			mbcnt += m->m_ext.ext_size;
1067*3c0e5685SJohn Baldwin 	}
1068*3c0e5685SJohn Baldwin 
1069*3c0e5685SJohn Baldwin 	if (sb->sb_tlscc != tlscc) {
1070*3c0e5685SJohn Baldwin 		printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
1071*3c0e5685SJohn Baldwin 		    sb->sb_tlsdcc);
1072*3c0e5685SJohn Baldwin 		goto fail;
1073*3c0e5685SJohn Baldwin 	}
1074*3c0e5685SJohn Baldwin #endif
10750f9d0a73SGleb Smirnoff 	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
10760f9d0a73SGleb Smirnoff 		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
10770f9d0a73SGleb Smirnoff 		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
1078*3c0e5685SJohn Baldwin #ifdef KERN_TLS
1079*3c0e5685SJohn Baldwin 		printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
1080*3c0e5685SJohn Baldwin 		    sb->sb_tlsdcc);
1081*3c0e5685SJohn Baldwin #endif
108257f43a45SGleb Smirnoff 		goto fail;
1083df8bae1dSRodney W. Grimes 	}
108457f43a45SGleb Smirnoff 	return;
108557f43a45SGleb Smirnoff fail:
108657f43a45SGleb Smirnoff 	panic("%s from %s:%u", __func__, file, line);
1087df8bae1dSRodney W. Grimes }
1088df8bae1dSRodney W. Grimes #endif
1089df8bae1dSRodney W. Grimes 
1090df8bae1dSRodney W. Grimes /*
1091050ac265SRobert Watson  * As above, except the mbuf chain begins a new record.
1092df8bae1dSRodney W. Grimes  */
109326f9a767SRodney W. Grimes void
1094050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
1095df8bae1dSRodney W. Grimes {
1096050ac265SRobert Watson 	struct mbuf *m;
1097df8bae1dSRodney W. Grimes 
1098a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
1099a34b7046SRobert Watson 
1100b85f65afSPedro F. Giffuni 	if (m0 == NULL)
1101df8bae1dSRodney W. Grimes 		return;
110253b680caSGleb Smirnoff 	m_clrprotoflags(m0);
1103df8bae1dSRodney W. Grimes 	/*
1104050ac265SRobert Watson 	 * Put the first mbuf on the queue.  Note this permits zero length
1105050ac265SRobert Watson 	 * records.
1106df8bae1dSRodney W. Grimes 	 */
1107df8bae1dSRodney W. Grimes 	sballoc(sb, m0);
1108395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1109395bb186SSam Leffler 	SBLINKRECORD(sb, m0);
1110e72a94adSMaksim Yevmenkin 	sb->sb_mbtail = m0;
1111df8bae1dSRodney W. Grimes 	m = m0->m_next;
1112df8bae1dSRodney W. Grimes 	m0->m_next = 0;
1113df8bae1dSRodney W. Grimes 	if (m && (m0->m_flags & M_EOR)) {
1114df8bae1dSRodney W. Grimes 		m0->m_flags &= ~M_EOR;
1115df8bae1dSRodney W. Grimes 		m->m_flags |= M_EOR;
1116df8bae1dSRodney W. Grimes 	}
1117e72a94adSMaksim Yevmenkin 	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
1118df8bae1dSRodney W. Grimes 	sbcompress(sb, m, m0);
1119df8bae1dSRodney W. Grimes }
1120df8bae1dSRodney W. Grimes 
/*
 * Unlocked entry point for appending mbuf chain m0 as a new record: takes
 * the sockbuf lock around sbappendrecord_locked().
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{

	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
1132a34b7046SRobert Watson 
11338de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */
11348de34a88SAlan Somers static int
11358de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
11368de34a88SAlan Somers     struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
1137df8bae1dSRodney W. Grimes {
1138395bb186SSam Leffler 	struct mbuf *m, *n, *nlast;
1139c43cad1aSScott Long #if MSIZE <= 256
1140df8bae1dSRodney W. Grimes 	if (asa->sa_len > MLEN)
1141df8bae1dSRodney W. Grimes 		return (0);
1142c43cad1aSScott Long #endif
1143c8b59ea7SGleb Smirnoff 	m = m_get(M_NOWAIT, MT_SONAME);
1144c8b59ea7SGleb Smirnoff 	if (m == NULL)
1145df8bae1dSRodney W. Grimes 		return (0);
1146df8bae1dSRodney W. Grimes 	m->m_len = asa->sa_len;
114780208239SAlfred Perlstein 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
1148c33a2313SAndrey V. Elsukov 	if (m0) {
114953b680caSGleb Smirnoff 		m_clrprotoflags(m0);
115057386f5dSAndrey V. Elsukov 		m_tag_delete_chain(m0, NULL);
1151c33a2313SAndrey V. Elsukov 		/*
1152c33a2313SAndrey V. Elsukov 		 * Clear some persistent info from pkthdr.
1153c33a2313SAndrey V. Elsukov 		 * We don't use m_demote(), because some netgraph consumers
1154c33a2313SAndrey V. Elsukov 		 * expect M_PKTHDR presence.
1155c33a2313SAndrey V. Elsukov 		 */
1156c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.rcvif = NULL;
1157c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.flowid = 0;
1158c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.csum_flags = 0;
1159c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.fibnum = 0;
1160c33a2313SAndrey V. Elsukov 		m0->m_pkthdr.rsstype = 0;
1161c33a2313SAndrey V. Elsukov 	}
11628de34a88SAlan Somers 	if (ctrl_last)
11638de34a88SAlan Somers 		ctrl_last->m_next = m0;	/* concatenate data to control */
1164df8bae1dSRodney W. Grimes 	else
1165df8bae1dSRodney W. Grimes 		control = m0;
1166df8bae1dSRodney W. Grimes 	m->m_next = control;
1167395bb186SSam Leffler 	for (n = m; n->m_next != NULL; n = n->m_next)
1168df8bae1dSRodney W. Grimes 		sballoc(sb, n);
1169395bb186SSam Leffler 	sballoc(sb, n);
1170395bb186SSam Leffler 	nlast = n;
1171395bb186SSam Leffler 	SBLINKRECORD(sb, m);
1172395bb186SSam Leffler 
1173395bb186SSam Leffler 	sb->sb_mbtail = nlast;
1174395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
1175395bb186SSam Leffler 
1176395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1177df8bae1dSRodney W. Grimes 	return (1);
1178df8bae1dSRodney W. Grimes }
1179df8bae1dSRodney W. Grimes 
1180a34b7046SRobert Watson /*
1181050ac265SRobert Watson  * Append address and data, and optionally, control (ancillary) data to the
1182050ac265SRobert Watson  * receive queue of a socket.  If present, m0 must include a packet header
1183050ac265SRobert Watson  * with total length.  Returns 0 if no space in sockbuf or insufficient
1184050ac265SRobert Watson  * mbufs.
1185a34b7046SRobert Watson  */
118626f9a767SRodney W. Grimes int
11878de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
11888de34a88SAlan Somers     struct mbuf *m0, struct mbuf *control)
11898de34a88SAlan Somers {
11908de34a88SAlan Somers 	struct mbuf *ctrl_last;
11918de34a88SAlan Somers 	int space = asa->sa_len;
11928de34a88SAlan Somers 
11938de34a88SAlan Somers 	SOCKBUF_LOCK_ASSERT(sb);
11948de34a88SAlan Somers 
11958de34a88SAlan Somers 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
11968de34a88SAlan Somers 		panic("sbappendaddr_locked");
11978de34a88SAlan Somers 	if (m0)
11988de34a88SAlan Somers 		space += m0->m_pkthdr.len;
11998de34a88SAlan Somers 	space += m_length(control, &ctrl_last);
12008de34a88SAlan Somers 
12018de34a88SAlan Somers 	if (space > sbspace(sb))
12028de34a88SAlan Somers 		return (0);
12038de34a88SAlan Somers 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
12048de34a88SAlan Somers }
12058de34a88SAlan Somers 
12068de34a88SAlan Somers /*
12078de34a88SAlan Somers  * Append address and data, and optionally, control (ancillary) data to the
12088de34a88SAlan Somers  * receive queue of a socket.  If present, m0 must include a packet header
12098de34a88SAlan Somers  * with total length.  Returns 0 if insufficient mbufs.  Does not validate space
12108de34a88SAlan Somers  * on the receiving sockbuf.
12118de34a88SAlan Somers  */
12128de34a88SAlan Somers int
12138de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
12148de34a88SAlan Somers     struct mbuf *m0, struct mbuf *control)
12158de34a88SAlan Somers {
12168de34a88SAlan Somers 	struct mbuf *ctrl_last;
12178de34a88SAlan Somers 
12188de34a88SAlan Somers 	SOCKBUF_LOCK_ASSERT(sb);
12198de34a88SAlan Somers 
12208de34a88SAlan Somers 	ctrl_last = (control == NULL) ? NULL : m_last(control);
12218de34a88SAlan Somers 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
12228de34a88SAlan Somers }
12238de34a88SAlan Somers 
/*
 * Unlocked entry point for appending an address, data and optional control
 * data as a new record: takes the sockbuf lock around sbappendaddr_locked()
 * and passes its result through (0 if no space in sockbuf or insufficient
 * mbufs).
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int appended;

	SOCKBUF_LOCK(sb);
	appended = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (appended);
}
1241a34b7046SRobert Watson 
12425b0480f2SMark Johnston void
1243050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
124425f4ddfbSMark Johnston     struct mbuf *control, int flags)
1245df8bae1dSRodney W. Grimes {
12465b0480f2SMark Johnston 	struct mbuf *m, *mlast;
1247df8bae1dSRodney W. Grimes 
124825f4ddfbSMark Johnston 	sbm_clrprotoflags(m0, flags);
12495b0480f2SMark Johnston 	m_last(control)->m_next = m0;
1250395bb186SSam Leffler 
1251395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1252395bb186SSam Leffler 
1253395bb186SSam Leffler 	for (m = control; m->m_next; m = m->m_next)
1254df8bae1dSRodney W. Grimes 		sballoc(sb, m);
1255395bb186SSam Leffler 	sballoc(sb, m);
1256395bb186SSam Leffler 	mlast = m;
1257395bb186SSam Leffler 	SBLINKRECORD(sb, control);
1258395bb186SSam Leffler 
1259395bb186SSam Leffler 	sb->sb_mbtail = mlast;
1260395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
1261395bb186SSam Leffler 
1262395bb186SSam Leffler 	SBLASTRECORDCHK(sb);
1263df8bae1dSRodney W. Grimes }
1264df8bae1dSRodney W. Grimes 
/*
 * Locking wrapper for sbappendcontrol_locked(): append the control chain
 * followed by the data chain m0 as a new record while holding the sockbuf
 * lock for the duration of the call.
 */
void
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
    int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendcontrol_locked(sb, m0, control, flags);
	SOCKBUF_UNLOCK(sb);
}
1274a34b7046SRobert Watson 
1275df8bae1dSRodney W. Grimes /*
12767da7362bSRobert Watson  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
12777da7362bSRobert Watson  * (n).  If (n) is NULL, the buffer is presumed empty.
12787da7362bSRobert Watson  *
12797da7362bSRobert Watson  * When the data is compressed, mbufs in the chain may be handled in one of
12807da7362bSRobert Watson  * three ways:
12817da7362bSRobert Watson  *
12827da7362bSRobert Watson  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
12837da7362bSRobert Watson  *     record boundary, and no change in data type).
12847da7362bSRobert Watson  *
12857da7362bSRobert Watson  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
12867da7362bSRobert Watson  *     an mbuf already in the socket buffer.  This can occur if an
12870f9d0a73SGleb Smirnoff  *     appropriate mbuf exists, there is room, both mbufs are not marked as
12880f9d0a73SGleb Smirnoff  *     not ready, and no merging of data types will occur.
12897da7362bSRobert Watson  *
12907da7362bSRobert Watson  * (3) The mbuf may be appended to the end of the existing mbuf chain.
12917da7362bSRobert Watson  *
12927da7362bSRobert Watson  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
12937da7362bSRobert Watson  * end-of-record.
1294df8bae1dSRodney W. Grimes  */
129526f9a767SRodney W. Grimes void
1296050ac265SRobert Watson sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1297df8bae1dSRodney W. Grimes {
1298050ac265SRobert Watson 	int eor = 0;
1299050ac265SRobert Watson 	struct mbuf *o;
1300df8bae1dSRodney W. Grimes 
1301a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
1302a34b7046SRobert Watson 
1303df8bae1dSRodney W. Grimes 	while (m) {
1304df8bae1dSRodney W. Grimes 		eor |= m->m_flags & M_EOR;
1305df8bae1dSRodney W. Grimes 		if (m->m_len == 0 &&
1306df8bae1dSRodney W. Grimes 		    (eor == 0 ||
1307df8bae1dSRodney W. Grimes 		     (((o = m->m_next) || (o = n)) &&
1308df8bae1dSRodney W. Grimes 		      o->m_type == m->m_type))) {
1309395bb186SSam Leffler 			if (sb->sb_lastrecord == m)
1310395bb186SSam Leffler 				sb->sb_lastrecord = m->m_next;
1311df8bae1dSRodney W. Grimes 			m = m_free(m);
1312df8bae1dSRodney W. Grimes 			continue;
1313df8bae1dSRodney W. Grimes 		}
131432af0d74SDavid Malone 		if (n && (n->m_flags & M_EOR) == 0 &&
131532af0d74SDavid Malone 		    M_WRITABLE(n) &&
13165e0f5cfaSKip Macy 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
13170f9d0a73SGleb Smirnoff 		    !(m->m_flags & M_NOTREADY) &&
13186edfd179SGleb Smirnoff 		    !(n->m_flags & (M_NOTREADY | M_EXTPG)) &&
1319b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m) &&
1320b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(n) &&
132132af0d74SDavid Malone 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
132232af0d74SDavid Malone 		    m->m_len <= M_TRAILINGSPACE(n) &&
1323df8bae1dSRodney W. Grimes 		    n->m_type == m->m_type) {
132482334850SJohn Baldwin 			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
1325df8bae1dSRodney W. Grimes 			n->m_len += m->m_len;
13260f9d0a73SGleb Smirnoff 			sb->sb_ccc += m->m_len;
13270f9d0a73SGleb Smirnoff 			if (sb->sb_fnrdy == NULL)
13280f9d0a73SGleb Smirnoff 				sb->sb_acc += m->m_len;
132934333b16SAndre Oppermann 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
1330b3f1af6bSTim J. Robbins 				/* XXX: Probably don't need.*/
133104ac9b97SKelly Yancey 				sb->sb_ctl += m->m_len;
1332df8bae1dSRodney W. Grimes 			m = m_free(m);
1333df8bae1dSRodney W. Grimes 			continue;
1334df8bae1dSRodney W. Grimes 		}
13356edfd179SGleb Smirnoff 		if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) &&
1336b2e60773SJohn Baldwin 		    (m->m_flags & M_NOTREADY) == 0 &&
1337b2e60773SJohn Baldwin 		    !mbuf_has_tls_session(m))
133882334850SJohn Baldwin 			(void)mb_unmapped_compress(m);
1339df8bae1dSRodney W. Grimes 		if (n)
1340df8bae1dSRodney W. Grimes 			n->m_next = m;
1341df8bae1dSRodney W. Grimes 		else
1342df8bae1dSRodney W. Grimes 			sb->sb_mb = m;
1343395bb186SSam Leffler 		sb->sb_mbtail = m;
1344df8bae1dSRodney W. Grimes 		sballoc(sb, m);
1345df8bae1dSRodney W. Grimes 		n = m;
1346df8bae1dSRodney W. Grimes 		m->m_flags &= ~M_EOR;
1347df8bae1dSRodney W. Grimes 		m = m->m_next;
1348df8bae1dSRodney W. Grimes 		n->m_next = 0;
1349df8bae1dSRodney W. Grimes 	}
1350df8bae1dSRodney W. Grimes 	if (eor) {
13517da7362bSRobert Watson 		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
1352df8bae1dSRodney W. Grimes 		n->m_flags |= eor;
1353df8bae1dSRodney W. Grimes 	}
1354395bb186SSam Leffler 	SBLASTMBUFCHK(sb);
1355df8bae1dSRodney W. Grimes }
1356df8bae1dSRodney W. Grimes 
#ifdef KERN_TLS
/*
 * Counterpart of sbcompress() for encrypted TLS RX mbufs.  The chain (m) is
 * added after (n) on the 'sb_mtls' chain rather than on 'sb_mb'.  The input
 * is more constrained than in sbcompress(): every mbuf must be MT_DATA,
 * M_NOTREADY, mapped (no M_EXTPG) and free of M_EOR.
 */
static void
sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{

	SOCKBUF_LOCK_ASSERT(sb);

	while (m != NULL) {
		KASSERT((m->m_flags & M_EOR) == 0,
		    ("TLS RX mbuf %p with EOR", m));
		KASSERT(m->m_type == MT_DATA,
		    ("TLS RX mbuf %p is not MT_DATA", m));
		KASSERT((m->m_flags & M_NOTREADY) != 0,
		    ("TLS RX mbuf %p ready", m));
		KASSERT((m->m_flags & M_EXTPG) == 0,
		    ("TLS RX mbuf %p unmapped", m));

		/* Empty mbufs carry nothing; discard them. */
		if (m->m_len == 0) {
			m = m_free(m);
			continue;
		}

		/*
		 * Coalescing is fine here even though both 'n' and 'm'
		 * are NOTREADY.
		 */
		if (n != NULL &&
		    M_WRITABLE(n) &&
		    (sb->sb_flags & SB_NOCOALESCE) == 0 &&
		    (n->m_flags & M_EXTPG) == 0 &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n)) {
			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
			n->m_len += m->m_len;
			sb->sb_ccc += m->m_len;
			sb->sb_tlscc += m->m_len;
			m = m_free(m);
			continue;
		}

		/* Otherwise link the mbuf itself at the tail of sb_mtls. */
		if (n != NULL)
			n->m_next = m;
		else
			sb->sb_mtls = m;
		sb->sb_mtlstail = m;
		sballoc_ktls_rx(sb, m);

		/* Advance: m becomes the new tail, detached from the rest. */
		n = m;
		m = n->m_next;
		n->m_next = NULL;
	}
	SBLASTMBUFCHK(sb);
}
#endif
1414*3c0e5685SJohn Baldwin 
1415df8bae1dSRodney W. Grimes /*
1416050ac265SRobert Watson  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
1417df8bae1dSRodney W. Grimes  */
1418eaa6dfbcSRobert Watson static void
1419050ac265SRobert Watson sbflush_internal(struct sockbuf *sb)
1420df8bae1dSRodney W. Grimes {
1421df8bae1dSRodney W. Grimes 
1422*3c0e5685SJohn Baldwin 	while (sb->sb_mbcnt || sb->sb_tlsdcc) {
142323f84772SPierre Beyssac 		/*
1424761a9a1fSGleb Smirnoff 		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
142523f84772SPierre Beyssac 		 * we would loop forever. Panic instead.
142623f84772SPierre Beyssac 		 */
14270f9d0a73SGleb Smirnoff 		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
142823f84772SPierre Beyssac 			break;
14290f9d0a73SGleb Smirnoff 		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
143023f84772SPierre Beyssac 	}
14310f9d0a73SGleb Smirnoff 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
14320f9d0a73SGleb Smirnoff 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
14330f9d0a73SGleb Smirnoff 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
1434a34b7046SRobert Watson }
1435a34b7046SRobert Watson 
/*
 * Free all mbufs in a sockbuf; the caller must already hold the sockbuf
 * lock, which is asserted before the work is handed to sbflush_internal().
 */
void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);

	sbflush_internal(sb);
}
1443eaa6dfbcSRobert Watson 
/*
 * Free all mbufs in a sockbuf, taking and releasing the sockbuf lock
 * around the call to sbflush_locked().
 */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1452df8bae1dSRodney W. Grimes 
/*
 * Cut data from (the front of) a sockbuf.
 *
 * Removes 'len' bytes starting at the head of the buffer.  Mbufs that are
 * consumed completely are unlinked, un-accounted, and collected on a list
 * chained through m_next; that list is returned so the caller can free it
 * (for instance after dropping the sockbuf lock).  An mbuf that is only
 * partly consumed stays in the buffer with its data pointer and length
 * adjusted.  'len' must be in the range [0, sb_ccc].
 *
 * With KERN_TLS, once the regular chain is exhausted the cut continues with
 * the bytes counted in sb_tlsdcc and then with the mbufs on sb_mtls.
 */
static struct mbuf *
sbcut_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m, *next, *mfree;
	bool is_tls;

	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
	    __func__, len));
	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
	    __func__, len, sb->sb_ccc));

	/* 'm' walks the current record; 'next' remembers the following one. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	is_tls = false;
	mfree = NULL;

	while (len > 0) {
		if (m == NULL) {
#ifdef KERN_TLS
			/*
			 * The regular chain is used up.  Account for the
			 * sb_tlsdcc bytes first, then switch over to the
			 * sb_mtls chain; is_tls ensures this happens once.
			 */
			if (next == NULL && !is_tls) {
				if (sb->sb_tlsdcc != 0) {
					MPASS(len >= sb->sb_tlsdcc);
					len -= sb->sb_tlsdcc;
					sb->sb_ccc -= sb->sb_tlsdcc;
					sb->sb_tlsdcc = 0;
					if (len == 0)
						break;
				}
				next = sb->sb_mtls;
				is_tls = true;
			}
#endif
			/* Current record finished: move on to the next one. */
			KASSERT(next, ("%s: no next, len %d", __func__, len));
			m = next;
			next = m->m_nextpkt;
		}
		if (m->m_len > len) {
			/* Partial cut: trim the front of this mbuf in place. */
			KASSERT(!(m->m_flags & M_NOTAVAIL),
			    ("%s: m %p M_NOTAVAIL", __func__, m));
			m->m_len -= len;
			m->m_data += len;
			sb->sb_ccc -= len;
			sb->sb_acc -= len;
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		/* The whole mbuf is consumed: drop it from the accounting. */
		len -= m->m_len;
#ifdef KERN_TLS
		if (is_tls)
			sbfree_ktls_rx(sb, m);
		else
#endif
			sbfree(sb, m);
		/*
		 * Do not put M_NOTREADY buffers to the free list, they
		 * are referenced from outside.
		 */
		if (m->m_flags & M_NOTREADY && !is_tls)
			m = m->m_next;
		else {
			struct mbuf *n;

			/* Push the mbuf onto the head of the free list. */
			n = m->m_next;
			m->m_next = mfree;
			mfree = m;
			m = n;
		}
	}
	/*
	 * Free any zero-length mbufs from the buffer.
	 * For SOCK_DGRAM sockets such mbufs represent empty records.
	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
	 * when sosend_generic() needs to send only control data.
	 */
	while (m && m->m_len == 0) {
		struct mbuf *n;

		sbfree(sb, m);
		n = m->m_next;
		m->m_next = mfree;
		mfree = m;
		m = n;
	}
	/*
	 * Re-anchor the buffer on whatever is left.  If the cut reached the
	 * TLS chain the regular chain is empty and sb_mtls takes the
	 * remainder; otherwise the remainder of the current record (or,
	 * failing that, the next record) becomes the new head.
	 */
#ifdef KERN_TLS
	if (is_tls) {
		sb->sb_mb = NULL;
		sb->sb_mtls = m;
		if (m == NULL)
			sb->sb_mtlstail = NULL;
	} else
#endif
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}

	return (mfree);
}
1568df8bae1dSRodney W. Grimes 
/*
 * Discard 'len' bytes from the front of a sockbuf and free the mbufs that
 * were removed.  The caller must hold the sockbuf lock.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{
	struct mbuf *removed;

	SOCKBUF_LOCK_ASSERT(sb);

	removed = sbcut_internal(sb, len);
	m_freem(removed);
}
1579eaa6dfbcSRobert Watson 
/*
 * Remove 'len' bytes from the front of a sockbuf and hand the removed
 * mbufs back to the caller instead of freeing them.  The caller must hold
 * the sockbuf lock.
 */
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len)
{
	struct mbuf *removed;

	SOCKBUF_LOCK_ASSERT(sb);

	removed = sbcut_internal(sb, len);
	return (removed);
}
1591eaa6dfbcSRobert Watson 
/*
 * Discard 'len' bytes from the front of a sockbuf.  The sockbuf lock is
 * held only while the data is cut; the removed mbufs are freed after the
 * lock has been released.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *removed;

	SOCKBUF_LOCK(sb);
	removed = sbcut_internal(sb, len);
	SOCKBUF_UNLOCK(sb);

	m_freem(removed);
}
1603a34b7046SRobert Watson 
160489e560f4SRandall Stewart struct mbuf *
160589e560f4SRandall Stewart sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
160689e560f4SRandall Stewart {
160789e560f4SRandall Stewart 	struct mbuf *m;
160889e560f4SRandall Stewart 
160989e560f4SRandall Stewart 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
161089e560f4SRandall Stewart 	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
161189e560f4SRandall Stewart 		*moff = off;
161289e560f4SRandall Stewart 		if (sb->sb_sndptr == NULL) {
161389e560f4SRandall Stewart 			sb->sb_sndptr = sb->sb_mb;
161489e560f4SRandall Stewart 			sb->sb_sndptroff = 0;
161589e560f4SRandall Stewart 		}
161689e560f4SRandall Stewart 		return (sb->sb_mb);
161789e560f4SRandall Stewart 	} else {
161889e560f4SRandall Stewart 		m = sb->sb_sndptr;
161989e560f4SRandall Stewart 		off -= sb->sb_sndptroff;
162089e560f4SRandall Stewart 	}
162189e560f4SRandall Stewart 	*moff = off;
162289e560f4SRandall Stewart 	return (m);
162389e560f4SRandall Stewart }
162489e560f4SRandall Stewart 
162589e560f4SRandall Stewart void
162689e560f4SRandall Stewart sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
162789e560f4SRandall Stewart {
162889e560f4SRandall Stewart 	/*
162989e560f4SRandall Stewart 	 * A small copy was done, advance forward the sb_sbsndptr to cover
163089e560f4SRandall Stewart 	 * it.
163189e560f4SRandall Stewart 	 */
163289e560f4SRandall Stewart 	struct mbuf *m;
163389e560f4SRandall Stewart 
163489e560f4SRandall Stewart 	if (mb != sb->sb_sndptr) {
163589e560f4SRandall Stewart 		/* Did not copyout at the same mbuf */
163689e560f4SRandall Stewart 		return;
163789e560f4SRandall Stewart 	}
163889e560f4SRandall Stewart 	m = mb;
163989e560f4SRandall Stewart 	while (m && (len > 0)) {
164089e560f4SRandall Stewart 		if (len >= m->m_len) {
164189e560f4SRandall Stewart 			len -= m->m_len;
164289e560f4SRandall Stewart 			if (m->m_next) {
164389e560f4SRandall Stewart 				sb->sb_sndptroff += m->m_len;
164489e560f4SRandall Stewart 				sb->sb_sndptr = m->m_next;
164589e560f4SRandall Stewart 			}
164689e560f4SRandall Stewart 			m = m->m_next;
164789e560f4SRandall Stewart 		} else {
164889e560f4SRandall Stewart 			len = 0;
164989e560f4SRandall Stewart 		}
165089e560f4SRandall Stewart 	}
165189e560f4SRandall Stewart }
165289e560f4SRandall Stewart 
1653a34b7046SRobert Watson /*
16549fd573c3SHans Petter Selasky  * Return the first mbuf and the mbuf data offset for the provided
16559fd573c3SHans Petter Selasky  * send offset without changing the "sb_sndptroff" field.
16569fd573c3SHans Petter Selasky  */
16579fd573c3SHans Petter Selasky struct mbuf *
16589fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
16599fd573c3SHans Petter Selasky {
16609fd573c3SHans Petter Selasky 	struct mbuf *m;
16619fd573c3SHans Petter Selasky 
16629fd573c3SHans Petter Selasky 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
16639fd573c3SHans Petter Selasky 
16649fd573c3SHans Petter Selasky 	/*
16659fd573c3SHans Petter Selasky 	 * If the "off" is below the stored offset, which happens on
16669fd573c3SHans Petter Selasky 	 * retransmits, just use "sb_mb":
16679fd573c3SHans Petter Selasky 	 */
16689fd573c3SHans Petter Selasky 	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
16699fd573c3SHans Petter Selasky 		m = sb->sb_mb;
16709fd573c3SHans Petter Selasky 	} else {
16719fd573c3SHans Petter Selasky 		m = sb->sb_sndptr;
16729fd573c3SHans Petter Selasky 		off -= sb->sb_sndptroff;
16739fd573c3SHans Petter Selasky 	}
16749fd573c3SHans Petter Selasky 	while (off > 0 && m != NULL) {
16759fd573c3SHans Petter Selasky 		if (off < m->m_len)
16769fd573c3SHans Petter Selasky 			break;
16779fd573c3SHans Petter Selasky 		off -= m->m_len;
16789fd573c3SHans Petter Selasky 		m = m->m_next;
16799fd573c3SHans Petter Selasky 	}
16809fd573c3SHans Petter Selasky 	*moff = off;
16819fd573c3SHans Petter Selasky 	return (m);
16829fd573c3SHans Petter Selasky }
16839fd573c3SHans Petter Selasky 
16849fd573c3SHans Petter Selasky /*
1685050ac265SRobert Watson  * Drop a record off the front of a sockbuf and move the next record to the
1686050ac265SRobert Watson  * front.
1687df8bae1dSRodney W. Grimes  */
168826f9a767SRodney W. Grimes void
1689050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb)
1690df8bae1dSRodney W. Grimes {
1691050ac265SRobert Watson 	struct mbuf *m;
1692df8bae1dSRodney W. Grimes 
1693a34b7046SRobert Watson 	SOCKBUF_LOCK_ASSERT(sb);
1694a34b7046SRobert Watson 
1695df8bae1dSRodney W. Grimes 	m = sb->sb_mb;
1696df8bae1dSRodney W. Grimes 	if (m) {
1697df8bae1dSRodney W. Grimes 		sb->sb_mb = m->m_nextpkt;
1698df8bae1dSRodney W. Grimes 		do {
1699df8bae1dSRodney W. Grimes 			sbfree(sb, m);
1700ecde8f7cSMatthew Dillon 			m = m_free(m);
1701797f2d22SPoul-Henning Kamp 		} while (m);
1702df8bae1dSRodney W. Grimes 	}
1703395bb186SSam Leffler 	SB_EMPTY_FIXUP(sb);
1704df8bae1dSRodney W. Grimes }
17051e4ad9ceSGarrett Wollman 
/*
 * Locking wrapper for sbdroprecord_locked(): remove the first record of a
 * sockbuf and make the following record the head, holding the sockbuf lock
 * for the duration of the call.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1718a34b7046SRobert Watson 
171920d9e5e8SRobert Watson /*
17208c799760SRobert Watson  * Create a "control" mbuf containing the specified data with the specified
17218c799760SRobert Watson  * type for presentation on a socket buffer.
172220d9e5e8SRobert Watson  */
172320d9e5e8SRobert Watson struct mbuf *
1724*3c0e5685SJohn Baldwin sbcreatecontrol_how(void *p, int size, int type, int level, int wait)
172520d9e5e8SRobert Watson {
1726d19e16a7SRobert Watson 	struct cmsghdr *cp;
172720d9e5e8SRobert Watson 	struct mbuf *m;
172820d9e5e8SRobert Watson 
1729*3c0e5685SJohn Baldwin 	MBUF_CHECKSLEEP(wait);
173020d9e5e8SRobert Watson 	if (CMSG_SPACE((u_int)size) > MCLBYTES)
173120d9e5e8SRobert Watson 		return ((struct mbuf *) NULL);
173220d9e5e8SRobert Watson 	if (CMSG_SPACE((u_int)size) > MLEN)
1733*3c0e5685SJohn Baldwin 		m = m_getcl(wait, MT_CONTROL, 0);
173420d9e5e8SRobert Watson 	else
1735*3c0e5685SJohn Baldwin 		m = m_get(wait, MT_CONTROL);
173620d9e5e8SRobert Watson 	if (m == NULL)
173720d9e5e8SRobert Watson 		return ((struct mbuf *) NULL);
173820d9e5e8SRobert Watson 	cp = mtod(m, struct cmsghdr *);
173920d9e5e8SRobert Watson 	m->m_len = 0;
174020d9e5e8SRobert Watson 	KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m),
174120d9e5e8SRobert Watson 	    ("sbcreatecontrol: short mbuf"));
17422827952eSXin LI 	/*
17432827952eSXin LI 	 * Don't leave the padding between the msg header and the
17442827952eSXin LI 	 * cmsg data and the padding after the cmsg data un-initialized.
17452827952eSXin LI 	 */
17462827952eSXin LI 	bzero(cp, CMSG_SPACE((u_int)size));
174720d9e5e8SRobert Watson 	if (p != NULL)
174820d9e5e8SRobert Watson 		(void)memcpy(CMSG_DATA(cp), p, size);
174920d9e5e8SRobert Watson 	m->m_len = CMSG_SPACE(size);
175020d9e5e8SRobert Watson 	cp->cmsg_len = CMSG_LEN(size);
175120d9e5e8SRobert Watson 	cp->cmsg_level = level;
175220d9e5e8SRobert Watson 	cp->cmsg_type = type;
175320d9e5e8SRobert Watson 	return (m);
175420d9e5e8SRobert Watson }
175520d9e5e8SRobert Watson 
1756*3c0e5685SJohn Baldwin struct mbuf *
1757*3c0e5685SJohn Baldwin sbcreatecontrol(caddr_t p, int size, int type, int level)
1758*3c0e5685SJohn Baldwin {
1759*3c0e5685SJohn Baldwin 
1760*3c0e5685SJohn Baldwin 	return (sbcreatecontrol_how(p, size, type, level, M_NOWAIT));
1761*3c0e5685SJohn Baldwin }
1762*3c0e5685SJohn Baldwin 
176320d9e5e8SRobert Watson /*
17648c799760SRobert Watson  * This does the same for socket buffers that sotoxsocket does for sockets:
17658c799760SRobert Watson  * generate an user-format data structure describing the socket buffer.  Note
17668c799760SRobert Watson  * that the xsockbuf structure, since it is always embedded in a socket, does
17678c799760SRobert Watson  * not include a self pointer nor a length.  We make this entry point public
17688c799760SRobert Watson  * in case some other mechanism needs it.
176920d9e5e8SRobert Watson  */
177020d9e5e8SRobert Watson void
177120d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
177220d9e5e8SRobert Watson {
1773d19e16a7SRobert Watson 
17740f9d0a73SGleb Smirnoff 	xsb->sb_cc = sb->sb_ccc;
177520d9e5e8SRobert Watson 	xsb->sb_hiwat = sb->sb_hiwat;
177620d9e5e8SRobert Watson 	xsb->sb_mbcnt = sb->sb_mbcnt;
177749f287f8SGeorge V. Neville-Neil 	xsb->sb_mcnt = sb->sb_mcnt;
177849f287f8SGeorge V. Neville-Neil 	xsb->sb_ccnt = sb->sb_ccnt;
177920d9e5e8SRobert Watson 	xsb->sb_mbmax = sb->sb_mbmax;
178020d9e5e8SRobert Watson 	xsb->sb_lowat = sb->sb_lowat;
178120d9e5e8SRobert Watson 	xsb->sb_flags = sb->sb_flags;
178220d9e5e8SRobert Watson 	xsb->sb_timeo = sb->sb_timeo;
178320d9e5e8SRobert Watson }
178420d9e5e8SRobert Watson 
/* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
static int dummy;
/* Placeholder node registered under _kern with the KERN_DUMMY identifier. */
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
/*
 * kern.ipc.maxsockbuf: read/write unsigned long backed by sb_max; accesses
 * go through sysctl_handle_sb_max rather than a plain variable handler.
 */
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &sb_max, 0,
    sysctl_handle_sb_max, "LU",
    "Maximum socket buffer size");
/* kern.ipc.sockbuf_waste_factor: read/write view of sb_efficiency. */
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "Socket buffer size waste factor");
1794