19454b2d8SWarner Losh /*-
251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni *
4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993
5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved.
6df8bae1dSRodney W. Grimes *
7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes * are met:
10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution.
1569a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes * without specific prior written permission.
18df8bae1dSRodney W. Grimes *
19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes */
31df8bae1dSRodney W. Grimes
32677b542eSDavid E. O'Brien #include <sys/cdefs.h>
33b2e60773SJohn Baldwin #include "opt_kern_tls.h"
345b86eac4SJesper Skriver #include "opt_param.h"
35335654d7SRobert Watson
36df8bae1dSRodney W. Grimes #include <sys/param.h>
37960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */
38ff5c09daSGarrett Wollman #include <sys/kernel.h>
39b2e60773SJohn Baldwin #include <sys/ktls.h>
40fb919e4dSMark Murray #include <sys/lock.h>
418ec07310SGleb Smirnoff #include <sys/malloc.h>
42df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
43ec45f952SMark Johnston #include <sys/msan.h>
44960ed29cSSeigo Tanimura #include <sys/mutex.h>
45fb919e4dSMark Murray #include <sys/proc.h>
46df8bae1dSRodney W. Grimes #include <sys/protosw.h>
472f9a2132SBrian Feldman #include <sys/resourcevar.h>
48960ed29cSSeigo Tanimura #include <sys/signalvar.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
50df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
517abab911SRobert Watson #include <sys/sx.h>
52ff5c09daSGarrett Wollman #include <sys/sysctl.h>
5326f9a767SRodney W. Grimes
54fe8c78f0SHans Petter Selasky #include <netinet/in.h>
55fe8c78f0SHans Petter Selasky
/*
 * Hook installed by the AIO routines.  The socket buffer code calls through
 * this pointer to reach the AIO module when that module is loaded.
 */
void	(*aio_swake)(struct socket *so, struct sockbuf *sb);
6121d56e9cSAlfred Perlstein
62df8bae1dSRodney W. Grimes /*
63f14cce87SRobert Watson * Primitive routines for operating on socket buffers
64df8bae1dSRodney W. Grimes */
65df8bae1dSRodney W. Grimes
667b660faaSAlexander V. Chernikov #define BUF_MAX_ADJ(_sz) (((u_quad_t)(_sz)) * MCLBYTES / (MSIZE + MCLBYTES))
677b660faaSAlexander V. Chernikov
6879cb7eb4SDavid Greenman u_long sb_max = SB_MAX;
697b660faaSAlexander V. Chernikov u_long sb_max_adj = BUF_MAX_ADJ(SB_MAX);
70df8bae1dSRodney W. Grimes
714b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */
724b29bc4fSGarrett Wollman
73d1385ab2SMateusz Guzik #ifdef KERN_TLS
743c0e5685SJohn Baldwin static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
753c0e5685SJohn Baldwin struct mbuf *n);
76d1385ab2SMateusz Guzik #endif
771d2df300SGleb Smirnoff static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
78*67c1c4dfSGleb Smirnoff static void sbunreserve_locked(struct socket *so, sb_which which);
79eaa6dfbcSRobert Watson
80df8bae1dSRodney W. Grimes /*
81829fae90SGleb Smirnoff * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
82829fae90SGleb Smirnoff */
83829fae90SGleb Smirnoff static void
sbm_clrprotoflags(struct mbuf * m,int flags)84829fae90SGleb Smirnoff sbm_clrprotoflags(struct mbuf *m, int flags)
85829fae90SGleb Smirnoff {
86829fae90SGleb Smirnoff int mask;
87829fae90SGleb Smirnoff
88829fae90SGleb Smirnoff mask = ~M_PROTOFLAGS;
89829fae90SGleb Smirnoff if (flags & PRUS_NOTREADY)
90829fae90SGleb Smirnoff mask |= M_NOTREADY;
91829fae90SGleb Smirnoff while (m) {
92829fae90SGleb Smirnoff m->m_flags &= mask;
93829fae90SGleb Smirnoff m = m->m_next;
94829fae90SGleb Smirnoff }
95829fae90SGleb Smirnoff }
96829fae90SGleb Smirnoff
97829fae90SGleb Smirnoff /*
983807631bSJohn Baldwin * Compress M_NOTREADY mbufs after they have been readied by sbready().
993807631bSJohn Baldwin *
1003807631bSJohn Baldwin * sbcompress() skips M_NOTREADY mbufs since the data is not available to
1013807631bSJohn Baldwin * be copied at the time of sbcompress(). This function combines small
1023807631bSJohn Baldwin * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first
1033807631bSJohn Baldwin * mbuf sbready() marked ready, and 'end' is the first mbuf still not
1043807631bSJohn Baldwin * ready.
1053807631bSJohn Baldwin */
1063807631bSJohn Baldwin static void
sbready_compress(struct sockbuf * sb,struct mbuf * m0,struct mbuf * end)1073807631bSJohn Baldwin sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end)
1083807631bSJohn Baldwin {
1093807631bSJohn Baldwin struct mbuf *m, *n;
1103807631bSJohn Baldwin int ext_size;
1113807631bSJohn Baldwin
1123807631bSJohn Baldwin SOCKBUF_LOCK_ASSERT(sb);
1133807631bSJohn Baldwin
1143807631bSJohn Baldwin if ((sb->sb_flags & SB_NOCOALESCE) != 0)
1153807631bSJohn Baldwin return;
1163807631bSJohn Baldwin
1173807631bSJohn Baldwin for (m = m0; m != end; m = m->m_next) {
1183807631bSJohn Baldwin MPASS((m->m_flags & M_NOTREADY) == 0);
119c4ad247bSAndrew Gallatin /*
120c4ad247bSAndrew Gallatin * NB: In sbcompress(), 'n' is the last mbuf in the
121c4ad247bSAndrew Gallatin * socket buffer and 'm' is the new mbuf being copied
122c4ad247bSAndrew Gallatin * into the trailing space of 'n'. Here, the roles
123c4ad247bSAndrew Gallatin * are reversed and 'n' is the next mbuf after 'm'
124c4ad247bSAndrew Gallatin * that is being copied into the trailing space of
125c4ad247bSAndrew Gallatin * 'm'.
126c4ad247bSAndrew Gallatin */
127c4ad247bSAndrew Gallatin n = m->m_next;
128c4ad247bSAndrew Gallatin #ifdef KERN_TLS
129c4ad247bSAndrew Gallatin /* Try to coalesce adjacent ktls mbuf hdr/trailers. */
130c4ad247bSAndrew Gallatin if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
1316edfd179SGleb Smirnoff (m->m_flags & M_EXTPG) &&
1326edfd179SGleb Smirnoff (n->m_flags & M_EXTPG) &&
133c4ad247bSAndrew Gallatin !mbuf_has_tls_session(m) &&
134c4ad247bSAndrew Gallatin !mbuf_has_tls_session(n)) {
135c4ad247bSAndrew Gallatin int hdr_len, trail_len;
136c4ad247bSAndrew Gallatin
1377b6c99d0SGleb Smirnoff hdr_len = n->m_epg_hdrlen;
1387b6c99d0SGleb Smirnoff trail_len = m->m_epg_trllen;
139c4ad247bSAndrew Gallatin if (trail_len != 0 && hdr_len != 0 &&
140c4ad247bSAndrew Gallatin trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) {
141c4ad247bSAndrew Gallatin /* copy n's header to m's trailer */
14223feb563SAndrew Gallatin memcpy(&m->m_epg_trail[trail_len],
14323feb563SAndrew Gallatin n->m_epg_hdr, hdr_len);
1447b6c99d0SGleb Smirnoff m->m_epg_trllen += hdr_len;
145c4ad247bSAndrew Gallatin m->m_len += hdr_len;
1467b6c99d0SGleb Smirnoff n->m_epg_hdrlen = 0;
147c4ad247bSAndrew Gallatin n->m_len -= hdr_len;
148c4ad247bSAndrew Gallatin }
149c4ad247bSAndrew Gallatin }
150c4ad247bSAndrew Gallatin #endif
1513807631bSJohn Baldwin
1523807631bSJohn Baldwin /* Compress small unmapped mbufs into plain mbufs. */
1536edfd179SGleb Smirnoff if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN &&
154b2e60773SJohn Baldwin !mbuf_has_tls_session(m)) {
1553807631bSJohn Baldwin ext_size = m->m_ext.ext_size;
156d59bc188SGleb Smirnoff if (mb_unmapped_compress(m) == 0)
1573807631bSJohn Baldwin sb->sb_mbcnt -= ext_size;
1583807631bSJohn Baldwin }
1593807631bSJohn Baldwin
1603807631bSJohn Baldwin while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 &&
1613807631bSJohn Baldwin M_WRITABLE(m) &&
1626edfd179SGleb Smirnoff (m->m_flags & M_EXTPG) == 0 &&
163b2e60773SJohn Baldwin !mbuf_has_tls_session(n) &&
164b2e60773SJohn Baldwin !mbuf_has_tls_session(m) &&
1653807631bSJohn Baldwin n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
1663807631bSJohn Baldwin n->m_len <= M_TRAILINGSPACE(m) &&
1673807631bSJohn Baldwin m->m_type == n->m_type) {
1683807631bSJohn Baldwin KASSERT(sb->sb_lastrecord != n,
1693807631bSJohn Baldwin ("%s: merging start of record (%p) into previous mbuf (%p)",
1703807631bSJohn Baldwin __func__, n, m));
1713807631bSJohn Baldwin m_copydata(n, 0, n->m_len, mtodo(m, m->m_len));
1723807631bSJohn Baldwin m->m_len += n->m_len;
1733807631bSJohn Baldwin m->m_next = n->m_next;
1743807631bSJohn Baldwin m->m_flags |= n->m_flags & M_EOR;
1753807631bSJohn Baldwin if (sb->sb_mbtail == n)
1763807631bSJohn Baldwin sb->sb_mbtail = m;
1773807631bSJohn Baldwin
1783807631bSJohn Baldwin sb->sb_mbcnt -= MSIZE;
179d59bc188SGleb Smirnoff if (n->m_flags & M_EXT)
1803807631bSJohn Baldwin sb->sb_mbcnt -= n->m_ext.ext_size;
1813807631bSJohn Baldwin m_free(n);
1823807631bSJohn Baldwin n = m->m_next;
1833807631bSJohn Baldwin }
1843807631bSJohn Baldwin }
1853807631bSJohn Baldwin SBLASTRECORDCHK(sb);
1863807631bSJohn Baldwin SBLASTMBUFCHK(sb);
1873807631bSJohn Baldwin }
1883807631bSJohn Baldwin
1893807631bSJohn Baldwin /*
19082334850SJohn Baldwin * Mark ready "count" units of I/O starting with "m". Most mbufs
19161664ee7SGleb Smirnoff * count as a single unit of I/O except for M_EXTPG mbufs which
19261664ee7SGleb Smirnoff * are backed by multiple pages.
1930f9d0a73SGleb Smirnoff */
1940f9d0a73SGleb Smirnoff int
sbready(struct sockbuf * sb,struct mbuf * m0,int count)19582334850SJohn Baldwin sbready(struct sockbuf *sb, struct mbuf *m0, int count)
1960f9d0a73SGleb Smirnoff {
19782334850SJohn Baldwin struct mbuf *m;
1980f9d0a73SGleb Smirnoff u_int blocker;
1990f9d0a73SGleb Smirnoff
2000f9d0a73SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb);
2010f9d0a73SGleb Smirnoff KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
20282334850SJohn Baldwin KASSERT(count > 0, ("%s: invalid count %d", __func__, count));
2030f9d0a73SGleb Smirnoff
20482334850SJohn Baldwin m = m0;
2050f9d0a73SGleb Smirnoff blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
2060f9d0a73SGleb Smirnoff
20782334850SJohn Baldwin while (count > 0) {
2080f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY,
2090f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m));
210c2a8fd6fSJohn Baldwin if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) {
2117b6c99d0SGleb Smirnoff if (count < m->m_epg_nrdy) {
2127b6c99d0SGleb Smirnoff m->m_epg_nrdy -= count;
21382334850SJohn Baldwin count = 0;
21482334850SJohn Baldwin break;
21582334850SJohn Baldwin }
2167b6c99d0SGleb Smirnoff count -= m->m_epg_nrdy;
2177b6c99d0SGleb Smirnoff m->m_epg_nrdy = 0;
21882334850SJohn Baldwin } else
21982334850SJohn Baldwin count--;
22082334850SJohn Baldwin
2210f9d0a73SGleb Smirnoff m->m_flags &= ~(M_NOTREADY | blocker);
2220f9d0a73SGleb Smirnoff if (blocker)
2230f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len;
22482334850SJohn Baldwin m = m->m_next;
2250f9d0a73SGleb Smirnoff }
2260f9d0a73SGleb Smirnoff
22782334850SJohn Baldwin /*
22882334850SJohn Baldwin * If the first mbuf is still not fully ready because only
22982334850SJohn Baldwin * some of its backing pages were readied, no further progress
23082334850SJohn Baldwin * can be made.
23182334850SJohn Baldwin */
23282334850SJohn Baldwin if (m0 == m) {
23382334850SJohn Baldwin MPASS(m->m_flags & M_NOTREADY);
2340f9d0a73SGleb Smirnoff return (EINPROGRESS);
23582334850SJohn Baldwin }
23682334850SJohn Baldwin
23782334850SJohn Baldwin if (!blocker) {
2383807631bSJohn Baldwin sbready_compress(sb, m0, m);
23982334850SJohn Baldwin return (EINPROGRESS);
24082334850SJohn Baldwin }
2410f9d0a73SGleb Smirnoff
2420f9d0a73SGleb Smirnoff /* This one was blocking all the queue. */
2430f9d0a73SGleb Smirnoff for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
2440f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_BLOCKED,
2450f9d0a73SGleb Smirnoff ("%s: m %p !M_BLOCKED", __func__, m));
2460f9d0a73SGleb Smirnoff m->m_flags &= ~M_BLOCKED;
2470f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len;
2480f9d0a73SGleb Smirnoff }
2490f9d0a73SGleb Smirnoff
2500f9d0a73SGleb Smirnoff sb->sb_fnrdy = m;
2513807631bSJohn Baldwin sbready_compress(sb, m0, m);
2520f9d0a73SGleb Smirnoff
2530f9d0a73SGleb Smirnoff return (0);
2540f9d0a73SGleb Smirnoff }
2550f9d0a73SGleb Smirnoff
2560f9d0a73SGleb Smirnoff /*
2578967b220SGleb Smirnoff * Adjust sockbuf state reflecting allocation of m.
2588967b220SGleb Smirnoff */
2598967b220SGleb Smirnoff void
sballoc(struct sockbuf * sb,struct mbuf * m)2608967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m)
2618967b220SGleb Smirnoff {
2628967b220SGleb Smirnoff
2638967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb);
2648967b220SGleb Smirnoff
2650f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len;
2660f9d0a73SGleb Smirnoff
2670f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) {
2680f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY)
2690f9d0a73SGleb Smirnoff sb->sb_fnrdy = m;
2700f9d0a73SGleb Smirnoff else
2710f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len;
2720f9d0a73SGleb Smirnoff } else
2730f9d0a73SGleb Smirnoff m->m_flags |= M_BLOCKED;
2748967b220SGleb Smirnoff
2758967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
2768967b220SGleb Smirnoff sb->sb_ctl += m->m_len;
2778967b220SGleb Smirnoff
2788967b220SGleb Smirnoff sb->sb_mbcnt += MSIZE;
2798967b220SGleb Smirnoff
280d59bc188SGleb Smirnoff if (m->m_flags & M_EXT)
2818967b220SGleb Smirnoff sb->sb_mbcnt += m->m_ext.ext_size;
2828967b220SGleb Smirnoff }
2838967b220SGleb Smirnoff
2848967b220SGleb Smirnoff /*
2858967b220SGleb Smirnoff * Adjust sockbuf state reflecting freeing of m.
2868967b220SGleb Smirnoff */
2878967b220SGleb Smirnoff void
sbfree(struct sockbuf * sb,struct mbuf * m)2888967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m)
2898967b220SGleb Smirnoff {
2908967b220SGleb Smirnoff
2918967b220SGleb Smirnoff #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */
2928967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb);
2938967b220SGleb Smirnoff #endif
2948967b220SGleb Smirnoff
2950f9d0a73SGleb Smirnoff sb->sb_ccc -= m->m_len;
2960f9d0a73SGleb Smirnoff
2970f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL))
2980f9d0a73SGleb Smirnoff sb->sb_acc -= m->m_len;
2990f9d0a73SGleb Smirnoff
3000f9d0a73SGleb Smirnoff if (m == sb->sb_fnrdy) {
3010f9d0a73SGleb Smirnoff struct mbuf *n;
3020f9d0a73SGleb Smirnoff
3030f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY,
3040f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m));
3050f9d0a73SGleb Smirnoff
3060f9d0a73SGleb Smirnoff n = m->m_next;
3070f9d0a73SGleb Smirnoff while (n != NULL && !(n->m_flags & M_NOTREADY)) {
3080f9d0a73SGleb Smirnoff n->m_flags &= ~M_BLOCKED;
3090f9d0a73SGleb Smirnoff sb->sb_acc += n->m_len;
3100f9d0a73SGleb Smirnoff n = n->m_next;
3110f9d0a73SGleb Smirnoff }
3120f9d0a73SGleb Smirnoff sb->sb_fnrdy = n;
3130f9d0a73SGleb Smirnoff }
3148967b220SGleb Smirnoff
3158967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
3168967b220SGleb Smirnoff sb->sb_ctl -= m->m_len;
3178967b220SGleb Smirnoff
3188967b220SGleb Smirnoff sb->sb_mbcnt -= MSIZE;
319d59bc188SGleb Smirnoff if (m->m_flags & M_EXT)
3208967b220SGleb Smirnoff sb->sb_mbcnt -= m->m_ext.ext_size;
3218967b220SGleb Smirnoff
3228967b220SGleb Smirnoff if (sb->sb_sndptr == m) {
3238967b220SGleb Smirnoff sb->sb_sndptr = NULL;
3248967b220SGleb Smirnoff sb->sb_sndptroff = 0;
3258967b220SGleb Smirnoff }
3268967b220SGleb Smirnoff if (sb->sb_sndptroff != 0)
3278967b220SGleb Smirnoff sb->sb_sndptroff -= m->m_len;
3288967b220SGleb Smirnoff }
3298967b220SGleb Smirnoff
3303c0e5685SJohn Baldwin #ifdef KERN_TLS
3313c0e5685SJohn Baldwin /*
3323c0e5685SJohn Baldwin * Similar to sballoc/sbfree but does not adjust state associated with
3333c0e5685SJohn Baldwin * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs
3343c0e5685SJohn Baldwin * are not ready.
3353c0e5685SJohn Baldwin */
3363c0e5685SJohn Baldwin void
sballoc_ktls_rx(struct sockbuf * sb,struct mbuf * m)3373c0e5685SJohn Baldwin sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
3383c0e5685SJohn Baldwin {
3393c0e5685SJohn Baldwin
3403c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb);
3413c0e5685SJohn Baldwin
3423c0e5685SJohn Baldwin sb->sb_ccc += m->m_len;
3433c0e5685SJohn Baldwin sb->sb_tlscc += m->m_len;
3443c0e5685SJohn Baldwin
3453c0e5685SJohn Baldwin sb->sb_mbcnt += MSIZE;
3463c0e5685SJohn Baldwin
347d59bc188SGleb Smirnoff if (m->m_flags & M_EXT)
3483c0e5685SJohn Baldwin sb->sb_mbcnt += m->m_ext.ext_size;
3493c0e5685SJohn Baldwin }
3503c0e5685SJohn Baldwin
3513c0e5685SJohn Baldwin void
sbfree_ktls_rx(struct sockbuf * sb,struct mbuf * m)3523c0e5685SJohn Baldwin sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
3533c0e5685SJohn Baldwin {
3543c0e5685SJohn Baldwin
3553c0e5685SJohn Baldwin #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */
3563c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb);
3573c0e5685SJohn Baldwin #endif
3583c0e5685SJohn Baldwin
3593c0e5685SJohn Baldwin sb->sb_ccc -= m->m_len;
3603c0e5685SJohn Baldwin sb->sb_tlscc -= m->m_len;
3613c0e5685SJohn Baldwin
3623c0e5685SJohn Baldwin sb->sb_mbcnt -= MSIZE;
3633c0e5685SJohn Baldwin
364d59bc188SGleb Smirnoff if (m->m_flags & M_EXT)
3653c0e5685SJohn Baldwin sb->sb_mbcnt -= m->m_ext.ext_size;
3663c0e5685SJohn Baldwin }
3673c0e5685SJohn Baldwin #endif
3683c0e5685SJohn Baldwin
3698967b220SGleb Smirnoff /*
370050ac265SRobert Watson * Socantsendmore indicates that no more data will be sent on the socket; it
371050ac265SRobert Watson * would normally be applied to a socket when the user informs the system
372050ac265SRobert Watson * that no more data is to be sent, by the protocol code (in case
373050ac265SRobert Watson * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be
374050ac265SRobert Watson * received, and will normally be applied to the socket by a protocol when it
375050ac265SRobert Watson * detects that the peer will send no more data. Data queued for reading in
376050ac265SRobert Watson * the socket may yet be read.
377df8bae1dSRodney W. Grimes */
378a34b7046SRobert Watson void
socantsendmore_locked(struct socket * so)379050ac265SRobert Watson socantsendmore_locked(struct socket *so)
380a34b7046SRobert Watson {
381a34b7046SRobert Watson
38243283184SGleb Smirnoff SOCK_SENDBUF_LOCK_ASSERT(so);
383a34b7046SRobert Watson
384a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE;
385a34b7046SRobert Watson sowwakeup_locked(so);
38643283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK_ASSERT(so);
387a34b7046SRobert Watson }
388df8bae1dSRodney W. Grimes
/*
 * Unlocked entry point: take the send buffer lock and let
 * socantsendmore_locked() set SBS_CANTSENDMORE and release the lock.
 */
void
socantsendmore(struct socket *so)
{

	SOCK_SENDBUF_LOCK(so);
	socantsendmore_locked(so);

	/* The callee returned with the lock already dropped. */
	SOCK_SENDBUF_UNLOCK_ASSERT(so);
}
397a34b7046SRobert Watson
398a34b7046SRobert Watson void
socantrcvmore_locked(struct socket * so)399050ac265SRobert Watson socantrcvmore_locked(struct socket *so)
400a34b7046SRobert Watson {
401a34b7046SRobert Watson
40243283184SGleb Smirnoff SOCK_RECVBUF_LOCK_ASSERT(so);
403a34b7046SRobert Watson
404a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE;
4053c0e5685SJohn Baldwin #ifdef KERN_TLS
4063c0e5685SJohn Baldwin if (so->so_rcv.sb_flags & SB_TLS_RX)
4073c0e5685SJohn Baldwin ktls_check_rx(&so->so_rcv);
4083c0e5685SJohn Baldwin #endif
409a34b7046SRobert Watson sorwakeup_locked(so);
41043283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK_ASSERT(so);
411df8bae1dSRodney W. Grimes }
412df8bae1dSRodney W. Grimes
/*
 * Unlocked entry point: take the receive buffer lock and let
 * socantrcvmore_locked() set SBS_CANTRCVMORE and release the lock.
 */
void
socantrcvmore(struct socket *so)
{

	SOCK_RECVBUF_LOCK(so);
	socantrcvmore_locked(so);

	/* The callee returned with the lock already dropped. */
	SOCK_RECVBUF_UNLOCK_ASSERT(so);
}
421df8bae1dSRodney W. Grimes
4227045b160SRoy Marples void
soroverflow_locked(struct socket * so)4237045b160SRoy Marples soroverflow_locked(struct socket *so)
4247045b160SRoy Marples {
4257045b160SRoy Marples
42643283184SGleb Smirnoff SOCK_RECVBUF_LOCK_ASSERT(so);
4277045b160SRoy Marples
4287045b160SRoy Marples if (so->so_options & SO_RERROR) {
4297045b160SRoy Marples so->so_rerror = ENOBUFS;
4307045b160SRoy Marples sorwakeup_locked(so);
4317045b160SRoy Marples } else
43243283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
4337045b160SRoy Marples
43443283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK_ASSERT(so);
4357045b160SRoy Marples }
4367045b160SRoy Marples
/*
 * Unlocked entry point: take the receive buffer lock and let
 * soroverflow_locked() do the work and release the lock.
 */
void
soroverflow(struct socket *so)
{

	SOCK_RECVBUF_LOCK(so);
	soroverflow_locked(so);

	/* The callee returned with the lock already dropped. */
	SOCK_RECVBUF_UNLOCK_ASSERT(so);
}
4457045b160SRoy Marples
446df8bae1dSRodney W. Grimes /*
447df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer.
448df8bae1dSRodney W. Grimes */
44926f9a767SRodney W. Grimes int
sbwait(struct socket * so,sb_which which)45043283184SGleb Smirnoff sbwait(struct socket *so, sb_which which)
451df8bae1dSRodney W. Grimes {
45243283184SGleb Smirnoff struct sockbuf *sb;
453df8bae1dSRodney W. Grimes
45443283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which);
45531f555a1SRobert Watson
45643283184SGleb Smirnoff sb = sobuf(so, which);
457df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT;
45843283184SGleb Smirnoff return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which),
4599d7fb768SGleb Smirnoff PSOCK | PCATCH, "sbwait", sb->sb_timeo, 0, 0));
460df8bae1dSRodney W. Grimes }
461df8bae1dSRodney W. Grimes
462df8bae1dSRodney W. Grimes /*
463050ac265SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous notification
464050ac265SRobert Watson * via SIGIO if the socket has the SS_ASYNC flag set.
465a34b7046SRobert Watson *
466a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end
467a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock
468a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through
469a34b7046SRobert Watson * to the point where it's no longer required. We currently hold the lock
470a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and
471a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's
472a34b7046SRobert Watson * correct.
473df8bae1dSRodney W. Grimes */
47443283184SGleb Smirnoff static __always_inline void
sowakeup(struct socket * so,const sb_which which)47543283184SGleb Smirnoff sowakeup(struct socket *so, const sb_which which)
476df8bae1dSRodney W. Grimes {
47743283184SGleb Smirnoff struct sockbuf *sb;
47874fb0ba7SJohn Baldwin int ret;
479d48d4b25SSeigo Tanimura
48043283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which);
481a34b7046SRobert Watson
48243283184SGleb Smirnoff sb = sobuf(so, which);
483779f106aSGleb Smirnoff selwakeuppri(sb->sb_sel, PSOCK);
484779f106aSGleb Smirnoff if (!SEL_WAITING(sb->sb_sel))
485df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_SEL;
486df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) {
487df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT;
4880f9d0a73SGleb Smirnoff wakeup(&sb->sb_acc);
489df8bae1dSRodney W. Grimes }
490779f106aSGleb Smirnoff KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
49198c92369SNavdeep Parhar if (sb->sb_upcall != NULL) {
492eb1b1807SGleb Smirnoff ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
49374fb0ba7SJohn Baldwin if (ret == SU_ISCONNECTED) {
49474fb0ba7SJohn Baldwin KASSERT(sb == &so->so_rcv,
49574fb0ba7SJohn Baldwin ("SO_SND upcall returned SU_ISCONNECTED"));
49674fb0ba7SJohn Baldwin soupcall_clear(so, SO_RCV);
49774fb0ba7SJohn Baldwin }
49874fb0ba7SJohn Baldwin } else
49974fb0ba7SJohn Baldwin ret = SU_OK;
5004cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO)
50143283184SGleb Smirnoff sowakeup_aio(so, which);
50243283184SGleb Smirnoff SOCK_BUF_UNLOCK(so, which);
503555b3e2fSGleb Smirnoff if (ret == SU_ISCONNECTED)
50474fb0ba7SJohn Baldwin soisconnected(so);
50574fb0ba7SJohn Baldwin if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
50674fb0ba7SJohn Baldwin pgsigio(&so->so_sigio, SIGIO, 0);
50743283184SGleb Smirnoff SOCK_BUF_UNLOCK_ASSERT(so, which);
50843283184SGleb Smirnoff }
50943283184SGleb Smirnoff
510a1da7dc1SMark Johnston static void
splice_push(struct socket * so)511a1da7dc1SMark Johnston splice_push(struct socket *so)
512a1da7dc1SMark Johnston {
513a1da7dc1SMark Johnston struct so_splice *sp;
514a1da7dc1SMark Johnston
515a1da7dc1SMark Johnston SOCK_RECVBUF_LOCK_ASSERT(so);
516a1da7dc1SMark Johnston
517a1da7dc1SMark Johnston sp = so->so_splice;
518a1da7dc1SMark Johnston mtx_lock(&sp->mtx);
519a1da7dc1SMark Johnston SOCK_RECVBUF_UNLOCK(so);
520a1da7dc1SMark Johnston so_splice_dispatch(sp);
521a1da7dc1SMark Johnston }
522a1da7dc1SMark Johnston
523a1da7dc1SMark Johnston static void
splice_pull(struct socket * so)524a1da7dc1SMark Johnston splice_pull(struct socket *so)
525a1da7dc1SMark Johnston {
526a1da7dc1SMark Johnston struct so_splice *sp;
527a1da7dc1SMark Johnston
528a1da7dc1SMark Johnston SOCK_SENDBUF_LOCK_ASSERT(so);
529a1da7dc1SMark Johnston
530a1da7dc1SMark Johnston sp = so->so_splice_back;
531a1da7dc1SMark Johnston mtx_lock(&sp->mtx);
532a1da7dc1SMark Johnston SOCK_SENDBUF_UNLOCK(so);
533a1da7dc1SMark Johnston so_splice_dispatch(sp);
534a1da7dc1SMark Johnston }
535a1da7dc1SMark Johnston
53643283184SGleb Smirnoff /*
53743283184SGleb Smirnoff * Do we need to notify the other side when I/O is possible?
53843283184SGleb Smirnoff */
53943283184SGleb Smirnoff static __always_inline bool
sb_notify(const struct sockbuf * sb)54043283184SGleb Smirnoff sb_notify(const struct sockbuf *sb)
54143283184SGleb Smirnoff {
54243283184SGleb Smirnoff return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC |
54343283184SGleb Smirnoff SB_UPCALL | SB_AIO | SB_KNOTE)) != 0);
54443283184SGleb Smirnoff }
54543283184SGleb Smirnoff
54643283184SGleb Smirnoff void
sorwakeup_locked(struct socket * so)54743283184SGleb Smirnoff sorwakeup_locked(struct socket *so)
54843283184SGleb Smirnoff {
54943283184SGleb Smirnoff SOCK_RECVBUF_LOCK_ASSERT(so);
550a1da7dc1SMark Johnston if (so->so_rcv.sb_flags & SB_SPLICED)
551a1da7dc1SMark Johnston splice_push(so);
552a1da7dc1SMark Johnston else if (sb_notify(&so->so_rcv))
55343283184SGleb Smirnoff sowakeup(so, SO_RCV);
55443283184SGleb Smirnoff else
55543283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
55643283184SGleb Smirnoff }
55743283184SGleb Smirnoff
55843283184SGleb Smirnoff void
sowwakeup_locked(struct socket * so)55943283184SGleb Smirnoff sowwakeup_locked(struct socket *so)
56043283184SGleb Smirnoff {
56143283184SGleb Smirnoff SOCK_SENDBUF_LOCK_ASSERT(so);
562a1da7dc1SMark Johnston if (so->so_snd.sb_flags & SB_SPLICED)
563a1da7dc1SMark Johnston splice_pull(so);
564a1da7dc1SMark Johnston else if (sb_notify(&so->so_snd))
56543283184SGleb Smirnoff sowakeup(so, SO_SND);
56643283184SGleb Smirnoff else
56743283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
568df8bae1dSRodney W. Grimes }
569df8bae1dSRodney W. Grimes
570df8bae1dSRodney W. Grimes /*
571df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines.
572df8bae1dSRodney W. Grimes *
573050ac265SRobert Watson * Each socket contains two socket buffers: one for sending data and one for
574050ac265SRobert Watson * receiving data. Each buffer contains a queue of mbufs, information about
575050ac265SRobert Watson * the number of mbufs and amount of data in the queue, and other fields
576050ac265SRobert Watson * allowing select() statements and notification on data availability to be
577050ac265SRobert Watson * implemented.
578df8bae1dSRodney W. Grimes *
579050ac265SRobert Watson * Data stored in a socket buffer is maintained as a list of records. Each
580050ac265SRobert Watson * record is a list of mbufs chained together with the m_next field. Records
581050ac265SRobert Watson * are chained together with the m_nextpkt field. The upper level routine
582050ac265SRobert Watson * soreceive() expects the following conventions to be observed when placing
583050ac265SRobert Watson * information in the receive buffer:
584df8bae1dSRodney W. Grimes *
585050ac265SRobert Watson * 1. If the protocol requires each message be preceded by the sender's name,
586050ac265SRobert Watson * then a record containing that name must be present before any
587050ac265SRobert Watson * associated data (mbuf's must be of type MT_SONAME).
588050ac265SRobert Watson * 2. If the protocol supports the exchange of ``access rights'' (really just
589050ac265SRobert Watson * additional data associated with the message), and there are ``rights''
590050ac265SRobert Watson * to be received, then a record containing this data should be present
591050ac265SRobert Watson * (mbuf's must be of type MT_RIGHTS).
592050ac265SRobert Watson * 3. If a name or rights record exists, then it must be followed by a data
593050ac265SRobert Watson * record, perhaps of zero length.
594df8bae1dSRodney W. Grimes *
595df8bae1dSRodney W. Grimes * Before using a new socket structure it is first necessary to reserve
596df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit
597df8bae1dSRodney W. Grimes * some of the available buffer space in the system buffer pool for the
598050ac265SRobert Watson * socket (currently, it does nothing but enforce limits). The space should
599050ac265SRobert Watson * be released by calling sbrelease() when the socket is destroyed.
600df8bae1dSRodney W. Grimes */
60126f9a767SRodney W. Grimes int
soreserve(struct socket * so,u_long sndcc,u_long rcvcc)602050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
603df8bae1dSRodney W. Grimes {
604b40ce416SJulian Elischer struct thread *td = curthread;
605df8bae1dSRodney W. Grimes
60643283184SGleb Smirnoff SOCK_SENDBUF_LOCK(so);
60743283184SGleb Smirnoff SOCK_RECVBUF_LOCK(so);
60843283184SGleb Smirnoff if (sbreserve_locked(so, SO_SND, sndcc, td) == 0)
6093f11a2f3SRobert Watson goto bad;
61043283184SGleb Smirnoff if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0)
6113f11a2f3SRobert Watson goto bad2;
612df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0)
613df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1;
614df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0)
615df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES;
616df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
617df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
61843283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
61943283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
620df8bae1dSRodney W. Grimes return (0);
621df8bae1dSRodney W. Grimes bad2:
622*67c1c4dfSGleb Smirnoff sbunreserve_locked(so, SO_SND);
623df8bae1dSRodney W. Grimes bad:
62443283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so);
62543283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so);
626df8bae1dSRodney W. Grimes return (ENOBUFS);
627df8bae1dSRodney W. Grimes }
628df8bae1dSRodney W. Grimes
62979cb7eb4SDavid Greenman static int
sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)63079cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
63179cb7eb4SDavid Greenman {
63279cb7eb4SDavid Greenman int error = 0;
63386a93d51SJohn Baldwin u_long tmp_sb_max = sb_max;
63479cb7eb4SDavid Greenman
63586a93d51SJohn Baldwin error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
63679cb7eb4SDavid Greenman if (error || !req->newptr)
63779cb7eb4SDavid Greenman return (error);
63886a93d51SJohn Baldwin if (tmp_sb_max < MSIZE + MCLBYTES)
63979cb7eb4SDavid Greenman return (EINVAL);
64086a93d51SJohn Baldwin sb_max = tmp_sb_max;
6417b660faaSAlexander V. Chernikov sb_max_adj = BUF_MAX_ADJ(sb_max);
64279cb7eb4SDavid Greenman return (0);
64379cb7eb4SDavid Greenman }
64479cb7eb4SDavid Greenman
645df8bae1dSRodney W. Grimes /*
646050ac265SRobert Watson * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't
647050ac265SRobert Watson * become limiting if buffering efficiency is near the normal case.
648df8bae1dSRodney W. Grimes */
64943283184SGleb Smirnoff bool
sbreserve_locked_limit(struct socket * so,sb_which which,u_long cc,u_long buf_max,struct thread * td)6507b660faaSAlexander V. Chernikov sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc,
6517b660faaSAlexander V. Chernikov u_long buf_max, struct thread *td)
652df8bae1dSRodney W. Grimes {
65343283184SGleb Smirnoff struct sockbuf *sb = sobuf(so, which);
65491d5354aSJohn Baldwin rlim_t sbsize_limit;
655ecf72308SBrian Feldman
65643283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which);
6573f11a2f3SRobert Watson
658ecf72308SBrian Feldman /*
6597978014dSRobert Watson * When a thread is passed, we take into account the thread's socket
6607978014dSRobert Watson * buffer size limit. The caller will generally pass curthread, but
6617978014dSRobert Watson * in the TCP input path, NULL will be passed to indicate that no
6627978014dSRobert Watson * appropriate thread resource limits are available. In that case,
6637978014dSRobert Watson * we don't apply a process limit.
664ecf72308SBrian Feldman */
6657b660faaSAlexander V. Chernikov if (cc > BUF_MAX_ADJ(buf_max))
66643283184SGleb Smirnoff return (false);
66791d5354aSJohn Baldwin if (td != NULL) {
668f6f6d240SMateusz Guzik sbsize_limit = lim_cur(td, RLIMIT_SBSIZE);
66991d5354aSJohn Baldwin } else
67091d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY;
671f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc,
67291d5354aSJohn Baldwin sbsize_limit))
67343283184SGleb Smirnoff return (false);
6747b660faaSAlexander V. Chernikov sb->sb_mbmax = min(cc * sb_efficiency, buf_max);
675df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat)
676df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat;
67743283184SGleb Smirnoff return (true);
678df8bae1dSRodney W. Grimes }
679df8bae1dSRodney W. Grimes
6807b660faaSAlexander V. Chernikov bool
sbreserve_locked(struct socket * so,sb_which which,u_long cc,struct thread * td)6817b660faaSAlexander V. Chernikov sbreserve_locked(struct socket *so, sb_which which, u_long cc,
6827b660faaSAlexander V. Chernikov struct thread *td)
6837b660faaSAlexander V. Chernikov {
6847b660faaSAlexander V. Chernikov return (sbreserve_locked_limit(so, which, cc, sb_max, td));
6857b660faaSAlexander V. Chernikov }
6867b660faaSAlexander V. Chernikov
687*67c1c4dfSGleb Smirnoff static void
sbunreserve_locked(struct socket * so,sb_which which)688*67c1c4dfSGleb Smirnoff sbunreserve_locked(struct socket *so, sb_which which)
689*67c1c4dfSGleb Smirnoff {
690*67c1c4dfSGleb Smirnoff struct sockbuf *sb = sobuf(so, which);
691*67c1c4dfSGleb Smirnoff
692*67c1c4dfSGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which);
693*67c1c4dfSGleb Smirnoff
694*67c1c4dfSGleb Smirnoff (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
695*67c1c4dfSGleb Smirnoff RLIM_INFINITY);
696*67c1c4dfSGleb Smirnoff sb->sb_mbmax = 0;
697*67c1c4dfSGleb Smirnoff }
698*67c1c4dfSGleb Smirnoff
6993f11a2f3SRobert Watson int
sbsetopt(struct socket * so,struct sockopt * sopt)700f6696856SAlexander V. Chernikov sbsetopt(struct socket *so, struct sockopt *sopt)
7013f11a2f3SRobert Watson {
70264290befSGleb Smirnoff struct sockbuf *sb;
70343283184SGleb Smirnoff sb_which wh;
70464290befSGleb Smirnoff short *flags;
705f6696856SAlexander V. Chernikov u_int cc, *hiwat, *lowat;
706f6696856SAlexander V. Chernikov int error, optval;
707f6696856SAlexander V. Chernikov
708f6696856SAlexander V. Chernikov error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval);
709f6696856SAlexander V. Chernikov if (error != 0)
710f6696856SAlexander V. Chernikov return (error);
711f6696856SAlexander V. Chernikov
712f6696856SAlexander V. Chernikov /*
713f6696856SAlexander V. Chernikov * Values < 1 make no sense for any of these options,
714f6696856SAlexander V. Chernikov * so disallow them.
715f6696856SAlexander V. Chernikov */
716f6696856SAlexander V. Chernikov if (optval < 1)
717f6696856SAlexander V. Chernikov return (EINVAL);
718f6696856SAlexander V. Chernikov cc = optval;
7193f11a2f3SRobert Watson
720b2037136SMatt Macy sb = NULL;
72164290befSGleb Smirnoff SOCK_LOCK(so);
72264290befSGleb Smirnoff if (SOLISTENING(so)) {
723f6696856SAlexander V. Chernikov switch (sopt->sopt_name) {
72464290befSGleb Smirnoff case SO_SNDLOWAT:
72564290befSGleb Smirnoff case SO_SNDBUF:
72664290befSGleb Smirnoff lowat = &so->sol_sbsnd_lowat;
72764290befSGleb Smirnoff hiwat = &so->sol_sbsnd_hiwat;
72864290befSGleb Smirnoff flags = &so->sol_sbsnd_flags;
72964290befSGleb Smirnoff break;
73064290befSGleb Smirnoff case SO_RCVLOWAT:
73164290befSGleb Smirnoff case SO_RCVBUF:
73264290befSGleb Smirnoff lowat = &so->sol_sbrcv_lowat;
73364290befSGleb Smirnoff hiwat = &so->sol_sbrcv_hiwat;
73464290befSGleb Smirnoff flags = &so->sol_sbrcv_flags;
73564290befSGleb Smirnoff break;
73664290befSGleb Smirnoff }
73764290befSGleb Smirnoff } else {
738f6696856SAlexander V. Chernikov switch (sopt->sopt_name) {
73964290befSGleb Smirnoff case SO_SNDLOWAT:
74064290befSGleb Smirnoff case SO_SNDBUF:
74164290befSGleb Smirnoff sb = &so->so_snd;
74243283184SGleb Smirnoff wh = SO_SND;
74364290befSGleb Smirnoff break;
74464290befSGleb Smirnoff case SO_RCVLOWAT:
74564290befSGleb Smirnoff case SO_RCVBUF:
74664290befSGleb Smirnoff sb = &so->so_rcv;
74743283184SGleb Smirnoff wh = SO_RCV;
74864290befSGleb Smirnoff break;
74964290befSGleb Smirnoff }
75064290befSGleb Smirnoff flags = &sb->sb_flags;
75164290befSGleb Smirnoff hiwat = &sb->sb_hiwat;
75264290befSGleb Smirnoff lowat = &sb->sb_lowat;
75343283184SGleb Smirnoff SOCK_BUF_LOCK(so, wh);
75464290befSGleb Smirnoff }
75564290befSGleb Smirnoff
75664290befSGleb Smirnoff error = 0;
757f6696856SAlexander V. Chernikov switch (sopt->sopt_name) {
75864290befSGleb Smirnoff case SO_SNDBUF:
75964290befSGleb Smirnoff case SO_RCVBUF:
76064290befSGleb Smirnoff if (SOLISTENING(so)) {
76164290befSGleb Smirnoff if (cc > sb_max_adj) {
76264290befSGleb Smirnoff error = ENOBUFS;
76364290befSGleb Smirnoff break;
76464290befSGleb Smirnoff }
76564290befSGleb Smirnoff *hiwat = cc;
76664290befSGleb Smirnoff if (*lowat > *hiwat)
76764290befSGleb Smirnoff *lowat = *hiwat;
76864290befSGleb Smirnoff } else {
76943283184SGleb Smirnoff if (!sbreserve_locked(so, wh, cc, curthread))
77064290befSGleb Smirnoff error = ENOBUFS;
77164290befSGleb Smirnoff }
77264290befSGleb Smirnoff if (error == 0)
77364290befSGleb Smirnoff *flags &= ~SB_AUTOSIZE;
77464290befSGleb Smirnoff break;
77564290befSGleb Smirnoff case SO_SNDLOWAT:
77664290befSGleb Smirnoff case SO_RCVLOWAT:
77764290befSGleb Smirnoff /*
77864290befSGleb Smirnoff * Make sure the low-water is never greater than the
77964290befSGleb Smirnoff * high-water.
78064290befSGleb Smirnoff */
78164290befSGleb Smirnoff *lowat = (cc > *hiwat) ? *hiwat : cc;
78264290befSGleb Smirnoff break;
78364290befSGleb Smirnoff }
78464290befSGleb Smirnoff
78564290befSGleb Smirnoff if (!SOLISTENING(so))
78643283184SGleb Smirnoff SOCK_BUF_UNLOCK(so, wh);
78764290befSGleb Smirnoff SOCK_UNLOCK(so);
7883f11a2f3SRobert Watson return (error);
7893f11a2f3SRobert Watson }
7903f11a2f3SRobert Watson
791df8bae1dSRodney W. Grimes /*
792df8bae1dSRodney W. Grimes * Free mbufs held by a socket, and reserved mbuf space.
793df8bae1dSRodney W. Grimes */
79426f9a767SRodney W. Grimes void
sbrelease_locked(struct socket * so,sb_which which)79543283184SGleb Smirnoff sbrelease_locked(struct socket *so, sb_which which)
796df8bae1dSRodney W. Grimes {
797371392bcSGleb Smirnoff struct sockbuf *sb = sobuf(so, which);
798df8bae1dSRodney W. Grimes
79943283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which);
800a34b7046SRobert Watson
801371392bcSGleb Smirnoff sbflush_locked(sb);
802*67c1c4dfSGleb Smirnoff sbunreserve_locked(so, which);
803df8bae1dSRodney W. Grimes }
804df8bae1dSRodney W. Grimes
805a34b7046SRobert Watson void
sbrelease(struct socket * so,sb_which which)80643283184SGleb Smirnoff sbrelease(struct socket *so, sb_which which)
807a34b7046SRobert Watson {
808a34b7046SRobert Watson
80943283184SGleb Smirnoff SOCK_BUF_LOCK(so, which);
81043283184SGleb Smirnoff sbrelease_locked(so, which);
81143283184SGleb Smirnoff SOCK_BUF_UNLOCK(so, which);
812a34b7046SRobert Watson }
813eaa6dfbcSRobert Watson
814eaa6dfbcSRobert Watson void
sbdestroy(struct socket * so,sb_which which)81543283184SGleb Smirnoff sbdestroy(struct socket *so, sb_which which)
816eaa6dfbcSRobert Watson {
817b2e60773SJohn Baldwin #ifdef KERN_TLS
81843283184SGleb Smirnoff struct sockbuf *sb = sobuf(so, which);
81943283184SGleb Smirnoff
820b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL)
821b2e60773SJohn Baldwin ktls_free(sb->sb_tls_info);
822b2e60773SJohn Baldwin sb->sb_tls_info = NULL;
823b2e60773SJohn Baldwin #endif
824371392bcSGleb Smirnoff sbrelease_locked(so, which);
825eaa6dfbcSRobert Watson }
826eaa6dfbcSRobert Watson
827df8bae1dSRodney W. Grimes /*
828050ac265SRobert Watson * Routines to add and remove data from an mbuf queue.
829df8bae1dSRodney W. Grimes *
830050ac265SRobert Watson * The routines sbappend() or sbappendrecord() are normally called to append
831050ac265SRobert Watson * new mbufs to a socket buffer, after checking that adequate space is
832050ac265SRobert Watson * available, comparing the function sbspace() with the amount of data to be
833050ac265SRobert Watson * added. sbappendrecord() differs from sbappend() in that data supplied is
834050ac265SRobert Watson * treated as the beginning of a new record. To place a sender's address,
835050ac265SRobert Watson * optional access rights, and data in a socket receive buffer,
836050ac265SRobert Watson * sbappendaddr() should be used. To place access rights and data in a
837050ac265SRobert Watson * socket receive buffer, sbappendrights() should be used. In either case,
838050ac265SRobert Watson * the new data begins a new record. Note that unlike sbappend() and
839050ac265SRobert Watson * sbappendrecord(), these routines check for the caller that there will be
840050ac265SRobert Watson * enough space to store the data. Each fails if there is not enough space,
841050ac265SRobert Watson * or if it cannot find mbufs to store additional information in.
842df8bae1dSRodney W. Grimes *
843050ac265SRobert Watson * Reliable protocols may use the socket send buffer to hold data awaiting
844050ac265SRobert Watson * acknowledgement. Data is normally copied from a socket send buffer in a
845050ac265SRobert Watson * protocol with m_copy for output to a peer, and then removing the data from
846050ac265SRobert Watson * the socket buffer with sbdrop() or sbdroprecord() when the data is
847050ac265SRobert Watson * acknowledged by the peer.
848df8bae1dSRodney W. Grimes */
849395bb186SSam Leffler #ifdef SOCKBUF_DEBUG
850395bb186SSam Leffler void
sblastrecordchk(struct sockbuf * sb,const char * file,int line)851395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line)
852395bb186SSam Leffler {
853395bb186SSam Leffler struct mbuf *m = sb->sb_mb;
854395bb186SSam Leffler
855a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
856a34b7046SRobert Watson
857395bb186SSam Leffler while (m && m->m_nextpkt)
858395bb186SSam Leffler m = m->m_nextpkt;
859395bb186SSam Leffler
860395bb186SSam Leffler if (m != sb->sb_lastrecord) {
861395bb186SSam Leffler printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
862395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_lastrecord, m);
863395bb186SSam Leffler printf("packet chain:\n");
864395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt)
865395bb186SSam Leffler printf("\t%p\n", m);
866395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line);
867395bb186SSam Leffler }
868395bb186SSam Leffler }
869395bb186SSam Leffler
870395bb186SSam Leffler void
sblastmbufchk(struct sockbuf * sb,const char * file,int line)871395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line)
872395bb186SSam Leffler {
873395bb186SSam Leffler struct mbuf *m = sb->sb_mb;
874395bb186SSam Leffler struct mbuf *n;
875395bb186SSam Leffler
876a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
877a34b7046SRobert Watson
878395bb186SSam Leffler while (m && m->m_nextpkt)
879395bb186SSam Leffler m = m->m_nextpkt;
880395bb186SSam Leffler
881395bb186SSam Leffler while (m && m->m_next)
882395bb186SSam Leffler m = m->m_next;
883395bb186SSam Leffler
884395bb186SSam Leffler if (m != sb->sb_mbtail) {
885395bb186SSam Leffler printf("%s: sb_mb %p sb_mbtail %p last %p\n",
886395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_mbtail, m);
887395bb186SSam Leffler printf("packet tree:\n");
888395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) {
889395bb186SSam Leffler printf("\t");
890395bb186SSam Leffler for (n = m; n != NULL; n = n->m_next)
891395bb186SSam Leffler printf("%p ", n);
892395bb186SSam Leffler printf("\n");
893395bb186SSam Leffler }
894395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line);
895395bb186SSam Leffler }
8963c0e5685SJohn Baldwin
8973c0e5685SJohn Baldwin #ifdef KERN_TLS
8983c0e5685SJohn Baldwin m = sb->sb_mtls;
8993c0e5685SJohn Baldwin while (m && m->m_next)
9003c0e5685SJohn Baldwin m = m->m_next;
9013c0e5685SJohn Baldwin
9023c0e5685SJohn Baldwin if (m != sb->sb_mtlstail) {
9033c0e5685SJohn Baldwin printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
9043c0e5685SJohn Baldwin __func__, sb->sb_mtls, sb->sb_mtlstail, m);
9053c0e5685SJohn Baldwin printf("TLS packet tree:\n");
9063c0e5685SJohn Baldwin printf("\t");
9073c0e5685SJohn Baldwin for (m = sb->sb_mtls; m != NULL; m = m->m_next) {
9083c0e5685SJohn Baldwin printf("%p ", m);
9093c0e5685SJohn Baldwin }
9103c0e5685SJohn Baldwin printf("\n");
9113c0e5685SJohn Baldwin panic("%s from %s:%u", __func__, file, line);
9123c0e5685SJohn Baldwin }
9133c0e5685SJohn Baldwin #endif
914395bb186SSam Leffler }
915395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */
916395bb186SSam Leffler
/*
 * Link record m0 at the end of the record chain of sb: it becomes sb_mb if
 * no last record is cached, otherwise it is chained after the current last
 * record; either way it becomes the new sb_lastrecord.  Both arguments are
 * evaluated more than once, so they must be free of side effects.
 */
#define	SBLINKRECORD(sb, m0) do {					\
	SOCKBUF_LOCK_ASSERT(sb);					\
	if ((sb)->sb_lastrecord == NULL)				\
		(sb)->sb_mb = (m0);					\
	else								\
		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
	(sb)->sb_lastrecord = (m0);					\
} while (/*CONSTCOND*/0)
925395bb186SSam Leffler
926df8bae1dSRodney W. Grimes /*
927050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The
928050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs
929050ac265SRobert Watson * are discarded and mbufs are compacted where possible.
930df8bae1dSRodney W. Grimes */
93126f9a767SRodney W. Grimes void
sbappend_locked(struct sockbuf * sb,struct mbuf * m,int flags)932829fae90SGleb Smirnoff sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
933df8bae1dSRodney W. Grimes {
934050ac265SRobert Watson struct mbuf *n;
935df8bae1dSRodney W. Grimes
936a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
937a34b7046SRobert Watson
938b85f65afSPedro F. Giffuni if (m == NULL)
939df8bae1dSRodney W. Grimes return;
940ec45f952SMark Johnston kmsan_check_mbuf(m, "sbappend");
941829fae90SGleb Smirnoff sbm_clrprotoflags(m, flags);
942395bb186SSam Leffler SBLASTRECORDCHK(sb);
943797f2d22SPoul-Henning Kamp n = sb->sb_mb;
944797f2d22SPoul-Henning Kamp if (n) {
945df8bae1dSRodney W. Grimes while (n->m_nextpkt)
946df8bae1dSRodney W. Grimes n = n->m_nextpkt;
947df8bae1dSRodney W. Grimes do {
948df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) {
949a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
950df8bae1dSRodney W. Grimes return;
951df8bae1dSRodney W. Grimes }
952df8bae1dSRodney W. Grimes } while (n->m_next && (n = n->m_next));
953395bb186SSam Leffler } else {
954395bb186SSam Leffler /*
955395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but
956395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that
957395bb186SSam Leffler * XXX way.
958395bb186SSam Leffler */
959395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) {
960395bb186SSam Leffler do {
961395bb186SSam Leffler if (n->m_flags & M_EOR) {
962a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
963395bb186SSam Leffler return;
964395bb186SSam Leffler }
965395bb186SSam Leffler } while (n->m_next && (n = n->m_next));
966395bb186SSam Leffler } else {
967395bb186SSam Leffler /*
968395bb186SSam Leffler * If this is the first record in the socket buffer,
969395bb186SSam Leffler * it's also the last record.
970395bb186SSam Leffler */
971395bb186SSam Leffler sb->sb_lastrecord = m;
972395bb186SSam Leffler }
973df8bae1dSRodney W. Grimes }
974df8bae1dSRodney W. Grimes sbcompress(sb, m, n);
975395bb186SSam Leffler SBLASTRECORDCHK(sb);
976395bb186SSam Leffler }
977395bb186SSam Leffler
/*
 * Unlocked entry point for sbappend_locked(): append mbuf chain m to the
 * last record in the socket buffer sb, taking the sockbuf lock around the
 * operation.
 */
void
sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
{
	SOCKBUF_LOCK(sb);
	sbappend_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
991a34b7046SRobert Watson
9923c0e5685SJohn Baldwin #ifdef KERN_TLS
9933c0e5685SJohn Baldwin /*
9943c0e5685SJohn Baldwin * Append an mbuf containing encrypted TLS data. The data
9953c0e5685SJohn Baldwin * is marked M_NOTREADY until it has been decrypted and
9963c0e5685SJohn Baldwin * stored as a TLS record.
9973c0e5685SJohn Baldwin */
9983c0e5685SJohn Baldwin static void
sbappend_ktls_rx(struct sockbuf * sb,struct mbuf * m)9993c0e5685SJohn Baldwin sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
10003c0e5685SJohn Baldwin {
1001fe8c78f0SHans Petter Selasky struct ifnet *ifp;
10023c0e5685SJohn Baldwin struct mbuf *n;
1003fe8c78f0SHans Petter Selasky int flags;
1004fe8c78f0SHans Petter Selasky
1005fe8c78f0SHans Petter Selasky ifp = NULL;
1006fe8c78f0SHans Petter Selasky flags = M_NOTREADY;
10073c0e5685SJohn Baldwin
10083c0e5685SJohn Baldwin SBLASTMBUFCHK(sb);
10093c0e5685SJohn Baldwin
1010fe8c78f0SHans Petter Selasky /* Mbuf chain must start with a packet header. */
1011fe8c78f0SHans Petter Selasky MPASS((m->m_flags & M_PKTHDR) != 0);
10123c0e5685SJohn Baldwin
1013fe8c78f0SHans Petter Selasky /* Remove all packet headers and mbuf tags to get a pure data chain. */
1014fe8c78f0SHans Petter Selasky for (n = m; n != NULL; n = n->m_next) {
1015fe8c78f0SHans Petter Selasky if (n->m_flags & M_PKTHDR) {
1016fe8c78f0SHans Petter Selasky ifp = m->m_pkthdr.leaf_rcvif;
1017fe8c78f0SHans Petter Selasky if ((n->m_pkthdr.csum_flags & CSUM_TLS_MASK) ==
1018fe8c78f0SHans Petter Selasky CSUM_TLS_DECRYPTED) {
1019fe8c78f0SHans Petter Selasky /* Mark all mbufs in this packet decrypted. */
1020fe8c78f0SHans Petter Selasky flags = M_NOTREADY | M_DECRYPTED;
1021fe8c78f0SHans Petter Selasky } else {
1022fe8c78f0SHans Petter Selasky flags = M_NOTREADY;
1023fe8c78f0SHans Petter Selasky }
1024fe8c78f0SHans Petter Selasky m_demote_pkthdr(n);
1025fe8c78f0SHans Petter Selasky }
1026fe8c78f0SHans Petter Selasky
1027fe8c78f0SHans Petter Selasky n->m_flags &= M_DEMOTEFLAGS;
1028fe8c78f0SHans Petter Selasky n->m_flags |= flags;
1029fe8c78f0SHans Petter Selasky
1030fe8c78f0SHans Petter Selasky MPASS((n->m_flags & M_NOTREADY) != 0);
1031fe8c78f0SHans Petter Selasky }
1032fe8c78f0SHans Petter Selasky
10333c0e5685SJohn Baldwin sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
10343c0e5685SJohn Baldwin ktls_check_rx(sb);
1035fe8c78f0SHans Petter Selasky
1036fe8c78f0SHans Petter Selasky /* Check for incoming packet route changes: */
1037fe8c78f0SHans Petter Selasky if (ifp != NULL && sb->sb_tls_info->rx_ifp != NULL &&
1038fe8c78f0SHans Petter Selasky sb->sb_tls_info->rx_ifp != ifp)
1039fe8c78f0SHans Petter Selasky ktls_input_ifp_mismatch(sb, ifp);
10403c0e5685SJohn Baldwin }
10413c0e5685SJohn Baldwin #endif
10423c0e5685SJohn Baldwin
1043a34b7046SRobert Watson /*
1044050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely
1045050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer,
1046050ac265SRobert Watson * that is, a stream protocol (such as TCP).
1047395bb186SSam Leffler */
1048395bb186SSam Leffler void
sbappendstream_locked(struct sockbuf * sb,struct mbuf * m,int flags)1049651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
1050395bb186SSam Leffler {
1051a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
1052395bb186SSam Leffler
1053395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
10543c0e5685SJohn Baldwin
1055ec45f952SMark Johnston kmsan_check_mbuf(m, "sbappend");
1056ec45f952SMark Johnston
10573c0e5685SJohn Baldwin #ifdef KERN_TLS
10583c0e5685SJohn Baldwin /*
10593c0e5685SJohn Baldwin * Decrypted TLS records are appended as records via
10603c0e5685SJohn Baldwin * sbappendrecord(). TCP passes encrypted TLS records to this
10613c0e5685SJohn Baldwin * function which must be scheduled for decryption.
10623c0e5685SJohn Baldwin */
10633c0e5685SJohn Baldwin if (sb->sb_flags & SB_TLS_RX) {
10643c0e5685SJohn Baldwin sbappend_ktls_rx(sb, m);
10653c0e5685SJohn Baldwin return;
10663c0e5685SJohn Baldwin }
10673c0e5685SJohn Baldwin #endif
10683c0e5685SJohn Baldwin
1069395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
1070395bb186SSam Leffler
1071395bb186SSam Leffler SBLASTMBUFCHK(sb);
1072395bb186SSam Leffler
1073b2e60773SJohn Baldwin #ifdef KERN_TLS
1074b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL)
1075b2e60773SJohn Baldwin ktls_seq(sb, m);
1076b2e60773SJohn Baldwin #endif
1077b2e60773SJohn Baldwin
1078844cacd1SGleb Smirnoff /* Remove all packet headers and mbuf tags to get a pure data chain. */
1079651e4e6aSGleb Smirnoff m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
1080844cacd1SGleb Smirnoff
1081395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail);
1082395bb186SSam Leffler
1083395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb;
1084395bb186SSam Leffler SBLASTRECORDCHK(sb);
1085df8bae1dSRodney W. Grimes }
1086df8bae1dSRodney W. Grimes
/*
 * Unlocked entry point for sbappendstream_locked(): append m to a socket
 * buffer that only ever holds a single record (a stream protocol such as
 * TCP), taking the sockbuf lock around the operation.
 */
void
sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
{
	SOCKBUF_LOCK(sb);
	sbappendstream_locked(sb, m, flags);
	SOCKBUF_UNLOCK(sb);
}
1100a34b7046SRobert Watson
1101df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG
110226f9a767SRodney W. Grimes void
sbcheck(struct sockbuf * sb,const char * file,int line)110357f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line)
1104df8bae1dSRodney W. Grimes {
11050f9d0a73SGleb Smirnoff struct mbuf *m, *n, *fnrdy;
11060f9d0a73SGleb Smirnoff u_long acc, ccc, mbcnt;
11073c0e5685SJohn Baldwin #ifdef KERN_TLS
11083c0e5685SJohn Baldwin u_long tlscc;
11093c0e5685SJohn Baldwin #endif
1110df8bae1dSRodney W. Grimes
1111a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
1112a34b7046SRobert Watson
11130f9d0a73SGleb Smirnoff acc = ccc = mbcnt = 0;
11140f9d0a73SGleb Smirnoff fnrdy = NULL;
111557f43a45SGleb Smirnoff
11160931333fSBill Fenner for (m = sb->sb_mb; m; m = n) {
11170931333fSBill Fenner n = m->m_nextpkt;
11180931333fSBill Fenner for (; m; m = m->m_next) {
111957f43a45SGleb Smirnoff if (m->m_len == 0) {
112057f43a45SGleb Smirnoff printf("sb %p empty mbuf %p\n", sb, m);
112157f43a45SGleb Smirnoff goto fail;
112257f43a45SGleb Smirnoff }
11230f9d0a73SGleb Smirnoff if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
11240f9d0a73SGleb Smirnoff if (m != sb->sb_fnrdy) {
11250f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p != m %p\n",
11260f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m);
11270f9d0a73SGleb Smirnoff goto fail;
11280f9d0a73SGleb Smirnoff }
11290f9d0a73SGleb Smirnoff fnrdy = m;
11300f9d0a73SGleb Smirnoff }
11310f9d0a73SGleb Smirnoff if (fnrdy) {
11320f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) {
11330f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p, m %p is avail\n",
11340f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m);
11350f9d0a73SGleb Smirnoff goto fail;
11360f9d0a73SGleb Smirnoff }
11370f9d0a73SGleb Smirnoff } else
11380f9d0a73SGleb Smirnoff acc += m->m_len;
11390f9d0a73SGleb Smirnoff ccc += m->m_len;
1140df8bae1dSRodney W. Grimes mbcnt += MSIZE;
1141313861b8SJulian Elischer if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
1142df8bae1dSRodney W. Grimes mbcnt += m->m_ext.ext_size;
11430931333fSBill Fenner }
1144df8bae1dSRodney W. Grimes }
11453c0e5685SJohn Baldwin #ifdef KERN_TLS
11463c0e5685SJohn Baldwin /*
11473c0e5685SJohn Baldwin * Account for mbufs "detached" by ktls_detach_record() while
11483c0e5685SJohn Baldwin * they are decrypted by ktls_decrypt(). tlsdcc gives a count
11493c0e5685SJohn Baldwin * of the detached bytes that are included in ccc. The mbufs
11503c0e5685SJohn Baldwin * and clusters are not included in the socket buffer
11513c0e5685SJohn Baldwin * accounting.
11523c0e5685SJohn Baldwin */
11533c0e5685SJohn Baldwin ccc += sb->sb_tlsdcc;
11543c0e5685SJohn Baldwin
11553c0e5685SJohn Baldwin tlscc = 0;
11563c0e5685SJohn Baldwin for (m = sb->sb_mtls; m; m = m->m_next) {
11573c0e5685SJohn Baldwin if (m->m_nextpkt != NULL) {
11583c0e5685SJohn Baldwin printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
11593c0e5685SJohn Baldwin goto fail;
11603c0e5685SJohn Baldwin }
11613c0e5685SJohn Baldwin if ((m->m_flags & M_NOTREADY) == 0) {
11623c0e5685SJohn Baldwin printf("sb %p TLS mbuf %p ready\n", sb, m);
11633c0e5685SJohn Baldwin goto fail;
11643c0e5685SJohn Baldwin }
11653c0e5685SJohn Baldwin tlscc += m->m_len;
11663c0e5685SJohn Baldwin ccc += m->m_len;
11673c0e5685SJohn Baldwin mbcnt += MSIZE;
11683c0e5685SJohn Baldwin if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
11693c0e5685SJohn Baldwin mbcnt += m->m_ext.ext_size;
11703c0e5685SJohn Baldwin }
11713c0e5685SJohn Baldwin
11723c0e5685SJohn Baldwin if (sb->sb_tlscc != tlscc) {
11733c0e5685SJohn Baldwin printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
11743c0e5685SJohn Baldwin sb->sb_tlsdcc);
11753c0e5685SJohn Baldwin goto fail;
11763c0e5685SJohn Baldwin }
11773c0e5685SJohn Baldwin #endif
11780f9d0a73SGleb Smirnoff if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
11790f9d0a73SGleb Smirnoff printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
11800f9d0a73SGleb Smirnoff acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
11813c0e5685SJohn Baldwin #ifdef KERN_TLS
11823c0e5685SJohn Baldwin printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
11833c0e5685SJohn Baldwin sb->sb_tlsdcc);
11843c0e5685SJohn Baldwin #endif
118557f43a45SGleb Smirnoff goto fail;
1186df8bae1dSRodney W. Grimes }
118757f43a45SGleb Smirnoff return;
118857f43a45SGleb Smirnoff fail:
118957f43a45SGleb Smirnoff panic("%s from %s:%u", __func__, file, line);
1190df8bae1dSRodney W. Grimes }
1191df8bae1dSRodney W. Grimes #endif
1192df8bae1dSRodney W. Grimes
1193df8bae1dSRodney W. Grimes /*
1194050ac265SRobert Watson * As above, except the mbuf chain begins a new record.
1195df8bae1dSRodney W. Grimes */
119626f9a767SRodney W. Grimes void
sbappendrecord_locked(struct sockbuf * sb,struct mbuf * m0)1197050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
1198df8bae1dSRodney W. Grimes {
1199050ac265SRobert Watson struct mbuf *m;
1200df8bae1dSRodney W. Grimes
1201a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
1202a34b7046SRobert Watson
1203b85f65afSPedro F. Giffuni if (m0 == NULL)
1204df8bae1dSRodney W. Grimes return;
1205ec45f952SMark Johnston
1206ec45f952SMark Johnston kmsan_check_mbuf(m0, "sbappend");
120753b680caSGleb Smirnoff m_clrprotoflags(m0);
1208ec45f952SMark Johnston
1209df8bae1dSRodney W. Grimes /*
1210050ac265SRobert Watson * Put the first mbuf on the queue. Note this permits zero length
1211050ac265SRobert Watson * records.
1212df8bae1dSRodney W. Grimes */
1213df8bae1dSRodney W. Grimes sballoc(sb, m0);
1214395bb186SSam Leffler SBLASTRECORDCHK(sb);
1215395bb186SSam Leffler SBLINKRECORD(sb, m0);
1216e72a94adSMaksim Yevmenkin sb->sb_mbtail = m0;
1217df8bae1dSRodney W. Grimes m = m0->m_next;
1218df8bae1dSRodney W. Grimes m0->m_next = 0;
1219df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) {
1220df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR;
1221df8bae1dSRodney W. Grimes m->m_flags |= M_EOR;
1222df8bae1dSRodney W. Grimes }
1223e72a94adSMaksim Yevmenkin /* always call sbcompress() so it can do SBLASTMBUFCHK() */
1224df8bae1dSRodney W. Grimes sbcompress(sb, m, m0);
1225df8bae1dSRodney W. Grimes }
1226df8bae1dSRodney W. Grimes
/*
 * Unlocked entry point for sbappendrecord_locked(): append m0 to sb as the
 * beginning of a new record, taking the sockbuf lock around the operation.
 */
void
sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
{
	SOCKBUF_LOCK(sb);
	sbappendrecord_locked(sb, m0);
	SOCKBUF_UNLOCK(sb);
}
1238a34b7046SRobert Watson
12398de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */
12408de34a88SAlan Somers static int
sbappendaddr_locked_internal(struct sockbuf * sb,const struct sockaddr * asa,struct mbuf * m0,struct mbuf * control,struct mbuf * ctrl_last)12418de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
12428de34a88SAlan Somers struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
1243df8bae1dSRodney W. Grimes {
1244395bb186SSam Leffler struct mbuf *m, *n, *nlast;
1245ec45f952SMark Johnston
1246ec45f952SMark Johnston if (m0 != NULL)
1247ec45f952SMark Johnston kmsan_check_mbuf(m0, "sbappend");
1248ec45f952SMark Johnston if (control != NULL)
1249ec45f952SMark Johnston kmsan_check_mbuf(control, "sbappend");
1250ec45f952SMark Johnston
1251c43cad1aSScott Long #if MSIZE <= 256
1252df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN)
1253df8bae1dSRodney W. Grimes return (0);
1254c43cad1aSScott Long #endif
1255c8b59ea7SGleb Smirnoff m = m_get(M_NOWAIT, MT_SONAME);
1256c8b59ea7SGleb Smirnoff if (m == NULL)
1257df8bae1dSRodney W. Grimes return (0);
1258df8bae1dSRodney W. Grimes m->m_len = asa->sa_len;
125980208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len);
1260c33a2313SAndrey V. Elsukov if (m0) {
126117cbcf33SHans Petter Selasky M_ASSERT_NO_SND_TAG(m0);
126253b680caSGleb Smirnoff m_clrprotoflags(m0);
126357386f5dSAndrey V. Elsukov m_tag_delete_chain(m0, NULL);
1264c33a2313SAndrey V. Elsukov /*
1265c33a2313SAndrey V. Elsukov * Clear some persistent info from pkthdr.
1266c33a2313SAndrey V. Elsukov * We don't use m_demote(), because some netgraph consumers
1267c33a2313SAndrey V. Elsukov * expect M_PKTHDR presence.
1268c33a2313SAndrey V. Elsukov */
1269c33a2313SAndrey V. Elsukov m0->m_pkthdr.rcvif = NULL;
1270c33a2313SAndrey V. Elsukov m0->m_pkthdr.flowid = 0;
1271c33a2313SAndrey V. Elsukov m0->m_pkthdr.csum_flags = 0;
1272c33a2313SAndrey V. Elsukov m0->m_pkthdr.fibnum = 0;
1273c33a2313SAndrey V. Elsukov m0->m_pkthdr.rsstype = 0;
1274c33a2313SAndrey V. Elsukov }
12758de34a88SAlan Somers if (ctrl_last)
12768de34a88SAlan Somers ctrl_last->m_next = m0; /* concatenate data to control */
1277df8bae1dSRodney W. Grimes else
1278df8bae1dSRodney W. Grimes control = m0;
1279df8bae1dSRodney W. Grimes m->m_next = control;
1280395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next)
1281df8bae1dSRodney W. Grimes sballoc(sb, n);
1282395bb186SSam Leffler sballoc(sb, n);
1283395bb186SSam Leffler nlast = n;
1284395bb186SSam Leffler SBLINKRECORD(sb, m);
1285395bb186SSam Leffler
1286395bb186SSam Leffler sb->sb_mbtail = nlast;
1287395bb186SSam Leffler SBLASTMBUFCHK(sb);
1288395bb186SSam Leffler
1289395bb186SSam Leffler SBLASTRECORDCHK(sb);
1290df8bae1dSRodney W. Grimes return (1);
1291df8bae1dSRodney W. Grimes }
1292df8bae1dSRodney W. Grimes
1293a34b7046SRobert Watson /*
1294050ac265SRobert Watson * Append address and data, and optionally, control (ancillary) data to the
1295050ac265SRobert Watson * receive queue of a socket. If present, m0 must include a packet header
1296050ac265SRobert Watson * with total length. Returns 0 if no space in sockbuf or insufficient
1297050ac265SRobert Watson * mbufs.
1298a34b7046SRobert Watson */
129926f9a767SRodney W. Grimes int
sbappendaddr_locked(struct sockbuf * sb,const struct sockaddr * asa,struct mbuf * m0,struct mbuf * control)13008de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
13018de34a88SAlan Somers struct mbuf *m0, struct mbuf *control)
13028de34a88SAlan Somers {
13038de34a88SAlan Somers struct mbuf *ctrl_last;
13048de34a88SAlan Somers int space = asa->sa_len;
13058de34a88SAlan Somers
13068de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb);
13078de34a88SAlan Somers
13088de34a88SAlan Somers if (m0 && (m0->m_flags & M_PKTHDR) == 0)
13098de34a88SAlan Somers panic("sbappendaddr_locked");
13108de34a88SAlan Somers if (m0)
13118de34a88SAlan Somers space += m0->m_pkthdr.len;
13128de34a88SAlan Somers space += m_length(control, &ctrl_last);
13138de34a88SAlan Somers
13148de34a88SAlan Somers if (space > sbspace(sb))
13158de34a88SAlan Somers return (0);
13168de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
13178de34a88SAlan Somers }
13188de34a88SAlan Somers
13198de34a88SAlan Somers /*
13208de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the
13218de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header
13228de34a88SAlan Somers * with total length. Returns 0 if insufficient mbufs. Does not validate space
13238de34a88SAlan Somers * on the receiving sockbuf.
13248de34a88SAlan Somers */
13258de34a88SAlan Somers int
sbappendaddr_nospacecheck_locked(struct sockbuf * sb,const struct sockaddr * asa,struct mbuf * m0,struct mbuf * control)13268de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
13278de34a88SAlan Somers struct mbuf *m0, struct mbuf *control)
13288de34a88SAlan Somers {
13298de34a88SAlan Somers struct mbuf *ctrl_last;
13308de34a88SAlan Somers
13318de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb);
13328de34a88SAlan Somers
13338de34a88SAlan Somers ctrl_last = (control == NULL) ? NULL : m_last(control);
13348de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
13358de34a88SAlan Somers }
13368de34a88SAlan Somers
/*
 * Locking wrapper for sbappendaddr_locked(): append address, data and
 * optional control (ancillary) data to sb while holding the sockbuf lock.
 * If present, m0 must include a packet header with total length.  Returns
 * whatever sbappendaddr_locked() returns: 0 if there is no space in the
 * sockbuf or mbufs are insufficient.
 */
int
sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
    struct mbuf *m0, struct mbuf *control)
{
	int appended;

	SOCKBUF_LOCK(sb);
	appended = sbappendaddr_locked(sb, asa, m0, control);
	SOCKBUF_UNLOCK(sb);
	return (appended);
}
1354a34b7046SRobert Watson
13555b0480f2SMark Johnston void
sbappendcontrol_locked(struct sockbuf * sb,struct mbuf * m0,struct mbuf * control,int flags)1356050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
135725f4ddfbSMark Johnston struct mbuf *control, int flags)
1358df8bae1dSRodney W. Grimes {
13595b0480f2SMark Johnston struct mbuf *m, *mlast;
1360df8bae1dSRodney W. Grimes
136130f8cb81SMark Johnston if (m0 != NULL)
1362ec45f952SMark Johnston kmsan_check_mbuf(m0, "sbappend");
1363ec45f952SMark Johnston kmsan_check_mbuf(control, "sbappend");
1364ec45f952SMark Johnston
136525f4ddfbSMark Johnston sbm_clrprotoflags(m0, flags);
13665b0480f2SMark Johnston m_last(control)->m_next = m0;
1367395bb186SSam Leffler
1368395bb186SSam Leffler SBLASTRECORDCHK(sb);
1369395bb186SSam Leffler
1370395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next)
1371df8bae1dSRodney W. Grimes sballoc(sb, m);
1372395bb186SSam Leffler sballoc(sb, m);
1373395bb186SSam Leffler mlast = m;
1374395bb186SSam Leffler SBLINKRECORD(sb, control);
1375395bb186SSam Leffler
1376395bb186SSam Leffler sb->sb_mbtail = mlast;
1377395bb186SSam Leffler SBLASTMBUFCHK(sb);
1378395bb186SSam Leffler
1379395bb186SSam Leffler SBLASTRECORDCHK(sb);
1380df8bae1dSRodney W. Grimes }
1381df8bae1dSRodney W. Grimes
/*
 * Locking wrapper for sbappendcontrol_locked(): append control and
 * optional data to sb while holding the sockbuf lock.
 */
void
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control,
    int flags)
{

	SOCKBUF_LOCK(sb);
	sbappendcontrol_locked(sb, m0, control, flags);
	SOCKBUF_UNLOCK(sb);
}
1391a34b7046SRobert Watson
1392df8bae1dSRodney W. Grimes /*
13937da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
13947da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty.
13957da7362bSRobert Watson *
13967da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of
13977da7362bSRobert Watson * three ways:
13987da7362bSRobert Watson *
13997da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
14007da7362bSRobert Watson * record boundary, and no change in data type).
14017da7362bSRobert Watson *
14027da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
14037da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an
14040f9d0a73SGleb Smirnoff * appropriate mbuf exists, there is room, both mbufs are not marked as
14050f9d0a73SGleb Smirnoff * not ready, and no merging of data types will occur.
14067da7362bSRobert Watson *
14077da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain.
14087da7362bSRobert Watson *
14097da7362bSRobert Watson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
14107da7362bSRobert Watson * end-of-record.
1411df8bae1dSRodney W. Grimes */
141226f9a767SRodney W. Grimes void
sbcompress(struct sockbuf * sb,struct mbuf * m,struct mbuf * n)1413050ac265SRobert Watson sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
1414df8bae1dSRodney W. Grimes {
1415050ac265SRobert Watson int eor = 0;
1416050ac265SRobert Watson struct mbuf *o;
1417df8bae1dSRodney W. Grimes
1418a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
1419a34b7046SRobert Watson
1420df8bae1dSRodney W. Grimes while (m) {
1421df8bae1dSRodney W. Grimes eor |= m->m_flags & M_EOR;
1422df8bae1dSRodney W. Grimes if (m->m_len == 0 &&
1423df8bae1dSRodney W. Grimes (eor == 0 ||
1424df8bae1dSRodney W. Grimes (((o = m->m_next) || (o = n)) &&
1425df8bae1dSRodney W. Grimes o->m_type == m->m_type))) {
1426395bb186SSam Leffler if (sb->sb_lastrecord == m)
1427395bb186SSam Leffler sb->sb_lastrecord = m->m_next;
1428df8bae1dSRodney W. Grimes m = m_free(m);
1429df8bae1dSRodney W. Grimes continue;
1430df8bae1dSRodney W. Grimes }
143132af0d74SDavid Malone if (n && (n->m_flags & M_EOR) == 0 &&
143232af0d74SDavid Malone M_WRITABLE(n) &&
14335e0f5cfaSKip Macy ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
14340f9d0a73SGleb Smirnoff !(m->m_flags & M_NOTREADY) &&
14356edfd179SGleb Smirnoff !(n->m_flags & (M_NOTREADY | M_EXTPG)) &&
1436b2e60773SJohn Baldwin !mbuf_has_tls_session(m) &&
1437b2e60773SJohn Baldwin !mbuf_has_tls_session(n) &&
143832af0d74SDavid Malone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
143932af0d74SDavid Malone m->m_len <= M_TRAILINGSPACE(n) &&
1440df8bae1dSRodney W. Grimes n->m_type == m->m_type) {
144182334850SJohn Baldwin m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
1442df8bae1dSRodney W. Grimes n->m_len += m->m_len;
14430f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len;
14440f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL)
14450f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len;
144634333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
1447b3f1af6bSTim J. Robbins /* XXX: Probably don't need.*/
144804ac9b97SKelly Yancey sb->sb_ctl += m->m_len;
1449df8bae1dSRodney W. Grimes m = m_free(m);
1450df8bae1dSRodney W. Grimes continue;
1451df8bae1dSRodney W. Grimes }
14526edfd179SGleb Smirnoff if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) &&
1453b2e60773SJohn Baldwin (m->m_flags & M_NOTREADY) == 0 &&
1454b2e60773SJohn Baldwin !mbuf_has_tls_session(m))
145582334850SJohn Baldwin (void)mb_unmapped_compress(m);
1456df8bae1dSRodney W. Grimes if (n)
1457df8bae1dSRodney W. Grimes n->m_next = m;
1458df8bae1dSRodney W. Grimes else
1459df8bae1dSRodney W. Grimes sb->sb_mb = m;
1460395bb186SSam Leffler sb->sb_mbtail = m;
1461df8bae1dSRodney W. Grimes sballoc(sb, m);
1462df8bae1dSRodney W. Grimes n = m;
1463df8bae1dSRodney W. Grimes m->m_flags &= ~M_EOR;
1464df8bae1dSRodney W. Grimes m = m->m_next;
1465df8bae1dSRodney W. Grimes n->m_next = 0;
1466df8bae1dSRodney W. Grimes }
1467df8bae1dSRodney W. Grimes if (eor) {
14687da7362bSRobert Watson KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
1469df8bae1dSRodney W. Grimes n->m_flags |= eor;
1470df8bae1dSRodney W. Grimes }
1471395bb186SSam Leffler SBLASTMBUFCHK(sb);
1472df8bae1dSRodney W. Grimes }
1473df8bae1dSRodney W. Grimes
#ifdef KERN_TLS
/*
 * Counterpart of sbcompress() for encrypted TLS RX mbufs.  The mbufs of
 * chain (m) go onto the 'sb_mtls' chain after (n) rather than onto
 * 'sb_mb'.  The input is simpler than in sbcompress(): every mbuf is
 * asserted to be MT_DATA, not-ready, mapped, and free of M_EOR.  The
 * sockbuf lock must be held (asserted).
 */
static void
sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{

	SOCKBUF_LOCK_ASSERT(sb);

	while (m != NULL) {
		KASSERT((m->m_flags & M_EOR) == 0,
		    ("TLS RX mbuf %p with EOR", m));
		KASSERT(m->m_type == MT_DATA,
		    ("TLS RX mbuf %p is not MT_DATA", m));
		KASSERT((m->m_flags & M_NOTREADY) != 0,
		    ("TLS RX mbuf %p ready", m));
		KASSERT((m->m_flags & M_EXTPG) == 0,
		    ("TLS RX mbuf %p unmapped", m));

		/* Empty mbufs carry nothing worth keeping. */
		if (m->m_len == 0) {
			m = m_free(m);
			continue;
		}

		/*
		 * Coalescing is fine here although both 'n' and 'm' are
		 * NOTREADY, as long as their M_DECRYPTED state agrees.
		 */
		if (n != NULL &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    !((m->m_flags ^ n->m_flags) & M_DECRYPTED) &&
		    !(n->m_flags & M_EXTPG) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n)) {
			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
			n->m_len += m->m_len;
			sb->sb_ccc += m->m_len;
			sb->sb_tlscc += m->m_len;
			m = m_free(m);
			continue;
		}

		/* Otherwise link the mbuf onto the tail of the TLS chain. */
		if (n != NULL)
			n->m_next = m;
		else
			sb->sb_mtls = m;
		sb->sb_mtlstail = m;
		sballoc_ktls_rx(sb, m);
		n = m;
		m = m->m_next;
		n->m_next = NULL;
	}
	SBLASTMBUFCHK(sb);
}
#endif
15323c0e5685SJohn Baldwin
1533df8bae1dSRodney W. Grimes /*
1534050ac265SRobert Watson * Free all mbufs in a sockbuf. Check that all resources are reclaimed.
1535df8bae1dSRodney W. Grimes */
1536371392bcSGleb Smirnoff void
sbflush_locked(struct sockbuf * sb)1537371392bcSGleb Smirnoff sbflush_locked(struct sockbuf *sb)
1538df8bae1dSRodney W. Grimes {
1539df8bae1dSRodney W. Grimes
1540371392bcSGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb);
1541371392bcSGleb Smirnoff
15423c0e5685SJohn Baldwin while (sb->sb_mbcnt || sb->sb_tlsdcc) {
154323f84772SPierre Beyssac /*
1544761a9a1fSGleb Smirnoff * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
154523f84772SPierre Beyssac * we would loop forever. Panic instead.
154623f84772SPierre Beyssac */
15470f9d0a73SGleb Smirnoff if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
154823f84772SPierre Beyssac break;
15490f9d0a73SGleb Smirnoff m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
155023f84772SPierre Beyssac }
15510f9d0a73SGleb Smirnoff KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
15520f9d0a73SGleb Smirnoff ("%s: ccc %u mb %p mbcnt %u", __func__,
15530f9d0a73SGleb Smirnoff sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
1554a34b7046SRobert Watson }
1555a34b7046SRobert Watson
/*
 * Locking wrapper for sbflush_locked(): free all mbufs in sb while holding
 * the sockbuf lock.
 */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1564df8bae1dSRodney W. Grimes
1565df8bae1dSRodney W. Grimes /*
15661d2df300SGleb Smirnoff * Cut data from (the front of) a sockbuf.
1567df8bae1dSRodney W. Grimes */
15681d2df300SGleb Smirnoff static struct mbuf *
sbcut_internal(struct sockbuf * sb,int len)15691d2df300SGleb Smirnoff sbcut_internal(struct sockbuf *sb, int len)
1570df8bae1dSRodney W. Grimes {
15710f9d0a73SGleb Smirnoff struct mbuf *m, *next, *mfree;
15723c0e5685SJohn Baldwin bool is_tls;
1573df8bae1dSRodney W. Grimes
1574f41b2de7SHiren Panchasara KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
1575b5b023b9SHiren Panchasara __func__, len));
1576b5b023b9SHiren Panchasara KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
1577b5b023b9SHiren Panchasara __func__, len, sb->sb_ccc));
1578b5b023b9SHiren Panchasara
1579df8bae1dSRodney W. Grimes next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
15803c0e5685SJohn Baldwin is_tls = false;
15811d2df300SGleb Smirnoff mfree = NULL;
15821d2df300SGleb Smirnoff
1583df8bae1dSRodney W. Grimes while (len > 0) {
15848146bcfeSGleb Smirnoff if (m == NULL) {
15853c0e5685SJohn Baldwin #ifdef KERN_TLS
15863c0e5685SJohn Baldwin if (next == NULL && !is_tls) {
15873c0e5685SJohn Baldwin if (sb->sb_tlsdcc != 0) {
15883c0e5685SJohn Baldwin MPASS(len >= sb->sb_tlsdcc);
15893c0e5685SJohn Baldwin len -= sb->sb_tlsdcc;
15903c0e5685SJohn Baldwin sb->sb_ccc -= sb->sb_tlsdcc;
15913c0e5685SJohn Baldwin sb->sb_tlsdcc = 0;
15923c0e5685SJohn Baldwin if (len == 0)
15933c0e5685SJohn Baldwin break;
15943c0e5685SJohn Baldwin }
15953c0e5685SJohn Baldwin next = sb->sb_mtls;
15963c0e5685SJohn Baldwin is_tls = true;
15973c0e5685SJohn Baldwin }
15983c0e5685SJohn Baldwin #endif
15998146bcfeSGleb Smirnoff KASSERT(next, ("%s: no next, len %d", __func__, len));
1600df8bae1dSRodney W. Grimes m = next;
1601df8bae1dSRodney W. Grimes next = m->m_nextpkt;
1602df8bae1dSRodney W. Grimes }
1603df8bae1dSRodney W. Grimes if (m->m_len > len) {
16040f9d0a73SGleb Smirnoff KASSERT(!(m->m_flags & M_NOTAVAIL),
16050f9d0a73SGleb Smirnoff ("%s: m %p M_NOTAVAIL", __func__, m));
1606df8bae1dSRodney W. Grimes m->m_len -= len;
1607df8bae1dSRodney W. Grimes m->m_data += len;
16080f9d0a73SGleb Smirnoff sb->sb_ccc -= len;
16090f9d0a73SGleb Smirnoff sb->sb_acc -= len;
16104e023759SAndre Oppermann if (sb->sb_sndptroff != 0)
16114e023759SAndre Oppermann sb->sb_sndptroff -= len;
161234333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
161304ac9b97SKelly Yancey sb->sb_ctl -= len;
1614df8bae1dSRodney W. Grimes break;
1615df8bae1dSRodney W. Grimes }
1616df8bae1dSRodney W. Grimes len -= m->m_len;
16173c0e5685SJohn Baldwin #ifdef KERN_TLS
16183c0e5685SJohn Baldwin if (is_tls)
16193c0e5685SJohn Baldwin sbfree_ktls_rx(sb, m);
16203c0e5685SJohn Baldwin else
16213c0e5685SJohn Baldwin #endif
1622df8bae1dSRodney W. Grimes sbfree(sb, m);
16230f9d0a73SGleb Smirnoff /*
16240f9d0a73SGleb Smirnoff * Do not put M_NOTREADY buffers to the free list, they
16250f9d0a73SGleb Smirnoff * are referenced from outside.
16260f9d0a73SGleb Smirnoff */
16273c0e5685SJohn Baldwin if (m->m_flags & M_NOTREADY && !is_tls)
16280f9d0a73SGleb Smirnoff m = m->m_next;
16290f9d0a73SGleb Smirnoff else {
16300f9d0a73SGleb Smirnoff struct mbuf *n;
16310f9d0a73SGleb Smirnoff
16321d2df300SGleb Smirnoff n = m->m_next;
16331d2df300SGleb Smirnoff m->m_next = mfree;
16341d2df300SGleb Smirnoff mfree = m;
16351d2df300SGleb Smirnoff m = n;
1636df8bae1dSRodney W. Grimes }
16370f9d0a73SGleb Smirnoff }
1638e834a840SGleb Smirnoff /*
1639e834a840SGleb Smirnoff * Free any zero-length mbufs from the buffer.
1640e834a840SGleb Smirnoff * For SOCK_DGRAM sockets such mbufs represent empty records.
1641e834a840SGleb Smirnoff * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
1642e834a840SGleb Smirnoff * when sosend_generic() needs to send only control data.
1643e834a840SGleb Smirnoff */
1644e834a840SGleb Smirnoff while (m && m->m_len == 0) {
1645e834a840SGleb Smirnoff struct mbuf *n;
1646e834a840SGleb Smirnoff
1647e834a840SGleb Smirnoff sbfree(sb, m);
1648e834a840SGleb Smirnoff n = m->m_next;
1649e834a840SGleb Smirnoff m->m_next = mfree;
1650e834a840SGleb Smirnoff mfree = m;
1651e834a840SGleb Smirnoff m = n;
1652e834a840SGleb Smirnoff }
16533c0e5685SJohn Baldwin #ifdef KERN_TLS
16543c0e5685SJohn Baldwin if (is_tls) {
16553c0e5685SJohn Baldwin sb->sb_mb = NULL;
16563c0e5685SJohn Baldwin sb->sb_mtls = m;
16573c0e5685SJohn Baldwin if (m == NULL)
16583c0e5685SJohn Baldwin sb->sb_mtlstail = NULL;
16593c0e5685SJohn Baldwin } else
16603c0e5685SJohn Baldwin #endif
1661df8bae1dSRodney W. Grimes if (m) {
1662df8bae1dSRodney W. Grimes sb->sb_mb = m;
1663df8bae1dSRodney W. Grimes m->m_nextpkt = next;
1664df8bae1dSRodney W. Grimes } else
1665df8bae1dSRodney W. Grimes sb->sb_mb = next;
1666395bb186SSam Leffler /*
1667050ac265SRobert Watson * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure
1668050ac265SRobert Watson * sb_lastrecord is up-to-date if we dropped part of the last record.
1669395bb186SSam Leffler */
1670395bb186SSam Leffler m = sb->sb_mb;
1671395bb186SSam Leffler if (m == NULL) {
1672395bb186SSam Leffler sb->sb_mbtail = NULL;
1673395bb186SSam Leffler sb->sb_lastrecord = NULL;
1674395bb186SSam Leffler } else if (m->m_nextpkt == NULL) {
1675395bb186SSam Leffler sb->sb_lastrecord = m;
1676395bb186SSam Leffler }
16771d2df300SGleb Smirnoff
16781d2df300SGleb Smirnoff return (mfree);
1679df8bae1dSRodney W. Grimes }
1680df8bae1dSRodney W. Grimes
/*
 * Discard len bytes from the front of a sockbuf and free the mbufs that
 * were cut off.  The sockbuf lock must be held (asserted).
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{
	struct mbuf *cut;

	SOCKBUF_LOCK_ASSERT(sb);
	cut = sbcut_internal(sb, len);
	m_freem(cut);
}
1691eaa6dfbcSRobert Watson
/*
 * Cut len bytes from the front of a sockbuf and hand the removed mbufs to
 * the caller instead of freeing them.  The sockbuf lock must be held
 * (asserted).
 */
struct mbuf *
sbcut_locked(struct sockbuf *sb, int len)
{
	struct mbuf *cut;

	SOCKBUF_LOCK_ASSERT(sb);
	cut = sbcut_internal(sb, len);
	return (cut);
}
1703eaa6dfbcSRobert Watson
/*
 * Discard len bytes from the front of a sockbuf.  The cut is made with the
 * sockbuf lock held; the removed mbufs are freed only after the lock has
 * been released.
 */
void
sbdrop(struct sockbuf *sb, int len)
{
	struct mbuf *cut;

	SOCKBUF_LOCK(sb);
	cut = sbcut_internal(sb, len);
	SOCKBUF_UNLOCK(sb);

	m_freem(cut);
}
1715a34b7046SRobert Watson
171689e560f4SRandall Stewart struct mbuf *
sbsndptr_noadv(struct sockbuf * sb,uint32_t off,uint32_t * moff)171789e560f4SRandall Stewart sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
171889e560f4SRandall Stewart {
171989e560f4SRandall Stewart struct mbuf *m;
172089e560f4SRandall Stewart
172189e560f4SRandall Stewart KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
172289e560f4SRandall Stewart if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
172389e560f4SRandall Stewart *moff = off;
172489e560f4SRandall Stewart if (sb->sb_sndptr == NULL) {
172589e560f4SRandall Stewart sb->sb_sndptr = sb->sb_mb;
172689e560f4SRandall Stewart sb->sb_sndptroff = 0;
172789e560f4SRandall Stewart }
172889e560f4SRandall Stewart return (sb->sb_mb);
172989e560f4SRandall Stewart } else {
173089e560f4SRandall Stewart m = sb->sb_sndptr;
173189e560f4SRandall Stewart off -= sb->sb_sndptroff;
173289e560f4SRandall Stewart }
173389e560f4SRandall Stewart *moff = off;
173489e560f4SRandall Stewart return (m);
173589e560f4SRandall Stewart }
173689e560f4SRandall Stewart
173789e560f4SRandall Stewart void
sbsndptr_adv(struct sockbuf * sb,struct mbuf * mb,uint32_t len)173889e560f4SRandall Stewart sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
173989e560f4SRandall Stewart {
174089e560f4SRandall Stewart /*
174189e560f4SRandall Stewart * A small copy was done, advance forward the sb_sbsndptr to cover
174289e560f4SRandall Stewart * it.
174389e560f4SRandall Stewart */
174489e560f4SRandall Stewart struct mbuf *m;
174589e560f4SRandall Stewart
174689e560f4SRandall Stewart if (mb != sb->sb_sndptr) {
174789e560f4SRandall Stewart /* Did not copyout at the same mbuf */
174889e560f4SRandall Stewart return;
174989e560f4SRandall Stewart }
175089e560f4SRandall Stewart m = mb;
175189e560f4SRandall Stewart while (m && (len > 0)) {
175289e560f4SRandall Stewart if (len >= m->m_len) {
175389e560f4SRandall Stewart len -= m->m_len;
175489e560f4SRandall Stewart if (m->m_next) {
175589e560f4SRandall Stewart sb->sb_sndptroff += m->m_len;
175689e560f4SRandall Stewart sb->sb_sndptr = m->m_next;
175789e560f4SRandall Stewart }
175889e560f4SRandall Stewart m = m->m_next;
175989e560f4SRandall Stewart } else {
176089e560f4SRandall Stewart len = 0;
176189e560f4SRandall Stewart }
176289e560f4SRandall Stewart }
176389e560f4SRandall Stewart }
176489e560f4SRandall Stewart
1765a34b7046SRobert Watson /*
17669fd573c3SHans Petter Selasky * Return the first mbuf and the mbuf data offset for the provided
17679fd573c3SHans Petter Selasky * send offset without changing the "sb_sndptroff" field.
17689fd573c3SHans Petter Selasky */
17699fd573c3SHans Petter Selasky struct mbuf *
sbsndmbuf(struct sockbuf * sb,u_int off,u_int * moff)17709fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
17719fd573c3SHans Petter Selasky {
17729fd573c3SHans Petter Selasky struct mbuf *m;
17739fd573c3SHans Petter Selasky
17749fd573c3SHans Petter Selasky KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
17759fd573c3SHans Petter Selasky
17769fd573c3SHans Petter Selasky /*
17779fd573c3SHans Petter Selasky * If the "off" is below the stored offset, which happens on
17789fd573c3SHans Petter Selasky * retransmits, just use "sb_mb":
17799fd573c3SHans Petter Selasky */
17809fd573c3SHans Petter Selasky if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
17819fd573c3SHans Petter Selasky m = sb->sb_mb;
17829fd573c3SHans Petter Selasky } else {
17839fd573c3SHans Petter Selasky m = sb->sb_sndptr;
17849fd573c3SHans Petter Selasky off -= sb->sb_sndptroff;
17859fd573c3SHans Petter Selasky }
17869fd573c3SHans Petter Selasky while (off > 0 && m != NULL) {
17879fd573c3SHans Petter Selasky if (off < m->m_len)
17889fd573c3SHans Petter Selasky break;
17899fd573c3SHans Petter Selasky off -= m->m_len;
17909fd573c3SHans Petter Selasky m = m->m_next;
17919fd573c3SHans Petter Selasky }
17929fd573c3SHans Petter Selasky *moff = off;
17939fd573c3SHans Petter Selasky return (m);
17949fd573c3SHans Petter Selasky }
17959fd573c3SHans Petter Selasky
17969fd573c3SHans Petter Selasky /*
1797050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the
1798050ac265SRobert Watson * front.
1799df8bae1dSRodney W. Grimes */
180026f9a767SRodney W. Grimes void
sbdroprecord_locked(struct sockbuf * sb)1801050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb)
1802df8bae1dSRodney W. Grimes {
1803050ac265SRobert Watson struct mbuf *m;
1804df8bae1dSRodney W. Grimes
1805a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb);
1806a34b7046SRobert Watson
1807df8bae1dSRodney W. Grimes m = sb->sb_mb;
1808df8bae1dSRodney W. Grimes if (m) {
1809df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt;
1810df8bae1dSRodney W. Grimes do {
1811df8bae1dSRodney W. Grimes sbfree(sb, m);
1812ecde8f7cSMatthew Dillon m = m_free(m);
1813797f2d22SPoul-Henning Kamp } while (m);
1814df8bae1dSRodney W. Grimes }
1815395bb186SSam Leffler SB_EMPTY_FIXUP(sb);
1816df8bae1dSRodney W. Grimes }
18171e4ad9ceSGarrett Wollman
/*
 * Locking wrapper for sbdroprecord_locked(): drop the record at the front
 * of sb while holding the sockbuf lock.
 */
void
sbdroprecord(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbdroprecord_locked(sb);
	SOCKBUF_UNLOCK(sb);
}
1830a34b7046SRobert Watson
183120d9e5e8SRobert Watson /*
18328c799760SRobert Watson * Create a "control" mbuf containing the specified data with the specified
18338c799760SRobert Watson * type for presentation on a socket buffer.
183420d9e5e8SRobert Watson */
183520d9e5e8SRobert Watson struct mbuf *
sbcreatecontrol(const void * p,u_int size,int type,int level,int wait)18366890b588SGleb Smirnoff sbcreatecontrol(const void *p, u_int size, int type, int level, int wait)
183720d9e5e8SRobert Watson {
1838d19e16a7SRobert Watson struct cmsghdr *cp;
183920d9e5e8SRobert Watson struct mbuf *m;
184020d9e5e8SRobert Watson
18413c0e5685SJohn Baldwin MBUF_CHECKSLEEP(wait);
18426890b588SGleb Smirnoff
18436890b588SGleb Smirnoff if (wait == M_NOWAIT) {
18446890b588SGleb Smirnoff if (CMSG_SPACE(size) > MCLBYTES)
18456890b588SGleb Smirnoff return (NULL);
18466890b588SGleb Smirnoff } else
1847ad51c47fSGleb Smirnoff KASSERT(CMSG_SPACE(size) <= MCLBYTES,
1848ad51c47fSGleb Smirnoff ("%s: passed CMSG_SPACE(%u) > MCLBYTES", __func__, size));
18496890b588SGleb Smirnoff
18506890b588SGleb Smirnoff if (CMSG_SPACE(size) > MLEN)
18513c0e5685SJohn Baldwin m = m_getcl(wait, MT_CONTROL, 0);
185220d9e5e8SRobert Watson else
18533c0e5685SJohn Baldwin m = m_get(wait, MT_CONTROL);
185420d9e5e8SRobert Watson if (m == NULL)
18556890b588SGleb Smirnoff return (NULL);
18566890b588SGleb Smirnoff
18576890b588SGleb Smirnoff KASSERT(CMSG_SPACE(size) <= M_TRAILINGSPACE(m),
185820d9e5e8SRobert Watson ("sbcreatecontrol: short mbuf"));
18592827952eSXin LI /*
18602827952eSXin LI * Don't leave the padding between the msg header and the
18612827952eSXin LI * cmsg data and the padding after the cmsg data un-initialized.
18622827952eSXin LI */
18636890b588SGleb Smirnoff cp = mtod(m, struct cmsghdr *);
18646890b588SGleb Smirnoff bzero(cp, CMSG_SPACE(size));
186520d9e5e8SRobert Watson if (p != NULL)
186620d9e5e8SRobert Watson (void)memcpy(CMSG_DATA(cp), p, size);
186720d9e5e8SRobert Watson m->m_len = CMSG_SPACE(size);
186820d9e5e8SRobert Watson cp->cmsg_len = CMSG_LEN(size);
186920d9e5e8SRobert Watson cp->cmsg_level = level;
187020d9e5e8SRobert Watson cp->cmsg_type = type;
187120d9e5e8SRobert Watson return (m);
187220d9e5e8SRobert Watson }
187320d9e5e8SRobert Watson
187420d9e5e8SRobert Watson /*
18758c799760SRobert Watson * This does the same for socket buffers that sotoxsocket does for sockets:
18768c799760SRobert Watson * generate an user-format data structure describing the socket buffer. Note
18778c799760SRobert Watson * that the xsockbuf structure, since it is always embedded in a socket, does
18788c799760SRobert Watson * not include a self pointer nor a length. We make this entry point public
18798c799760SRobert Watson * in case some other mechanism needs it.
188020d9e5e8SRobert Watson */
188120d9e5e8SRobert Watson void
sbtoxsockbuf(struct sockbuf * sb,struct xsockbuf * xsb)188220d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
188320d9e5e8SRobert Watson {
1884d19e16a7SRobert Watson
18850f9d0a73SGleb Smirnoff xsb->sb_cc = sb->sb_ccc;
188620d9e5e8SRobert Watson xsb->sb_hiwat = sb->sb_hiwat;
188720d9e5e8SRobert Watson xsb->sb_mbcnt = sb->sb_mbcnt;
188820d9e5e8SRobert Watson xsb->sb_mbmax = sb->sb_mbmax;
188920d9e5e8SRobert Watson xsb->sb_lowat = sb->sb_lowat;
189020d9e5e8SRobert Watson xsb->sb_flags = sb->sb_flags;
189120d9e5e8SRobert Watson xsb->sb_timeo = sb->sb_timeo;
189220d9e5e8SRobert Watson }
189320d9e5e8SRobert Watson
1894639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
1895639acc13SGarrett Wollman static int dummy;
1896e8cdbb48SPawel Biernacki SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
18977029da5cSPawel Biernacki SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
1898fe27f1dbSAlexander Motin CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0,
18997029da5cSPawel Biernacki sysctl_handle_sb_max, "LU",
19007029da5cSPawel Biernacki "Maximum socket buffer size");
19011b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
19023eb9ab52SEitan Adler &sb_efficiency, 0, "Socket buffer size waste factor");
1903