19454b2d8SWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1569a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. 
Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 34677b542eSDavid E. O'Brien #include <sys/cdefs.h> 35677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 36677b542eSDavid E. O'Brien 37b2e60773SJohn Baldwin #include "opt_kern_tls.h" 385b86eac4SJesper Skriver #include "opt_param.h" 39335654d7SRobert Watson 40df8bae1dSRodney W. Grimes #include <sys/param.h> 41960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */ 42ff5c09daSGarrett Wollman #include <sys/kernel.h> 43b2e60773SJohn Baldwin #include <sys/ktls.h> 44fb919e4dSMark Murray #include <sys/lock.h> 458ec07310SGleb Smirnoff #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47960ed29cSSeigo Tanimura #include <sys/mutex.h> 48fb919e4dSMark Murray #include <sys/proc.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 502f9a2132SBrian Feldman #include <sys/resourcevar.h> 51960ed29cSSeigo Tanimura #include <sys/signalvar.h> 52df8bae1dSRodney W. Grimes #include <sys/socket.h> 53df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 547abab911SRobert Watson #include <sys/sx.h> 55ff5c09daSGarrett Wollman #include <sys/sysctl.h> 5626f9a767SRodney W. Grimes 57f14cce87SRobert Watson /* 58f14cce87SRobert Watson * Function pointer set by the AIO routines so that the socket buffer code 59f14cce87SRobert Watson * can call back into the AIO module if it is loaded. 
60f14cce87SRobert Watson */ 6121d56e9cSAlfred Perlstein void (*aio_swake)(struct socket *, struct sockbuf *); 6221d56e9cSAlfred Perlstein 63df8bae1dSRodney W. Grimes /* 64f14cce87SRobert Watson * Primitive routines for operating on socket buffers 65df8bae1dSRodney W. Grimes */ 66df8bae1dSRodney W. Grimes 6779cb7eb4SDavid Greenman u_long sb_max = SB_MAX; 6858d14daeSMohan Srinivasan u_long sb_max_adj = 69b233773bSBjoern A. Zeeb (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 70df8bae1dSRodney W. Grimes 714b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 724b29bc4fSGarrett Wollman 73d1385ab2SMateusz Guzik #ifdef KERN_TLS 743c0e5685SJohn Baldwin static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, 753c0e5685SJohn Baldwin struct mbuf *n); 76d1385ab2SMateusz Guzik #endif 771d2df300SGleb Smirnoff static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); 78050ac265SRobert Watson static void sbflush_internal(struct sockbuf *sb); 79eaa6dfbcSRobert Watson 80df8bae1dSRodney W. Grimes /* 81829fae90SGleb Smirnoff * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. 82829fae90SGleb Smirnoff */ 83829fae90SGleb Smirnoff static void 84829fae90SGleb Smirnoff sbm_clrprotoflags(struct mbuf *m, int flags) 85829fae90SGleb Smirnoff { 86829fae90SGleb Smirnoff int mask; 87829fae90SGleb Smirnoff 88829fae90SGleb Smirnoff mask = ~M_PROTOFLAGS; 89829fae90SGleb Smirnoff if (flags & PRUS_NOTREADY) 90829fae90SGleb Smirnoff mask |= M_NOTREADY; 91829fae90SGleb Smirnoff while (m) { 92829fae90SGleb Smirnoff m->m_flags &= mask; 93829fae90SGleb Smirnoff m = m->m_next; 94829fae90SGleb Smirnoff } 95829fae90SGleb Smirnoff } 96829fae90SGleb Smirnoff 97829fae90SGleb Smirnoff /* 983807631bSJohn Baldwin * Compress M_NOTREADY mbufs after they have been readied by sbready(). 
993807631bSJohn Baldwin * 1003807631bSJohn Baldwin * sbcompress() skips M_NOTREADY mbufs since the data is not available to 1013807631bSJohn Baldwin * be copied at the time of sbcompress(). This function combines small 1023807631bSJohn Baldwin * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first 1033807631bSJohn Baldwin * mbuf sbready() marked ready, and 'end' is the first mbuf still not 1043807631bSJohn Baldwin * ready. 1053807631bSJohn Baldwin */ 1063807631bSJohn Baldwin static void 1073807631bSJohn Baldwin sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end) 1083807631bSJohn Baldwin { 1093807631bSJohn Baldwin struct mbuf *m, *n; 1103807631bSJohn Baldwin int ext_size; 1113807631bSJohn Baldwin 1123807631bSJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 1133807631bSJohn Baldwin 1143807631bSJohn Baldwin if ((sb->sb_flags & SB_NOCOALESCE) != 0) 1153807631bSJohn Baldwin return; 1163807631bSJohn Baldwin 1173807631bSJohn Baldwin for (m = m0; m != end; m = m->m_next) { 1183807631bSJohn Baldwin MPASS((m->m_flags & M_NOTREADY) == 0); 119c4ad247bSAndrew Gallatin /* 120c4ad247bSAndrew Gallatin * NB: In sbcompress(), 'n' is the last mbuf in the 121c4ad247bSAndrew Gallatin * socket buffer and 'm' is the new mbuf being copied 122c4ad247bSAndrew Gallatin * into the trailing space of 'n'. Here, the roles 123c4ad247bSAndrew Gallatin * are reversed and 'n' is the next mbuf after 'm' 124c4ad247bSAndrew Gallatin * that is being copied into the trailing space of 125c4ad247bSAndrew Gallatin * 'm'. 126c4ad247bSAndrew Gallatin */ 127c4ad247bSAndrew Gallatin n = m->m_next; 128c4ad247bSAndrew Gallatin #ifdef KERN_TLS 129c4ad247bSAndrew Gallatin /* Try to coalesce adjacent ktls mbuf hdr/trailers. 
*/ 130c4ad247bSAndrew Gallatin if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && 1316edfd179SGleb Smirnoff (m->m_flags & M_EXTPG) && 1326edfd179SGleb Smirnoff (n->m_flags & M_EXTPG) && 133c4ad247bSAndrew Gallatin !mbuf_has_tls_session(m) && 134c4ad247bSAndrew Gallatin !mbuf_has_tls_session(n)) { 135c4ad247bSAndrew Gallatin int hdr_len, trail_len; 136c4ad247bSAndrew Gallatin 1377b6c99d0SGleb Smirnoff hdr_len = n->m_epg_hdrlen; 1387b6c99d0SGleb Smirnoff trail_len = m->m_epg_trllen; 139c4ad247bSAndrew Gallatin if (trail_len != 0 && hdr_len != 0 && 140c4ad247bSAndrew Gallatin trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) { 141c4ad247bSAndrew Gallatin /* copy n's header to m's trailer */ 14223feb563SAndrew Gallatin memcpy(&m->m_epg_trail[trail_len], 14323feb563SAndrew Gallatin n->m_epg_hdr, hdr_len); 1447b6c99d0SGleb Smirnoff m->m_epg_trllen += hdr_len; 145c4ad247bSAndrew Gallatin m->m_len += hdr_len; 1467b6c99d0SGleb Smirnoff n->m_epg_hdrlen = 0; 147c4ad247bSAndrew Gallatin n->m_len -= hdr_len; 148c4ad247bSAndrew Gallatin } 149c4ad247bSAndrew Gallatin } 150c4ad247bSAndrew Gallatin #endif 1513807631bSJohn Baldwin 1523807631bSJohn Baldwin /* Compress small unmapped mbufs into plain mbufs. 
*/ 1536edfd179SGleb Smirnoff if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN && 154b2e60773SJohn Baldwin !mbuf_has_tls_session(m)) { 1553807631bSJohn Baldwin ext_size = m->m_ext.ext_size; 156*d59bc188SGleb Smirnoff if (mb_unmapped_compress(m) == 0) 1573807631bSJohn Baldwin sb->sb_mbcnt -= ext_size; 1583807631bSJohn Baldwin } 1593807631bSJohn Baldwin 1603807631bSJohn Baldwin while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && 1613807631bSJohn Baldwin M_WRITABLE(m) && 1626edfd179SGleb Smirnoff (m->m_flags & M_EXTPG) == 0 && 163b2e60773SJohn Baldwin !mbuf_has_tls_session(n) && 164b2e60773SJohn Baldwin !mbuf_has_tls_session(m) && 1653807631bSJohn Baldwin n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 1663807631bSJohn Baldwin n->m_len <= M_TRAILINGSPACE(m) && 1673807631bSJohn Baldwin m->m_type == n->m_type) { 1683807631bSJohn Baldwin KASSERT(sb->sb_lastrecord != n, 1693807631bSJohn Baldwin ("%s: merging start of record (%p) into previous mbuf (%p)", 1703807631bSJohn Baldwin __func__, n, m)); 1713807631bSJohn Baldwin m_copydata(n, 0, n->m_len, mtodo(m, m->m_len)); 1723807631bSJohn Baldwin m->m_len += n->m_len; 1733807631bSJohn Baldwin m->m_next = n->m_next; 1743807631bSJohn Baldwin m->m_flags |= n->m_flags & M_EOR; 1753807631bSJohn Baldwin if (sb->sb_mbtail == n) 1763807631bSJohn Baldwin sb->sb_mbtail = m; 1773807631bSJohn Baldwin 1783807631bSJohn Baldwin sb->sb_mbcnt -= MSIZE; 179*d59bc188SGleb Smirnoff if (n->m_flags & M_EXT) 1803807631bSJohn Baldwin sb->sb_mbcnt -= n->m_ext.ext_size; 1813807631bSJohn Baldwin m_free(n); 1823807631bSJohn Baldwin n = m->m_next; 1833807631bSJohn Baldwin } 1843807631bSJohn Baldwin } 1853807631bSJohn Baldwin SBLASTRECORDCHK(sb); 1863807631bSJohn Baldwin SBLASTMBUFCHK(sb); 1873807631bSJohn Baldwin } 1883807631bSJohn Baldwin 1893807631bSJohn Baldwin /* 19082334850SJohn Baldwin * Mark ready "count" units of I/O starting with "m". 
Most mbufs 19161664ee7SGleb Smirnoff * count as a single unit of I/O except for M_EXTPG mbufs which 19261664ee7SGleb Smirnoff * are backed by multiple pages. 1930f9d0a73SGleb Smirnoff */ 1940f9d0a73SGleb Smirnoff int 19582334850SJohn Baldwin sbready(struct sockbuf *sb, struct mbuf *m0, int count) 1960f9d0a73SGleb Smirnoff { 19782334850SJohn Baldwin struct mbuf *m; 1980f9d0a73SGleb Smirnoff u_int blocker; 1990f9d0a73SGleb Smirnoff 2000f9d0a73SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2010f9d0a73SGleb Smirnoff KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); 20282334850SJohn Baldwin KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); 2030f9d0a73SGleb Smirnoff 20482334850SJohn Baldwin m = m0; 2050f9d0a73SGleb Smirnoff blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; 2060f9d0a73SGleb Smirnoff 20782334850SJohn Baldwin while (count > 0) { 2080f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 2090f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 210c2a8fd6fSJohn Baldwin if ((m->m_flags & M_EXTPG) != 0 && m->m_epg_npgs != 0) { 2117b6c99d0SGleb Smirnoff if (count < m->m_epg_nrdy) { 2127b6c99d0SGleb Smirnoff m->m_epg_nrdy -= count; 21382334850SJohn Baldwin count = 0; 21482334850SJohn Baldwin break; 21582334850SJohn Baldwin } 2167b6c99d0SGleb Smirnoff count -= m->m_epg_nrdy; 2177b6c99d0SGleb Smirnoff m->m_epg_nrdy = 0; 21882334850SJohn Baldwin } else 21982334850SJohn Baldwin count--; 22082334850SJohn Baldwin 2210f9d0a73SGleb Smirnoff m->m_flags &= ~(M_NOTREADY | blocker); 2220f9d0a73SGleb Smirnoff if (blocker) 2230f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 22482334850SJohn Baldwin m = m->m_next; 2250f9d0a73SGleb Smirnoff } 2260f9d0a73SGleb Smirnoff 22782334850SJohn Baldwin /* 22882334850SJohn Baldwin * If the first mbuf is still not fully ready because only 22982334850SJohn Baldwin * some of its backing pages were readied, no further progress 23082334850SJohn Baldwin * can be made. 
23182334850SJohn Baldwin */ 23282334850SJohn Baldwin if (m0 == m) { 23382334850SJohn Baldwin MPASS(m->m_flags & M_NOTREADY); 2340f9d0a73SGleb Smirnoff return (EINPROGRESS); 23582334850SJohn Baldwin } 23682334850SJohn Baldwin 23782334850SJohn Baldwin if (!blocker) { 2383807631bSJohn Baldwin sbready_compress(sb, m0, m); 23982334850SJohn Baldwin return (EINPROGRESS); 24082334850SJohn Baldwin } 2410f9d0a73SGleb Smirnoff 2420f9d0a73SGleb Smirnoff /* This one was blocking all the queue. */ 2430f9d0a73SGleb Smirnoff for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { 2440f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_BLOCKED, 2450f9d0a73SGleb Smirnoff ("%s: m %p !M_BLOCKED", __func__, m)); 2460f9d0a73SGleb Smirnoff m->m_flags &= ~M_BLOCKED; 2470f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 2480f9d0a73SGleb Smirnoff } 2490f9d0a73SGleb Smirnoff 2500f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 2513807631bSJohn Baldwin sbready_compress(sb, m0, m); 2520f9d0a73SGleb Smirnoff 2530f9d0a73SGleb Smirnoff return (0); 2540f9d0a73SGleb Smirnoff } 2550f9d0a73SGleb Smirnoff 2560f9d0a73SGleb Smirnoff /* 2578967b220SGleb Smirnoff * Adjust sockbuf state reflecting allocation of m. 
2588967b220SGleb Smirnoff */ 2598967b220SGleb Smirnoff void 2608967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m) 2618967b220SGleb Smirnoff { 2628967b220SGleb Smirnoff 2638967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2648967b220SGleb Smirnoff 2650f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 2660f9d0a73SGleb Smirnoff 2670f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) { 2680f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY) 2690f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 2700f9d0a73SGleb Smirnoff else 2710f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 2720f9d0a73SGleb Smirnoff } else 2730f9d0a73SGleb Smirnoff m->m_flags |= M_BLOCKED; 2748967b220SGleb Smirnoff 2758967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 2768967b220SGleb Smirnoff sb->sb_ctl += m->m_len; 2778967b220SGleb Smirnoff 2788967b220SGleb Smirnoff sb->sb_mbcnt += MSIZE; 2798967b220SGleb Smirnoff 280*d59bc188SGleb Smirnoff if (m->m_flags & M_EXT) 2818967b220SGleb Smirnoff sb->sb_mbcnt += m->m_ext.ext_size; 2828967b220SGleb Smirnoff } 2838967b220SGleb Smirnoff 2848967b220SGleb Smirnoff /* 2858967b220SGleb Smirnoff * Adjust sockbuf state reflecting freeing of m. 2868967b220SGleb Smirnoff */ 2878967b220SGleb Smirnoff void 2888967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m) 2898967b220SGleb Smirnoff { 2908967b220SGleb Smirnoff 2918967b220SGleb Smirnoff #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. 
*/ 2928967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2938967b220SGleb Smirnoff #endif 2948967b220SGleb Smirnoff 2950f9d0a73SGleb Smirnoff sb->sb_ccc -= m->m_len; 2960f9d0a73SGleb Smirnoff 2970f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) 2980f9d0a73SGleb Smirnoff sb->sb_acc -= m->m_len; 2990f9d0a73SGleb Smirnoff 3000f9d0a73SGleb Smirnoff if (m == sb->sb_fnrdy) { 3010f9d0a73SGleb Smirnoff struct mbuf *n; 3020f9d0a73SGleb Smirnoff 3030f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 3040f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 3050f9d0a73SGleb Smirnoff 3060f9d0a73SGleb Smirnoff n = m->m_next; 3070f9d0a73SGleb Smirnoff while (n != NULL && !(n->m_flags & M_NOTREADY)) { 3080f9d0a73SGleb Smirnoff n->m_flags &= ~M_BLOCKED; 3090f9d0a73SGleb Smirnoff sb->sb_acc += n->m_len; 3100f9d0a73SGleb Smirnoff n = n->m_next; 3110f9d0a73SGleb Smirnoff } 3120f9d0a73SGleb Smirnoff sb->sb_fnrdy = n; 3130f9d0a73SGleb Smirnoff } 3148967b220SGleb Smirnoff 3158967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 3168967b220SGleb Smirnoff sb->sb_ctl -= m->m_len; 3178967b220SGleb Smirnoff 3188967b220SGleb Smirnoff sb->sb_mbcnt -= MSIZE; 319*d59bc188SGleb Smirnoff if (m->m_flags & M_EXT) 3208967b220SGleb Smirnoff sb->sb_mbcnt -= m->m_ext.ext_size; 3218967b220SGleb Smirnoff 3228967b220SGleb Smirnoff if (sb->sb_sndptr == m) { 3238967b220SGleb Smirnoff sb->sb_sndptr = NULL; 3248967b220SGleb Smirnoff sb->sb_sndptroff = 0; 3258967b220SGleb Smirnoff } 3268967b220SGleb Smirnoff if (sb->sb_sndptroff != 0) 3278967b220SGleb Smirnoff sb->sb_sndptroff -= m->m_len; 3288967b220SGleb Smirnoff } 3298967b220SGleb Smirnoff 3303c0e5685SJohn Baldwin #ifdef KERN_TLS 3313c0e5685SJohn Baldwin /* 3323c0e5685SJohn Baldwin * Similar to sballoc/sbfree but does not adjust state associated with 3333c0e5685SJohn Baldwin * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs 3343c0e5685SJohn Baldwin * are not ready. 
3353c0e5685SJohn Baldwin */ 3363c0e5685SJohn Baldwin void 3373c0e5685SJohn Baldwin sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m) 3383c0e5685SJohn Baldwin { 3393c0e5685SJohn Baldwin 3403c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 3413c0e5685SJohn Baldwin 3423c0e5685SJohn Baldwin sb->sb_ccc += m->m_len; 3433c0e5685SJohn Baldwin sb->sb_tlscc += m->m_len; 3443c0e5685SJohn Baldwin 3453c0e5685SJohn Baldwin sb->sb_mbcnt += MSIZE; 3463c0e5685SJohn Baldwin 347*d59bc188SGleb Smirnoff if (m->m_flags & M_EXT) 3483c0e5685SJohn Baldwin sb->sb_mbcnt += m->m_ext.ext_size; 3493c0e5685SJohn Baldwin } 3503c0e5685SJohn Baldwin 3513c0e5685SJohn Baldwin void 3523c0e5685SJohn Baldwin sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m) 3533c0e5685SJohn Baldwin { 3543c0e5685SJohn Baldwin 3553c0e5685SJohn Baldwin #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */ 3563c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 3573c0e5685SJohn Baldwin #endif 3583c0e5685SJohn Baldwin 3593c0e5685SJohn Baldwin sb->sb_ccc -= m->m_len; 3603c0e5685SJohn Baldwin sb->sb_tlscc -= m->m_len; 3613c0e5685SJohn Baldwin 3623c0e5685SJohn Baldwin sb->sb_mbcnt -= MSIZE; 3633c0e5685SJohn Baldwin 364*d59bc188SGleb Smirnoff if (m->m_flags & M_EXT) 3653c0e5685SJohn Baldwin sb->sb_mbcnt -= m->m_ext.ext_size; 3663c0e5685SJohn Baldwin } 3673c0e5685SJohn Baldwin #endif 3683c0e5685SJohn Baldwin 3698967b220SGleb Smirnoff /* 370050ac265SRobert Watson * Socantsendmore indicates that no more data will be sent on the socket; it 371050ac265SRobert Watson * would normally be applied to a socket when the user informs the system 372050ac265SRobert Watson * that no more data is to be sent, by the protocol code (in case 373050ac265SRobert Watson * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be 374050ac265SRobert Watson * received, and will normally be applied to the socket by a protocol when it 375050ac265SRobert Watson * detects that the peer will send no more data. 
Data queued for reading in 376050ac265SRobert Watson * the socket may yet be read. 377df8bae1dSRodney W. Grimes */ 378a34b7046SRobert Watson void 379050ac265SRobert Watson socantsendmore_locked(struct socket *so) 380a34b7046SRobert Watson { 381a34b7046SRobert Watson 38243283184SGleb Smirnoff SOCK_SENDBUF_LOCK_ASSERT(so); 383a34b7046SRobert Watson 384a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE; 385a34b7046SRobert Watson sowwakeup_locked(so); 38643283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK_ASSERT(so); 387a34b7046SRobert Watson } 388df8bae1dSRodney W. Grimes 38926f9a767SRodney W. Grimes void 390050ac265SRobert Watson socantsendmore(struct socket *so) 391df8bae1dSRodney W. Grimes { 392df8bae1dSRodney W. Grimes 39343283184SGleb Smirnoff SOCK_SENDBUF_LOCK(so); 394a34b7046SRobert Watson socantsendmore_locked(so); 39543283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK_ASSERT(so); 396a34b7046SRobert Watson } 397a34b7046SRobert Watson 398a34b7046SRobert Watson void 399050ac265SRobert Watson socantrcvmore_locked(struct socket *so) 400a34b7046SRobert Watson { 401a34b7046SRobert Watson 40243283184SGleb Smirnoff SOCK_RECVBUF_LOCK_ASSERT(so); 403a34b7046SRobert Watson 404a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE; 4053c0e5685SJohn Baldwin #ifdef KERN_TLS 4063c0e5685SJohn Baldwin if (so->so_rcv.sb_flags & SB_TLS_RX) 4073c0e5685SJohn Baldwin ktls_check_rx(&so->so_rcv); 4083c0e5685SJohn Baldwin #endif 409a34b7046SRobert Watson sorwakeup_locked(so); 41043283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK_ASSERT(so); 411df8bae1dSRodney W. Grimes } 412df8bae1dSRodney W. Grimes 41326f9a767SRodney W. Grimes void 414050ac265SRobert Watson socantrcvmore(struct socket *so) 415df8bae1dSRodney W. Grimes { 416df8bae1dSRodney W. Grimes 41743283184SGleb Smirnoff SOCK_RECVBUF_LOCK(so); 418a34b7046SRobert Watson socantrcvmore_locked(so); 41943283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK_ASSERT(so); 420df8bae1dSRodney W. Grimes } 421df8bae1dSRodney W. 
Grimes 4227045b160SRoy Marples void 4237045b160SRoy Marples soroverflow_locked(struct socket *so) 4247045b160SRoy Marples { 4257045b160SRoy Marples 42643283184SGleb Smirnoff SOCK_RECVBUF_LOCK_ASSERT(so); 4277045b160SRoy Marples 4287045b160SRoy Marples if (so->so_options & SO_RERROR) { 4297045b160SRoy Marples so->so_rerror = ENOBUFS; 4307045b160SRoy Marples sorwakeup_locked(so); 4317045b160SRoy Marples } else 43243283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so); 4337045b160SRoy Marples 43443283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK_ASSERT(so); 4357045b160SRoy Marples } 4367045b160SRoy Marples 4377045b160SRoy Marples void 4387045b160SRoy Marples soroverflow(struct socket *so) 4397045b160SRoy Marples { 4407045b160SRoy Marples 44143283184SGleb Smirnoff SOCK_RECVBUF_LOCK(so); 4427045b160SRoy Marples soroverflow_locked(so); 44343283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK_ASSERT(so); 4447045b160SRoy Marples } 4457045b160SRoy Marples 446df8bae1dSRodney W. Grimes /* 447df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer. 448df8bae1dSRodney W. Grimes */ 44926f9a767SRodney W. Grimes int 45043283184SGleb Smirnoff sbwait(struct socket *so, sb_which which) 451df8bae1dSRodney W. Grimes { 45243283184SGleb Smirnoff struct sockbuf *sb; 453df8bae1dSRodney W. Grimes 45443283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which); 45531f555a1SRobert Watson 45643283184SGleb Smirnoff sb = sobuf(so, which); 457df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT; 45843283184SGleb Smirnoff return (msleep_sbt(&sb->sb_acc, soeventmtx(so, which), 45947daf5d5SBruce Evans (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", 4607729cbf1SDavide Italiano sb->sb_timeo, 0, 0)); 461df8bae1dSRodney W. Grimes } 462df8bae1dSRodney W. Grimes 463df8bae1dSRodney W. Grimes /* 464050ac265SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous notification 465050ac265SRobert Watson * via SIGIO if the socket has the SS_ASYNC flag set. 
466a34b7046SRobert Watson * 467a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end 468a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock 469a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through 470a34b7046SRobert Watson * to the point where it's no longer required. We currently hold the lock 471a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and 472a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's 473a34b7046SRobert Watson * correct. 474df8bae1dSRodney W. Grimes */ 47543283184SGleb Smirnoff static __always_inline void 47643283184SGleb Smirnoff sowakeup(struct socket *so, const sb_which which) 477df8bae1dSRodney W. Grimes { 47843283184SGleb Smirnoff struct sockbuf *sb; 47974fb0ba7SJohn Baldwin int ret; 480d48d4b25SSeigo Tanimura 48143283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which); 482a34b7046SRobert Watson 48343283184SGleb Smirnoff sb = sobuf(so, which); 484779f106aSGleb Smirnoff selwakeuppri(sb->sb_sel, PSOCK); 485779f106aSGleb Smirnoff if (!SEL_WAITING(sb->sb_sel)) 486df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_SEL; 487df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) { 488df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT; 4890f9d0a73SGleb Smirnoff wakeup(&sb->sb_acc); 490df8bae1dSRodney W. 
Grimes } 491779f106aSGleb Smirnoff KNOTE_LOCKED(&sb->sb_sel->si_note, 0); 49298c92369SNavdeep Parhar if (sb->sb_upcall != NULL) { 493eb1b1807SGleb Smirnoff ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); 49474fb0ba7SJohn Baldwin if (ret == SU_ISCONNECTED) { 49574fb0ba7SJohn Baldwin KASSERT(sb == &so->so_rcv, 49674fb0ba7SJohn Baldwin ("SO_SND upcall returned SU_ISCONNECTED")); 49774fb0ba7SJohn Baldwin soupcall_clear(so, SO_RCV); 49874fb0ba7SJohn Baldwin } 49974fb0ba7SJohn Baldwin } else 50074fb0ba7SJohn Baldwin ret = SU_OK; 5014cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO) 50243283184SGleb Smirnoff sowakeup_aio(so, which); 50343283184SGleb Smirnoff SOCK_BUF_UNLOCK(so, which); 504555b3e2fSGleb Smirnoff if (ret == SU_ISCONNECTED) 50574fb0ba7SJohn Baldwin soisconnected(so); 50674fb0ba7SJohn Baldwin if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) 50774fb0ba7SJohn Baldwin pgsigio(&so->so_sigio, SIGIO, 0); 50843283184SGleb Smirnoff SOCK_BUF_UNLOCK_ASSERT(so, which); 50943283184SGleb Smirnoff } 51043283184SGleb Smirnoff 51143283184SGleb Smirnoff /* 51243283184SGleb Smirnoff * Do we need to notify the other side when I/O is possible? 
51343283184SGleb Smirnoff */ 51443283184SGleb Smirnoff static __always_inline bool 51543283184SGleb Smirnoff sb_notify(const struct sockbuf *sb) 51643283184SGleb Smirnoff { 51743283184SGleb Smirnoff return ((sb->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | 51843283184SGleb Smirnoff SB_UPCALL | SB_AIO | SB_KNOTE)) != 0); 51943283184SGleb Smirnoff } 52043283184SGleb Smirnoff 52143283184SGleb Smirnoff void 52243283184SGleb Smirnoff sorwakeup_locked(struct socket *so) 52343283184SGleb Smirnoff { 52443283184SGleb Smirnoff SOCK_RECVBUF_LOCK_ASSERT(so); 52543283184SGleb Smirnoff if (sb_notify(&so->so_rcv)) 52643283184SGleb Smirnoff sowakeup(so, SO_RCV); 52743283184SGleb Smirnoff else 52843283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so); 52943283184SGleb Smirnoff } 53043283184SGleb Smirnoff 53143283184SGleb Smirnoff void 53243283184SGleb Smirnoff sowwakeup_locked(struct socket *so) 53343283184SGleb Smirnoff { 53443283184SGleb Smirnoff SOCK_SENDBUF_LOCK_ASSERT(so); 53543283184SGleb Smirnoff if (sb_notify(&so->so_snd)) 53643283184SGleb Smirnoff sowakeup(so, SO_SND); 53743283184SGleb Smirnoff else 53843283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so); 539df8bae1dSRodney W. Grimes } 540df8bae1dSRodney W. Grimes 541df8bae1dSRodney W. Grimes /* 542df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines. 543df8bae1dSRodney W. Grimes * 544050ac265SRobert Watson * Each socket contains two socket buffers: one for sending data and one for 545050ac265SRobert Watson * receiving data. Each buffer contains a queue of mbufs, information about 546050ac265SRobert Watson * the number of mbufs and amount of data in the queue, and other fields 547050ac265SRobert Watson * allowing select() statements and notification on data availability to be 548050ac265SRobert Watson * implemented. 549df8bae1dSRodney W. Grimes * 550050ac265SRobert Watson * Data stored in a socket buffer is maintained as a list of records. 
Each 551050ac265SRobert Watson * record is a list of mbufs chained together with the m_next field. Records 552050ac265SRobert Watson * are chained together with the m_nextpkt field. The upper level routine 553050ac265SRobert Watson * soreceive() expects the following conventions to be observed when placing 554050ac265SRobert Watson * information in the receive buffer: 555df8bae1dSRodney W. Grimes * 556050ac265SRobert Watson * 1. If the protocol requires each message be preceded by the sender's name, 557050ac265SRobert Watson * then a record containing that name must be present before any 558050ac265SRobert Watson * associated data (mbuf's must be of type MT_SONAME). 559050ac265SRobert Watson * 2. If the protocol supports the exchange of ``access rights'' (really just 560050ac265SRobert Watson * additional data associated with the message), and there are ``rights'' 561050ac265SRobert Watson * to be received, then a record containing this data should be present 562050ac265SRobert Watson * (mbuf's must be of type MT_RIGHTS). 563050ac265SRobert Watson * 3. If a name or rights record exists, then it must be followed by a data 564050ac265SRobert Watson * record, perhaps of zero length. 565df8bae1dSRodney W. Grimes * 566df8bae1dSRodney W. Grimes * Before using a new socket structure it is first necessary to reserve 567df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit 568df8bae1dSRodney W. Grimes * some of the available buffer space in the system buffer pool for the 569050ac265SRobert Watson * socket (currently, it does nothing but enforce limits). The space should 570050ac265SRobert Watson * be released by calling sbrelease() when the socket is destroyed. 571df8bae1dSRodney W. Grimes */ 57226f9a767SRodney W. Grimes int 573050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 574df8bae1dSRodney W. Grimes { 575b40ce416SJulian Elischer struct thread *td = curthread; 576df8bae1dSRodney W. 
Grimes 57743283184SGleb Smirnoff SOCK_SENDBUF_LOCK(so); 57843283184SGleb Smirnoff SOCK_RECVBUF_LOCK(so); 57943283184SGleb Smirnoff if (sbreserve_locked(so, SO_SND, sndcc, td) == 0) 5803f11a2f3SRobert Watson goto bad; 58143283184SGleb Smirnoff if (sbreserve_locked(so, SO_RCV, rcvcc, td) == 0) 5823f11a2f3SRobert Watson goto bad2; 583df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0) 584df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1; 585df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0) 586df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES; 587df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 588df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 58943283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so); 59043283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so); 591df8bae1dSRodney W. Grimes return (0); 592df8bae1dSRodney W. Grimes bad2: 59343283184SGleb Smirnoff sbrelease_locked(so, SO_SND); 594df8bae1dSRodney W. Grimes bad: 59543283184SGleb Smirnoff SOCK_RECVBUF_UNLOCK(so); 59643283184SGleb Smirnoff SOCK_SENDBUF_UNLOCK(so); 597df8bae1dSRodney W. Grimes return (ENOBUFS); 598df8bae1dSRodney W. Grimes } 599df8bae1dSRodney W. Grimes 60079cb7eb4SDavid Greenman static int 60179cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 60279cb7eb4SDavid Greenman { 60379cb7eb4SDavid Greenman int error = 0; 60486a93d51SJohn Baldwin u_long tmp_sb_max = sb_max; 60579cb7eb4SDavid Greenman 60686a93d51SJohn Baldwin error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); 60779cb7eb4SDavid Greenman if (error || !req->newptr) 60879cb7eb4SDavid Greenman return (error); 60986a93d51SJohn Baldwin if (tmp_sb_max < MSIZE + MCLBYTES) 61079cb7eb4SDavid Greenman return (EINVAL); 61186a93d51SJohn Baldwin sb_max = tmp_sb_max; 61279cb7eb4SDavid Greenman sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 61379cb7eb4SDavid Greenman return (0); 61479cb7eb4SDavid Greenman } 61579cb7eb4SDavid Greenman 616df8bae1dSRodney W. 
Grimes /* 617050ac265SRobert Watson * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't 618050ac265SRobert Watson * become limiting if buffering efficiency is near the normal case. 619df8bae1dSRodney W. Grimes */ 62043283184SGleb Smirnoff bool 62143283184SGleb Smirnoff sbreserve_locked(struct socket *so, sb_which which, u_long cc, 622050ac265SRobert Watson struct thread *td) 623df8bae1dSRodney W. Grimes { 62443283184SGleb Smirnoff struct sockbuf *sb = sobuf(so, which); 62591d5354aSJohn Baldwin rlim_t sbsize_limit; 626ecf72308SBrian Feldman 62743283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which); 6283f11a2f3SRobert Watson 629ecf72308SBrian Feldman /* 6307978014dSRobert Watson * When a thread is passed, we take into account the thread's socket 6317978014dSRobert Watson * buffer size limit. The caller will generally pass curthread, but 6327978014dSRobert Watson * in the TCP input path, NULL will be passed to indicate that no 6337978014dSRobert Watson * appropriate thread resource limits are available. In that case, 6347978014dSRobert Watson * we don't apply a process limit. 635ecf72308SBrian Feldman */ 63679cb7eb4SDavid Greenman if (cc > sb_max_adj) 63743283184SGleb Smirnoff return (false); 63891d5354aSJohn Baldwin if (td != NULL) { 639f6f6d240SMateusz Guzik sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); 64091d5354aSJohn Baldwin } else 64191d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY; 642f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 64391d5354aSJohn Baldwin sbsize_limit)) 64443283184SGleb Smirnoff return (false); 6454b29bc4fSGarrett Wollman sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 646df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat) 647df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat; 64843283184SGleb Smirnoff return (true); 649df8bae1dSRodney W. Grimes } 650df8bae1dSRodney W. 
Grimes 6513f11a2f3SRobert Watson int 65264290befSGleb Smirnoff sbsetopt(struct socket *so, int cmd, u_long cc) 6533f11a2f3SRobert Watson { 65464290befSGleb Smirnoff struct sockbuf *sb; 65543283184SGleb Smirnoff sb_which wh; 65664290befSGleb Smirnoff short *flags; 65764290befSGleb Smirnoff u_int *hiwat, *lowat; 6583f11a2f3SRobert Watson int error; 6593f11a2f3SRobert Watson 660b2037136SMatt Macy sb = NULL; 66164290befSGleb Smirnoff SOCK_LOCK(so); 66264290befSGleb Smirnoff if (SOLISTENING(so)) { 66364290befSGleb Smirnoff switch (cmd) { 66464290befSGleb Smirnoff case SO_SNDLOWAT: 66564290befSGleb Smirnoff case SO_SNDBUF: 66664290befSGleb Smirnoff lowat = &so->sol_sbsnd_lowat; 66764290befSGleb Smirnoff hiwat = &so->sol_sbsnd_hiwat; 66864290befSGleb Smirnoff flags = &so->sol_sbsnd_flags; 66964290befSGleb Smirnoff break; 67064290befSGleb Smirnoff case SO_RCVLOWAT: 67164290befSGleb Smirnoff case SO_RCVBUF: 67264290befSGleb Smirnoff lowat = &so->sol_sbrcv_lowat; 67364290befSGleb Smirnoff hiwat = &so->sol_sbrcv_hiwat; 67464290befSGleb Smirnoff flags = &so->sol_sbrcv_flags; 67564290befSGleb Smirnoff break; 67664290befSGleb Smirnoff } 67764290befSGleb Smirnoff } else { 67864290befSGleb Smirnoff switch (cmd) { 67964290befSGleb Smirnoff case SO_SNDLOWAT: 68064290befSGleb Smirnoff case SO_SNDBUF: 68164290befSGleb Smirnoff sb = &so->so_snd; 68243283184SGleb Smirnoff wh = SO_SND; 68364290befSGleb Smirnoff break; 68464290befSGleb Smirnoff case SO_RCVLOWAT: 68564290befSGleb Smirnoff case SO_RCVBUF: 68664290befSGleb Smirnoff sb = &so->so_rcv; 68743283184SGleb Smirnoff wh = SO_RCV; 68864290befSGleb Smirnoff break; 68964290befSGleb Smirnoff } 69064290befSGleb Smirnoff flags = &sb->sb_flags; 69164290befSGleb Smirnoff hiwat = &sb->sb_hiwat; 69264290befSGleb Smirnoff lowat = &sb->sb_lowat; 69343283184SGleb Smirnoff SOCK_BUF_LOCK(so, wh); 69464290befSGleb Smirnoff } 69564290befSGleb Smirnoff 69664290befSGleb Smirnoff error = 0; 69764290befSGleb Smirnoff switch (cmd) { 69864290befSGleb 
Smirnoff case SO_SNDBUF: 69964290befSGleb Smirnoff case SO_RCVBUF: 70064290befSGleb Smirnoff if (SOLISTENING(so)) { 70164290befSGleb Smirnoff if (cc > sb_max_adj) { 70264290befSGleb Smirnoff error = ENOBUFS; 70364290befSGleb Smirnoff break; 70464290befSGleb Smirnoff } 70564290befSGleb Smirnoff *hiwat = cc; 70664290befSGleb Smirnoff if (*lowat > *hiwat) 70764290befSGleb Smirnoff *lowat = *hiwat; 70864290befSGleb Smirnoff } else { 70943283184SGleb Smirnoff if (!sbreserve_locked(so, wh, cc, curthread)) 71064290befSGleb Smirnoff error = ENOBUFS; 71164290befSGleb Smirnoff } 71264290befSGleb Smirnoff if (error == 0) 71364290befSGleb Smirnoff *flags &= ~SB_AUTOSIZE; 71464290befSGleb Smirnoff break; 71564290befSGleb Smirnoff case SO_SNDLOWAT: 71664290befSGleb Smirnoff case SO_RCVLOWAT: 71764290befSGleb Smirnoff /* 71864290befSGleb Smirnoff * Make sure the low-water is never greater than the 71964290befSGleb Smirnoff * high-water. 72064290befSGleb Smirnoff */ 72164290befSGleb Smirnoff *lowat = (cc > *hiwat) ? *hiwat : cc; 72264290befSGleb Smirnoff break; 72364290befSGleb Smirnoff } 72464290befSGleb Smirnoff 72564290befSGleb Smirnoff if (!SOLISTENING(so)) 72643283184SGleb Smirnoff SOCK_BUF_UNLOCK(so, wh); 72764290befSGleb Smirnoff SOCK_UNLOCK(so); 7283f11a2f3SRobert Watson return (error); 7293f11a2f3SRobert Watson } 7303f11a2f3SRobert Watson 731df8bae1dSRodney W. Grimes /* 732df8bae1dSRodney W. Grimes * Free mbufs held by a socket, and reserved mbuf space. 733df8bae1dSRodney W. 
Grimes */ 7347db54446SGleb Smirnoff static void 73543283184SGleb Smirnoff sbrelease_internal(struct socket *so, sb_which which) 736eaa6dfbcSRobert Watson { 73743283184SGleb Smirnoff struct sockbuf *sb = sobuf(so, which); 738eaa6dfbcSRobert Watson 739eaa6dfbcSRobert Watson sbflush_internal(sb); 740eaa6dfbcSRobert Watson (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 741eaa6dfbcSRobert Watson RLIM_INFINITY); 742eaa6dfbcSRobert Watson sb->sb_mbmax = 0; 743eaa6dfbcSRobert Watson } 744eaa6dfbcSRobert Watson 74526f9a767SRodney W. Grimes void 74643283184SGleb Smirnoff sbrelease_locked(struct socket *so, sb_which which) 747df8bae1dSRodney W. Grimes { 748df8bae1dSRodney W. Grimes 74943283184SGleb Smirnoff SOCK_BUF_LOCK_ASSERT(so, which); 750a34b7046SRobert Watson 75143283184SGleb Smirnoff sbrelease_internal(so, which); 752df8bae1dSRodney W. Grimes } 753df8bae1dSRodney W. Grimes 754a34b7046SRobert Watson void 75543283184SGleb Smirnoff sbrelease(struct socket *so, sb_which which) 756a34b7046SRobert Watson { 757a34b7046SRobert Watson 75843283184SGleb Smirnoff SOCK_BUF_LOCK(so, which); 75943283184SGleb Smirnoff sbrelease_locked(so, which); 76043283184SGleb Smirnoff SOCK_BUF_UNLOCK(so, which); 761a34b7046SRobert Watson } 762eaa6dfbcSRobert Watson 763eaa6dfbcSRobert Watson void 76443283184SGleb Smirnoff sbdestroy(struct socket *so, sb_which which) 765eaa6dfbcSRobert Watson { 766b2e60773SJohn Baldwin #ifdef KERN_TLS 76743283184SGleb Smirnoff struct sockbuf *sb = sobuf(so, which); 76843283184SGleb Smirnoff 769b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL) 770b2e60773SJohn Baldwin ktls_free(sb->sb_tls_info); 771b2e60773SJohn Baldwin sb->sb_tls_info = NULL; 772b2e60773SJohn Baldwin #endif 77343283184SGleb Smirnoff sbrelease_internal(so, which); 774eaa6dfbcSRobert Watson } 775eaa6dfbcSRobert Watson 776df8bae1dSRodney W. Grimes /* 777050ac265SRobert Watson * Routines to add and remove data from an mbuf queue. 778df8bae1dSRodney W. 
Grimes * 779050ac265SRobert Watson * The routines sbappend() or sbappendrecord() are normally called to append 780050ac265SRobert Watson * new mbufs to a socket buffer, after checking that adequate space is 781050ac265SRobert Watson * available, comparing the function sbspace() with the amount of data to be 782050ac265SRobert Watson * added. sbappendrecord() differs from sbappend() in that data supplied is 783050ac265SRobert Watson * treated as the beginning of a new record. To place a sender's address, 784050ac265SRobert Watson * optional access rights, and data in a socket receive buffer, 785050ac265SRobert Watson * sbappendaddr() should be used. To place access rights and data in a 786050ac265SRobert Watson * socket receive buffer, sbappendrights() should be used. In either case, 787050ac265SRobert Watson * the new data begins a new record. Note that unlike sbappend() and 788050ac265SRobert Watson * sbappendrecord(), these routines check for the caller that there will be 789050ac265SRobert Watson * enough space to store the data. Each fails if there is not enough space, 790050ac265SRobert Watson * or if it cannot find mbufs to store additional information in. 791df8bae1dSRodney W. Grimes * 792050ac265SRobert Watson * Reliable protocols may use the socket send buffer to hold data awaiting 793050ac265SRobert Watson * acknowledgement. Data is normally copied from a socket send buffer in a 794050ac265SRobert Watson * protocol with m_copy for output to a peer, and then removing the data from 795050ac265SRobert Watson * the socket buffer with sbdrop() or sbdroprecord() when the data is 796050ac265SRobert Watson * acknowledged by the peer. 797df8bae1dSRodney W. 
Grimes */ 798395bb186SSam Leffler #ifdef SOCKBUF_DEBUG 799395bb186SSam Leffler void 800395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line) 801395bb186SSam Leffler { 802395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 803395bb186SSam Leffler 804a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 805a34b7046SRobert Watson 806395bb186SSam Leffler while (m && m->m_nextpkt) 807395bb186SSam Leffler m = m->m_nextpkt; 808395bb186SSam Leffler 809395bb186SSam Leffler if (m != sb->sb_lastrecord) { 810395bb186SSam Leffler printf("%s: sb_mb %p sb_lastrecord %p last %p\n", 811395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_lastrecord, m); 812395bb186SSam Leffler printf("packet chain:\n"); 813395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 814395bb186SSam Leffler printf("\t%p\n", m); 815395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 816395bb186SSam Leffler } 817395bb186SSam Leffler } 818395bb186SSam Leffler 819395bb186SSam Leffler void 820395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line) 821395bb186SSam Leffler { 822395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 823395bb186SSam Leffler struct mbuf *n; 824395bb186SSam Leffler 825a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 826a34b7046SRobert Watson 827395bb186SSam Leffler while (m && m->m_nextpkt) 828395bb186SSam Leffler m = m->m_nextpkt; 829395bb186SSam Leffler 830395bb186SSam Leffler while (m && m->m_next) 831395bb186SSam Leffler m = m->m_next; 832395bb186SSam Leffler 833395bb186SSam Leffler if (m != sb->sb_mbtail) { 834395bb186SSam Leffler printf("%s: sb_mb %p sb_mbtail %p last %p\n", 835395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_mbtail, m); 836395bb186SSam Leffler printf("packet tree:\n"); 837395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 838395bb186SSam Leffler printf("\t"); 839395bb186SSam Leffler for (n = m; n != NULL; n = n->m_next) 840395bb186SSam Leffler printf("%p ", n); 
841395bb186SSam Leffler printf("\n"); 842395bb186SSam Leffler } 843395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 844395bb186SSam Leffler } 8453c0e5685SJohn Baldwin 8463c0e5685SJohn Baldwin #ifdef KERN_TLS 8473c0e5685SJohn Baldwin m = sb->sb_mtls; 8483c0e5685SJohn Baldwin while (m && m->m_next) 8493c0e5685SJohn Baldwin m = m->m_next; 8503c0e5685SJohn Baldwin 8513c0e5685SJohn Baldwin if (m != sb->sb_mtlstail) { 8523c0e5685SJohn Baldwin printf("%s: sb_mtls %p sb_mtlstail %p last %p\n", 8533c0e5685SJohn Baldwin __func__, sb->sb_mtls, sb->sb_mtlstail, m); 8543c0e5685SJohn Baldwin printf("TLS packet tree:\n"); 8553c0e5685SJohn Baldwin printf("\t"); 8563c0e5685SJohn Baldwin for (m = sb->sb_mtls; m != NULL; m = m->m_next) { 8573c0e5685SJohn Baldwin printf("%p ", m); 8583c0e5685SJohn Baldwin } 8593c0e5685SJohn Baldwin printf("\n"); 8603c0e5685SJohn Baldwin panic("%s from %s:%u", __func__, file, line); 8613c0e5685SJohn Baldwin } 8623c0e5685SJohn Baldwin #endif 863395bb186SSam Leffler } 864395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */ 865395bb186SSam Leffler 866395bb186SSam Leffler #define SBLINKRECORD(sb, m0) do { \ 867a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); \ 868395bb186SSam Leffler if ((sb)->sb_lastrecord != NULL) \ 869395bb186SSam Leffler (sb)->sb_lastrecord->m_nextpkt = (m0); \ 870395bb186SSam Leffler else \ 871395bb186SSam Leffler (sb)->sb_mb = (m0); \ 872395bb186SSam Leffler (sb)->sb_lastrecord = (m0); \ 873395bb186SSam Leffler } while (/*CONSTCOND*/0) 874395bb186SSam Leffler 875df8bae1dSRodney W. Grimes /* 876050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 877050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 878050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 879df8bae1dSRodney W. Grimes */ 88026f9a767SRodney W. 
Grimes void 881829fae90SGleb Smirnoff sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) 882df8bae1dSRodney W. Grimes { 883050ac265SRobert Watson struct mbuf *n; 884df8bae1dSRodney W. Grimes 885a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 886a34b7046SRobert Watson 887b85f65afSPedro F. Giffuni if (m == NULL) 888df8bae1dSRodney W. Grimes return; 889829fae90SGleb Smirnoff sbm_clrprotoflags(m, flags); 890395bb186SSam Leffler SBLASTRECORDCHK(sb); 891797f2d22SPoul-Henning Kamp n = sb->sb_mb; 892797f2d22SPoul-Henning Kamp if (n) { 893df8bae1dSRodney W. Grimes while (n->m_nextpkt) 894df8bae1dSRodney W. Grimes n = n->m_nextpkt; 895df8bae1dSRodney W. Grimes do { 896df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) { 897a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 898df8bae1dSRodney W. Grimes return; 899df8bae1dSRodney W. Grimes } 900df8bae1dSRodney W. Grimes } while (n->m_next && (n = n->m_next)); 901395bb186SSam Leffler } else { 902395bb186SSam Leffler /* 903395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but 904395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that 905395bb186SSam Leffler * XXX way. 906395bb186SSam Leffler */ 907395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) { 908395bb186SSam Leffler do { 909395bb186SSam Leffler if (n->m_flags & M_EOR) { 910a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 911395bb186SSam Leffler return; 912395bb186SSam Leffler } 913395bb186SSam Leffler } while (n->m_next && (n = n->m_next)); 914395bb186SSam Leffler } else { 915395bb186SSam Leffler /* 916395bb186SSam Leffler * If this is the first record in the socket buffer, 917395bb186SSam Leffler * it's also the last record. 918395bb186SSam Leffler */ 919395bb186SSam Leffler sb->sb_lastrecord = m; 920395bb186SSam Leffler } 921df8bae1dSRodney W. Grimes } 922df8bae1dSRodney W. 
Grimes sbcompress(sb, m, n); 923395bb186SSam Leffler SBLASTRECORDCHK(sb); 924395bb186SSam Leffler } 925395bb186SSam Leffler 926395bb186SSam Leffler /* 927050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 928050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 929050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 930a34b7046SRobert Watson */ 931a34b7046SRobert Watson void 932829fae90SGleb Smirnoff sbappend(struct sockbuf *sb, struct mbuf *m, int flags) 933a34b7046SRobert Watson { 934a34b7046SRobert Watson 935a34b7046SRobert Watson SOCKBUF_LOCK(sb); 936829fae90SGleb Smirnoff sbappend_locked(sb, m, flags); 937a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 938a34b7046SRobert Watson } 939a34b7046SRobert Watson 9403c0e5685SJohn Baldwin #ifdef KERN_TLS 9413c0e5685SJohn Baldwin /* 9423c0e5685SJohn Baldwin * Append an mbuf containing encrypted TLS data. The data 9433c0e5685SJohn Baldwin * is marked M_NOTREADY until it has been decrypted and 9443c0e5685SJohn Baldwin * stored as a TLS record. 9453c0e5685SJohn Baldwin */ 9463c0e5685SJohn Baldwin static void 9473c0e5685SJohn Baldwin sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) 9483c0e5685SJohn Baldwin { 9493c0e5685SJohn Baldwin struct mbuf *n; 9503c0e5685SJohn Baldwin 9513c0e5685SJohn Baldwin SBLASTMBUFCHK(sb); 9523c0e5685SJohn Baldwin 9533c0e5685SJohn Baldwin /* Remove all packet headers and mbuf tags to get a pure data chain. 
*/ 9543c0e5685SJohn Baldwin m_demote(m, 1, 0); 9553c0e5685SJohn Baldwin 9563c0e5685SJohn Baldwin for (n = m; n != NULL; n = n->m_next) 9573c0e5685SJohn Baldwin n->m_flags |= M_NOTREADY; 9583c0e5685SJohn Baldwin sbcompress_ktls_rx(sb, m, sb->sb_mtlstail); 9593c0e5685SJohn Baldwin ktls_check_rx(sb); 9603c0e5685SJohn Baldwin } 9613c0e5685SJohn Baldwin #endif 9623c0e5685SJohn Baldwin 963a34b7046SRobert Watson /* 964050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 965050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 966050ac265SRobert Watson * that is, a stream protocol (such as TCP). 967395bb186SSam Leffler */ 968395bb186SSam Leffler void 969651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) 970395bb186SSam Leffler { 971a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 972395bb186SSam Leffler 973395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); 9743c0e5685SJohn Baldwin 9753c0e5685SJohn Baldwin #ifdef KERN_TLS 9763c0e5685SJohn Baldwin /* 9773c0e5685SJohn Baldwin * Decrypted TLS records are appended as records via 9783c0e5685SJohn Baldwin * sbappendrecord(). TCP passes encrypted TLS records to this 9793c0e5685SJohn Baldwin * function which must be scheduled for decryption. 
9803c0e5685SJohn Baldwin */ 9813c0e5685SJohn Baldwin if (sb->sb_flags & SB_TLS_RX) { 9823c0e5685SJohn Baldwin sbappend_ktls_rx(sb, m); 9833c0e5685SJohn Baldwin return; 9843c0e5685SJohn Baldwin } 9853c0e5685SJohn Baldwin #endif 9863c0e5685SJohn Baldwin 987395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); 988395bb186SSam Leffler 989395bb186SSam Leffler SBLASTMBUFCHK(sb); 990395bb186SSam Leffler 991b2e60773SJohn Baldwin #ifdef KERN_TLS 992b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL) 993b2e60773SJohn Baldwin ktls_seq(sb, m); 994b2e60773SJohn Baldwin #endif 995b2e60773SJohn Baldwin 996844cacd1SGleb Smirnoff /* Remove all packet headers and mbuf tags to get a pure data chain. */ 997651e4e6aSGleb Smirnoff m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); 998844cacd1SGleb Smirnoff 999395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail); 1000395bb186SSam Leffler 1001395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb; 1002395bb186SSam Leffler SBLASTRECORDCHK(sb); 1003df8bae1dSRodney W. Grimes } 1004df8bae1dSRodney W. Grimes 1005a34b7046SRobert Watson /* 1006050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 1007050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 1008050ac265SRobert Watson * that is, a stream protocol (such as TCP). 1009a34b7046SRobert Watson */ 1010a34b7046SRobert Watson void 1011651e4e6aSGleb Smirnoff sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) 1012a34b7046SRobert Watson { 1013a34b7046SRobert Watson 1014a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1015651e4e6aSGleb Smirnoff sbappendstream_locked(sb, m, flags); 1016a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1017a34b7046SRobert Watson } 1018a34b7046SRobert Watson 1019df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG 102026f9a767SRodney W. Grimes void 102157f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line) 1022df8bae1dSRodney W. 
Grimes { 10230f9d0a73SGleb Smirnoff struct mbuf *m, *n, *fnrdy; 10240f9d0a73SGleb Smirnoff u_long acc, ccc, mbcnt; 10253c0e5685SJohn Baldwin #ifdef KERN_TLS 10263c0e5685SJohn Baldwin u_long tlscc; 10273c0e5685SJohn Baldwin #endif 1028df8bae1dSRodney W. Grimes 1029a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1030a34b7046SRobert Watson 10310f9d0a73SGleb Smirnoff acc = ccc = mbcnt = 0; 10320f9d0a73SGleb Smirnoff fnrdy = NULL; 103357f43a45SGleb Smirnoff 10340931333fSBill Fenner for (m = sb->sb_mb; m; m = n) { 10350931333fSBill Fenner n = m->m_nextpkt; 10360931333fSBill Fenner for (; m; m = m->m_next) { 103757f43a45SGleb Smirnoff if (m->m_len == 0) { 103857f43a45SGleb Smirnoff printf("sb %p empty mbuf %p\n", sb, m); 103957f43a45SGleb Smirnoff goto fail; 104057f43a45SGleb Smirnoff } 10410f9d0a73SGleb Smirnoff if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { 10420f9d0a73SGleb Smirnoff if (m != sb->sb_fnrdy) { 10430f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p != m %p\n", 10440f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 10450f9d0a73SGleb Smirnoff goto fail; 10460f9d0a73SGleb Smirnoff } 10470f9d0a73SGleb Smirnoff fnrdy = m; 10480f9d0a73SGleb Smirnoff } 10490f9d0a73SGleb Smirnoff if (fnrdy) { 10500f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) { 10510f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p, m %p is avail\n", 10520f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 10530f9d0a73SGleb Smirnoff goto fail; 10540f9d0a73SGleb Smirnoff } 10550f9d0a73SGleb Smirnoff } else 10560f9d0a73SGleb Smirnoff acc += m->m_len; 10570f9d0a73SGleb Smirnoff ccc += m->m_len; 1058df8bae1dSRodney W. Grimes mbcnt += MSIZE; 1059313861b8SJulian Elischer if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 1060df8bae1dSRodney W. Grimes mbcnt += m->m_ext.ext_size; 10610931333fSBill Fenner } 1062df8bae1dSRodney W. 
Grimes } 10633c0e5685SJohn Baldwin #ifdef KERN_TLS 10643c0e5685SJohn Baldwin /* 10653c0e5685SJohn Baldwin * Account for mbufs "detached" by ktls_detach_record() while 10663c0e5685SJohn Baldwin * they are decrypted by ktls_decrypt(). tlsdcc gives a count 10673c0e5685SJohn Baldwin * of the detached bytes that are included in ccc. The mbufs 10683c0e5685SJohn Baldwin * and clusters are not included in the socket buffer 10693c0e5685SJohn Baldwin * accounting. 10703c0e5685SJohn Baldwin */ 10713c0e5685SJohn Baldwin ccc += sb->sb_tlsdcc; 10723c0e5685SJohn Baldwin 10733c0e5685SJohn Baldwin tlscc = 0; 10743c0e5685SJohn Baldwin for (m = sb->sb_mtls; m; m = m->m_next) { 10753c0e5685SJohn Baldwin if (m->m_nextpkt != NULL) { 10763c0e5685SJohn Baldwin printf("sb %p TLS mbuf %p with nextpkt\n", sb, m); 10773c0e5685SJohn Baldwin goto fail; 10783c0e5685SJohn Baldwin } 10793c0e5685SJohn Baldwin if ((m->m_flags & M_NOTREADY) == 0) { 10803c0e5685SJohn Baldwin printf("sb %p TLS mbuf %p ready\n", sb, m); 10813c0e5685SJohn Baldwin goto fail; 10823c0e5685SJohn Baldwin } 10833c0e5685SJohn Baldwin tlscc += m->m_len; 10843c0e5685SJohn Baldwin ccc += m->m_len; 10853c0e5685SJohn Baldwin mbcnt += MSIZE; 10863c0e5685SJohn Baldwin if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 10873c0e5685SJohn Baldwin mbcnt += m->m_ext.ext_size; 10883c0e5685SJohn Baldwin } 10893c0e5685SJohn Baldwin 10903c0e5685SJohn Baldwin if (sb->sb_tlscc != tlscc) { 10913c0e5685SJohn Baldwin printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, 10923c0e5685SJohn Baldwin sb->sb_tlsdcc); 10933c0e5685SJohn Baldwin goto fail; 10943c0e5685SJohn Baldwin } 10953c0e5685SJohn Baldwin #endif 10960f9d0a73SGleb Smirnoff if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { 10970f9d0a73SGleb Smirnoff printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", 10980f9d0a73SGleb Smirnoff acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); 10993c0e5685SJohn Baldwin #ifdef KERN_TLS 11003c0e5685SJohn Baldwin 
printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, 11013c0e5685SJohn Baldwin sb->sb_tlsdcc); 11023c0e5685SJohn Baldwin #endif 110357f43a45SGleb Smirnoff goto fail; 1104df8bae1dSRodney W. Grimes } 110557f43a45SGleb Smirnoff return; 110657f43a45SGleb Smirnoff fail: 110757f43a45SGleb Smirnoff panic("%s from %s:%u", __func__, file, line); 1108df8bae1dSRodney W. Grimes } 1109df8bae1dSRodney W. Grimes #endif 1110df8bae1dSRodney W. Grimes 1111df8bae1dSRodney W. Grimes /* 1112050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 1113df8bae1dSRodney W. Grimes */ 111426f9a767SRodney W. Grimes void 1115050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) 1116df8bae1dSRodney W. Grimes { 1117050ac265SRobert Watson struct mbuf *m; 1118df8bae1dSRodney W. Grimes 1119a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1120a34b7046SRobert Watson 1121b85f65afSPedro F. Giffuni if (m0 == NULL) 1122df8bae1dSRodney W. Grimes return; 112353b680caSGleb Smirnoff m_clrprotoflags(m0); 1124df8bae1dSRodney W. Grimes /* 1125050ac265SRobert Watson * Put the first mbuf on the queue. Note this permits zero length 1126050ac265SRobert Watson * records. 1127df8bae1dSRodney W. Grimes */ 1128df8bae1dSRodney W. Grimes sballoc(sb, m0); 1129395bb186SSam Leffler SBLASTRECORDCHK(sb); 1130395bb186SSam Leffler SBLINKRECORD(sb, m0); 1131e72a94adSMaksim Yevmenkin sb->sb_mbtail = m0; 1132df8bae1dSRodney W. Grimes m = m0->m_next; 1133df8bae1dSRodney W. Grimes m0->m_next = 0; 1134df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) { 1135df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR; 1136df8bae1dSRodney W. Grimes m->m_flags |= M_EOR; 1137df8bae1dSRodney W. Grimes } 1138e72a94adSMaksim Yevmenkin /* always call sbcompress() so it can do SBLASTMBUFCHK() */ 1139df8bae1dSRodney W. Grimes sbcompress(sb, m, m0); 1140df8bae1dSRodney W. Grimes } 1141df8bae1dSRodney W. Grimes 1142df8bae1dSRodney W. 
Grimes /* 1143050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 1144a34b7046SRobert Watson */ 1145a34b7046SRobert Watson void 1146050ac265SRobert Watson sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 1147a34b7046SRobert Watson { 1148a34b7046SRobert Watson 1149a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1150a34b7046SRobert Watson sbappendrecord_locked(sb, m0); 1151a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1152a34b7046SRobert Watson } 1153a34b7046SRobert Watson 11548de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */ 11558de34a88SAlan Somers static int 11568de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, 11578de34a88SAlan Somers struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) 1158df8bae1dSRodney W. Grimes { 1159395bb186SSam Leffler struct mbuf *m, *n, *nlast; 1160c43cad1aSScott Long #if MSIZE <= 256 1161df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN) 1162df8bae1dSRodney W. Grimes return (0); 1163c43cad1aSScott Long #endif 1164c8b59ea7SGleb Smirnoff m = m_get(M_NOWAIT, MT_SONAME); 1165c8b59ea7SGleb Smirnoff if (m == NULL) 1166df8bae1dSRodney W. Grimes return (0); 1167df8bae1dSRodney W. Grimes m->m_len = asa->sa_len; 116880208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len); 1169c33a2313SAndrey V. Elsukov if (m0) { 117017cbcf33SHans Petter Selasky M_ASSERT_NO_SND_TAG(m0); 117153b680caSGleb Smirnoff m_clrprotoflags(m0); 117257386f5dSAndrey V. Elsukov m_tag_delete_chain(m0, NULL); 1173c33a2313SAndrey V. Elsukov /* 1174c33a2313SAndrey V. Elsukov * Clear some persistent info from pkthdr. 1175c33a2313SAndrey V. Elsukov * We don't use m_demote(), because some netgraph consumers 1176c33a2313SAndrey V. Elsukov * expect M_PKTHDR presence. 1177c33a2313SAndrey V. Elsukov */ 1178c33a2313SAndrey V. Elsukov m0->m_pkthdr.rcvif = NULL; 1179c33a2313SAndrey V. Elsukov m0->m_pkthdr.flowid = 0; 1180c33a2313SAndrey V. 
Elsukov m0->m_pkthdr.csum_flags = 0; 1181c33a2313SAndrey V. Elsukov m0->m_pkthdr.fibnum = 0; 1182c33a2313SAndrey V. Elsukov m0->m_pkthdr.rsstype = 0; 1183c33a2313SAndrey V. Elsukov } 11848de34a88SAlan Somers if (ctrl_last) 11858de34a88SAlan Somers ctrl_last->m_next = m0; /* concatenate data to control */ 1186df8bae1dSRodney W. Grimes else 1187df8bae1dSRodney W. Grimes control = m0; 1188df8bae1dSRodney W. Grimes m->m_next = control; 1189395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next) 1190df8bae1dSRodney W. Grimes sballoc(sb, n); 1191395bb186SSam Leffler sballoc(sb, n); 1192395bb186SSam Leffler nlast = n; 1193395bb186SSam Leffler SBLINKRECORD(sb, m); 1194395bb186SSam Leffler 1195395bb186SSam Leffler sb->sb_mbtail = nlast; 1196395bb186SSam Leffler SBLASTMBUFCHK(sb); 1197395bb186SSam Leffler 1198395bb186SSam Leffler SBLASTRECORDCHK(sb); 1199df8bae1dSRodney W. Grimes return (1); 1200df8bae1dSRodney W. Grimes } 1201df8bae1dSRodney W. Grimes 1202a34b7046SRobert Watson /* 1203050ac265SRobert Watson * Append address and data, and optionally, control (ancillary) data to the 1204050ac265SRobert Watson * receive queue of a socket. If present, m0 must include a packet header 1205050ac265SRobert Watson * with total length. Returns 0 if no space in sockbuf or insufficient 1206050ac265SRobert Watson * mbufs. 1207a34b7046SRobert Watson */ 120826f9a767SRodney W. 
Grimes int 12098de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, 12108de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 12118de34a88SAlan Somers { 12128de34a88SAlan Somers struct mbuf *ctrl_last; 12138de34a88SAlan Somers int space = asa->sa_len; 12148de34a88SAlan Somers 12158de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 12168de34a88SAlan Somers 12178de34a88SAlan Somers if (m0 && (m0->m_flags & M_PKTHDR) == 0) 12188de34a88SAlan Somers panic("sbappendaddr_locked"); 12198de34a88SAlan Somers if (m0) 12208de34a88SAlan Somers space += m0->m_pkthdr.len; 12218de34a88SAlan Somers space += m_length(control, &ctrl_last); 12228de34a88SAlan Somers 12238de34a88SAlan Somers if (space > sbspace(sb)) 12248de34a88SAlan Somers return (0); 12258de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 12268de34a88SAlan Somers } 12278de34a88SAlan Somers 12288de34a88SAlan Somers /* 12298de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 12308de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 12318de34a88SAlan Somers * with total length. Returns 0 if insufficient mbufs. Does not validate space 12328de34a88SAlan Somers * on the receiving sockbuf. 12338de34a88SAlan Somers */ 12348de34a88SAlan Somers int 12358de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, 12368de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 12378de34a88SAlan Somers { 12388de34a88SAlan Somers struct mbuf *ctrl_last; 12398de34a88SAlan Somers 12408de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 12418de34a88SAlan Somers 12428de34a88SAlan Somers ctrl_last = (control == NULL) ? 
NULL : m_last(control); 12438de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 12448de34a88SAlan Somers } 12458de34a88SAlan Somers 12468de34a88SAlan Somers /* 12478de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 12488de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 12498de34a88SAlan Somers * with total length. Returns 0 if no space in sockbuf or insufficient 12508de34a88SAlan Somers * mbufs. 12518de34a88SAlan Somers */ 12528de34a88SAlan Somers int 1253050ac265SRobert Watson sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, 1254050ac265SRobert Watson struct mbuf *m0, struct mbuf *control) 1255a34b7046SRobert Watson { 1256a34b7046SRobert Watson int retval; 1257a34b7046SRobert Watson 1258a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1259a34b7046SRobert Watson retval = sbappendaddr_locked(sb, asa, m0, control); 1260a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1261a34b7046SRobert Watson return (retval); 1262a34b7046SRobert Watson } 1263a34b7046SRobert Watson 12645b0480f2SMark Johnston void 1265050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, 126625f4ddfbSMark Johnston struct mbuf *control, int flags) 1267df8bae1dSRodney W. Grimes { 12685b0480f2SMark Johnston struct mbuf *m, *mlast; 1269df8bae1dSRodney W. Grimes 127025f4ddfbSMark Johnston sbm_clrprotoflags(m0, flags); 12715b0480f2SMark Johnston m_last(control)->m_next = m0; 1272395bb186SSam Leffler 1273395bb186SSam Leffler SBLASTRECORDCHK(sb); 1274395bb186SSam Leffler 1275395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next) 1276df8bae1dSRodney W. 
Grimes sballoc(sb, m); 1277395bb186SSam Leffler sballoc(sb, m); 1278395bb186SSam Leffler mlast = m; 1279395bb186SSam Leffler SBLINKRECORD(sb, control); 1280395bb186SSam Leffler 1281395bb186SSam Leffler sb->sb_mbtail = mlast; 1282395bb186SSam Leffler SBLASTMBUFCHK(sb); 1283395bb186SSam Leffler 1284395bb186SSam Leffler SBLASTRECORDCHK(sb); 1285df8bae1dSRodney W. Grimes } 1286df8bae1dSRodney W. Grimes 12875b0480f2SMark Johnston void 128825f4ddfbSMark Johnston sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, 128925f4ddfbSMark Johnston int flags) 1290a34b7046SRobert Watson { 1291a34b7046SRobert Watson 1292a34b7046SRobert Watson SOCKBUF_LOCK(sb); 129325f4ddfbSMark Johnston sbappendcontrol_locked(sb, m0, control, flags); 1294a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1295a34b7046SRobert Watson } 1296a34b7046SRobert Watson 1297df8bae1dSRodney W. Grimes /* 12987da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 12997da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty. 13007da7362bSRobert Watson * 13017da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of 13027da7362bSRobert Watson * three ways: 13037da7362bSRobert Watson * 13047da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 13057da7362bSRobert Watson * record boundary, and no change in data type). 13067da7362bSRobert Watson * 13077da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 13087da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an 13090f9d0a73SGleb Smirnoff * appropriate mbuf exists, there is room, both mbufs are not marked as 13100f9d0a73SGleb Smirnoff * not ready, and no merging of data types will occur. 13117da7362bSRobert Watson * 13127da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain. 
*
 * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
 * end-of-record.
 *
 * The sockbuf lock must be held; this is asserted on entry.  Every mbuf of
 * (m) is consumed: it is either freed (cases 1 and 2) or linked into the
 * socket buffer (case 3).
 */
void
sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{
	int eor = 0;
	struct mbuf *o;

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		/* Remember whether any mbuf seen so far carried M_EOR. */
		eor |= m->m_flags & M_EOR;

		/*
		 * Case (1): an empty mbuf is dropped when no M_EOR has been
		 * seen yet, or when there is a neighbour of the same type --
		 * its successor if it has one, otherwise (n).
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			/* Keep sb_lastrecord off the mbuf about to be freed. */
			if (sb->sb_lastrecord == m)
				sb->sb_lastrecord = m->m_next;
			m = m_free(m);
			continue;
		}

		/*
		 * Case (2): copy m's bytes into the trailing space of (n).
		 * Requires that (n) exists, is writable, does not end a
		 * record and is neither M_NOTREADY nor M_EXTPG; that m is
		 * not M_NOTREADY; that neither has a TLS session; that the
		 * sockbuf allows coalescing; that m is small and fits; and
		 * that both have the same type.
		 */
		if (n && (n->m_flags & M_EOR) == 0 &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    !(m->m_flags & M_NOTREADY) &&
		    !(n->m_flags & (M_NOTREADY | M_EXTPG)) &&
		    !mbuf_has_tls_session(m) &&
		    !mbuf_has_tls_session(n) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n) &&
		    n->m_type == m->m_type) {
			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
			n->m_len += m->m_len;
			/*
			 * Only byte counters change here; sballoc() is not
			 * called because no mbuf is added to the buffer.
			 */
			sb->sb_ccc += m->m_len;
			/* sb_acc is bumped only while sb_fnrdy is NULL. */
			if (sb->sb_fnrdy == NULL)
				sb->sb_acc += m->m_len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				/* XXX: Probably don't need.*/
				sb->sb_ctl += m->m_len;
			m = m_free(m);
			continue;
		}

		/*
		 * Case (3): the mbuf is kept.  A small, ready M_EXTPG mbuf
		 * without a TLS session is first handed to
		 * mb_unmapped_compress(); its result is deliberately
		 * ignored.
		 * NOTE(review): presumably this turns the unmapped mbuf into
		 * an ordinary one when possible -- confirm against the
		 * helper's definition.
		 */
		if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) &&
		    (m->m_flags & M_NOTREADY) == 0 &&
		    !mbuf_has_tls_session(m))
			(void)mb_unmapped_compress(m);
		/* Link after (n), or start the buffer if there is no (n). */
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sb->sb_mbtail = m;
		sballoc(sb, m);
		n = m;
		/* M_EOR is re-applied to the final mbuf after the loop. */
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		/* Detach the new tail from the not yet processed remainder. */
		n->m_next = 0;
	}
	if (eor) {
		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
		n->m_flags |= eor;
	}
	SBLASTMBUFCHK(sb);
}

#ifdef KERN_TLS
/*
 * A version of sbcompress() for encrypted TLS RX mbufs.  These mbufs
 * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also
 * a bit simpler (no EOR markers, always MT_DATA, etc.).
*/
static void
sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
{

	SOCKBUF_LOCK_ASSERT(sb);

	while (m) {
		/* Invariants every mbuf on this path must satisfy. */
		KASSERT((m->m_flags & M_EOR) == 0,
		    ("TLS RX mbuf %p with EOR", m));
		KASSERT(m->m_type == MT_DATA,
		    ("TLS RX mbuf %p is not MT_DATA", m));
		KASSERT((m->m_flags & M_NOTREADY) != 0,
		    ("TLS RX mbuf %p ready", m));
		KASSERT((m->m_flags & M_EXTPG) == 0,
		    ("TLS RX mbuf %p unmapped", m));

		/* Empty mbufs carry nothing; discard them. */
		if (m->m_len == 0) {
			m = m_free(m);
			continue;
		}

		/*
		 * Even though both 'n' and 'm' are NOTREADY, it's ok
		 * to coalesce the data.
		 */
		if (n &&
		    M_WRITABLE(n) &&
		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
		    !(n->m_flags & (M_EXTPG)) &&
		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
		    m->m_len <= M_TRAILINGSPACE(n)) {
			m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
			n->m_len += m->m_len;
			/* Copied bytes are counted in sb_ccc and sb_tlscc. */
			sb->sb_ccc += m->m_len;
			sb->sb_tlscc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* Otherwise append the mbuf to the 'sb_mtls' chain. */
		if (n)
			n->m_next = m;
		else
			sb->sb_mtls = m;
		sb->sb_mtlstail = m;
		sballoc_ktls_rx(sb, m);
		n = m;
		m = m->m_next;
		/* Detach the new tail from the unprocessed remainder. */
		n->m_next = NULL;
	}
	SBLASTMBUFCHK(sb);
}
#endif

/*
 * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
 *
 * No lock assertion is made here; sbflush_locked() asserts the sockbuf lock
 * before calling this.
 */
static void
sbflush_internal(struct sockbuf *sb)
{

	/* Keep cutting while mbuf space or sb_tlsdcc bytes are still charged. */
	while (sb->sb_mbcnt || sb->sb_tlsdcc) {
		/*
		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
		 * we would loop forever. Panic instead.
		 *
		 * The loop is left here; the KASSERT below then reports the
		 * inconsistent counters.
		 */
		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
			break;
		/* Cut everything counted in sb_ccc and free what comes back. */
		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
	}
	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
	    ("%s: ccc %u mb %p mbcnt %u", __func__,
	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
}

/*
 * Flush a sockbuf whose lock is already held by the caller (asserted).
 */
void
sbflush_locked(struct sockbuf *sb)
{

	SOCKBUF_LOCK_ASSERT(sb);
	sbflush_internal(sb);
}

/*
 * Flush a sockbuf, taking and releasing the sockbuf lock around the work.
 */
void
sbflush(struct sockbuf *sb)
{

	SOCKBUF_LOCK(sb);
	sbflush_locked(sb);
	SOCKBUF_UNLOCK(sb);
}

/*
 * Cut data from (the front of) a sockbuf.
 *
 * Removes 'len' bytes, which must satisfy 0 <= len <= sb_ccc (asserted).
 * Mbufs that are consumed completely are unlinked and pushed onto a list
 * chained through m_next; that list is returned so the caller can free it
 * (for example with m_freem()).  Fully consumed M_NOTREADY mbufs outside
 * the TLS chain are unlinked but left off the returned list.  An mbuf that
 * is only partly consumed stays in the buffer with its data pointer and
 * length adjusted.
 */
static struct mbuf *
sbcut_internal(struct sockbuf *sb, int len)
{
	struct mbuf *m, *next, *mfree;
	bool is_tls;

	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
	    __func__, len));
	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
	    __func__, len, sb->sb_ccc));

	/* 'm' walks the current record, 'next' is the record after it. */
	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	is_tls = false;
	mfree = NULL;

	while (len > 0) {
		if (m == NULL) {
			/* Current record exhausted: move on to the next one. */
#ifdef KERN_TLS
			if (next == NULL && !is_tls) {
				/*
				 * No records left in sb_mb.  Account for
				 * the bytes counted in sb_tlsdcc, then go
				 * on with the 'sb_mtls' chain.
				 */
				if (sb->sb_tlsdcc != 0) {
					MPASS(len >= sb->sb_tlsdcc);
					len -= sb->sb_tlsdcc;
					sb->sb_ccc -= sb->sb_tlsdcc;
					sb->sb_tlsdcc = 0;
					if (len == 0)
						break;
				}
				next = sb->sb_mtls;
				is_tls = true;
			}
#endif
			KASSERT(next, ("%s: no next, len %d", __func__, len));
			m = next;
			next = m->m_nextpkt;
		}
		if (m->m_len > len) {
			/*
			 * The cut ends inside this mbuf: trim its front,
			 * fix up the byte counters and stop.
			 */
			KASSERT(!(m->m_flags & M_NOTAVAIL),
			    ("%s: m %p M_NOTAVAIL", __func__, m));
			m->m_len -= len;
			m->m_data += len;
			sb->sb_ccc -= len;
			sb->sb_acc -= len;
			if (sb->sb_sndptroff != 0)
				sb->sb_sndptroff -= len;
			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
				sb->sb_ctl -= len;
			break;
		}
		/* The whole mbuf is consumed: uncharge it from the sockbuf. */
		len -= m->m_len;
#ifdef KERN_TLS
		if (is_tls)
			sbfree_ktls_rx(sb, m);
		else
#endif
			sbfree(sb, m);
		/*
		 * Do not put M_NOTREADY buffers to the free list, they
		 * are referenced from outside.
		 */
		if (m->m_flags & M_NOTREADY && !is_tls)
			m = m->m_next;
		else {
			struct mbuf *n;

			/* Push 'm' onto the head of the list to return. */
			n = m->m_next;
			m->m_next = mfree;
			mfree = m;
			m = n;
		}
	}
	/*
	 * Free any zero-length mbufs from the buffer.
	 * For SOCK_DGRAM sockets such mbufs represent empty records.
	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
	 * when sosend_generic() needs to send only control data.
	 */
	while (m && m->m_len == 0) {
		struct mbuf *n;

		sbfree(sb, m);
		n = m->m_next;
		m->m_next = mfree;
		mfree = m;
		m = n;
	}
	/*
	 * Re-anchor the buffer on whatever is left.  If the walk reached the
	 * TLS chain, sb_mb is now empty and 'm' heads sb_mtls.  Otherwise
	 * 'm' (if any) heads the first record and is followed by 'next'; if
	 * the record was used up, 'next' becomes the first record.
	 */
#ifdef KERN_TLS
	if (is_tls) {
		sb->sb_mb = NULL;
		sb->sb_mtls = m;
		if (m == NULL)
			sb->sb_mtlstail = NULL;
	} else
#endif
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
	/*
	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
	 * sb_lastrecord is up-to-date if we dropped part of the last record.
	 */
	m = sb->sb_mb;
	if (m == NULL) {
		sb->sb_mbtail = NULL;
		sb->sb_lastrecord = NULL;
	} else if (m->m_nextpkt == NULL) {
		sb->sb_lastrecord = m;
	}

	return (mfree);
}

/*
 * Drop data from (the front of) a sockbuf.
 *
 * The sockbuf lock must be held (asserted); the mbufs that were cut off are
 * freed before returning, i.e. while the lock is still held.
 */
void
sbdrop_locked(struct sockbuf *sb, int len)
{

	SOCKBUF_LOCK_ASSERT(sb);
	m_freem(sbcut_internal(sb, len));
}

/*
 * Drop data from (the front of) a sockbuf,
 * and return it to caller.
16051d2df300SGleb Smirnoff */ 16061d2df300SGleb Smirnoff struct mbuf * 16071d2df300SGleb Smirnoff sbcut_locked(struct sockbuf *sb, int len) 16081d2df300SGleb Smirnoff { 16091d2df300SGleb Smirnoff 16101d2df300SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 16111d2df300SGleb Smirnoff return (sbcut_internal(sb, len)); 1612eaa6dfbcSRobert Watson } 1613eaa6dfbcSRobert Watson 1614eaa6dfbcSRobert Watson void 1615050ac265SRobert Watson sbdrop(struct sockbuf *sb, int len) 1616a34b7046SRobert Watson { 16171d2df300SGleb Smirnoff struct mbuf *mfree; 1618a34b7046SRobert Watson 1619a34b7046SRobert Watson SOCKBUF_LOCK(sb); 16201d2df300SGleb Smirnoff mfree = sbcut_internal(sb, len); 1621a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 16221d2df300SGleb Smirnoff 16231d2df300SGleb Smirnoff m_freem(mfree); 1624a34b7046SRobert Watson } 1625a34b7046SRobert Watson 162689e560f4SRandall Stewart struct mbuf * 162789e560f4SRandall Stewart sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) 162889e560f4SRandall Stewart { 162989e560f4SRandall Stewart struct mbuf *m; 163089e560f4SRandall Stewart 163189e560f4SRandall Stewart KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 163289e560f4SRandall Stewart if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 163389e560f4SRandall Stewart *moff = off; 163489e560f4SRandall Stewart if (sb->sb_sndptr == NULL) { 163589e560f4SRandall Stewart sb->sb_sndptr = sb->sb_mb; 163689e560f4SRandall Stewart sb->sb_sndptroff = 0; 163789e560f4SRandall Stewart } 163889e560f4SRandall Stewart return (sb->sb_mb); 163989e560f4SRandall Stewart } else { 164089e560f4SRandall Stewart m = sb->sb_sndptr; 164189e560f4SRandall Stewart off -= sb->sb_sndptroff; 164289e560f4SRandall Stewart } 164389e560f4SRandall Stewart *moff = off; 164489e560f4SRandall Stewart return (m); 164589e560f4SRandall Stewart } 164689e560f4SRandall Stewart 164789e560f4SRandall Stewart void 164889e560f4SRandall Stewart sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) 
164989e560f4SRandall Stewart { 165089e560f4SRandall Stewart /* 165189e560f4SRandall Stewart * A small copy was done, advance forward the sb_sbsndptr to cover 165289e560f4SRandall Stewart * it. 165389e560f4SRandall Stewart */ 165489e560f4SRandall Stewart struct mbuf *m; 165589e560f4SRandall Stewart 165689e560f4SRandall Stewart if (mb != sb->sb_sndptr) { 165789e560f4SRandall Stewart /* Did not copyout at the same mbuf */ 165889e560f4SRandall Stewart return; 165989e560f4SRandall Stewart } 166089e560f4SRandall Stewart m = mb; 166189e560f4SRandall Stewart while (m && (len > 0)) { 166289e560f4SRandall Stewart if (len >= m->m_len) { 166389e560f4SRandall Stewart len -= m->m_len; 166489e560f4SRandall Stewart if (m->m_next) { 166589e560f4SRandall Stewart sb->sb_sndptroff += m->m_len; 166689e560f4SRandall Stewart sb->sb_sndptr = m->m_next; 166789e560f4SRandall Stewart } 166889e560f4SRandall Stewart m = m->m_next; 166989e560f4SRandall Stewart } else { 167089e560f4SRandall Stewart len = 0; 167189e560f4SRandall Stewart } 167289e560f4SRandall Stewart } 167389e560f4SRandall Stewart } 167489e560f4SRandall Stewart 1675a34b7046SRobert Watson /* 16769fd573c3SHans Petter Selasky * Return the first mbuf and the mbuf data offset for the provided 16779fd573c3SHans Petter Selasky * send offset without changing the "sb_sndptroff" field. 
16789fd573c3SHans Petter Selasky */ 16799fd573c3SHans Petter Selasky struct mbuf * 16809fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) 16819fd573c3SHans Petter Selasky { 16829fd573c3SHans Petter Selasky struct mbuf *m; 16839fd573c3SHans Petter Selasky 16849fd573c3SHans Petter Selasky KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 16859fd573c3SHans Petter Selasky 16869fd573c3SHans Petter Selasky /* 16879fd573c3SHans Petter Selasky * If the "off" is below the stored offset, which happens on 16889fd573c3SHans Petter Selasky * retransmits, just use "sb_mb": 16899fd573c3SHans Petter Selasky */ 16909fd573c3SHans Petter Selasky if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 16919fd573c3SHans Petter Selasky m = sb->sb_mb; 16929fd573c3SHans Petter Selasky } else { 16939fd573c3SHans Petter Selasky m = sb->sb_sndptr; 16949fd573c3SHans Petter Selasky off -= sb->sb_sndptroff; 16959fd573c3SHans Petter Selasky } 16969fd573c3SHans Petter Selasky while (off > 0 && m != NULL) { 16979fd573c3SHans Petter Selasky if (off < m->m_len) 16989fd573c3SHans Petter Selasky break; 16999fd573c3SHans Petter Selasky off -= m->m_len; 17009fd573c3SHans Petter Selasky m = m->m_next; 17019fd573c3SHans Petter Selasky } 17029fd573c3SHans Petter Selasky *moff = off; 17039fd573c3SHans Petter Selasky return (m); 17049fd573c3SHans Petter Selasky } 17059fd573c3SHans Petter Selasky 17069fd573c3SHans Petter Selasky /* 1707050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1708050ac265SRobert Watson * front. 1709df8bae1dSRodney W. Grimes */ 171026f9a767SRodney W. Grimes void 1711050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb) 1712df8bae1dSRodney W. Grimes { 1713050ac265SRobert Watson struct mbuf *m; 1714df8bae1dSRodney W. Grimes 1715a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1716a34b7046SRobert Watson 1717df8bae1dSRodney W. Grimes m = sb->sb_mb; 1718df8bae1dSRodney W. 
Grimes if (m) { 1719df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt; 1720df8bae1dSRodney W. Grimes do { 1721df8bae1dSRodney W. Grimes sbfree(sb, m); 1722ecde8f7cSMatthew Dillon m = m_free(m); 1723797f2d22SPoul-Henning Kamp } while (m); 1724df8bae1dSRodney W. Grimes } 1725395bb186SSam Leffler SB_EMPTY_FIXUP(sb); 1726df8bae1dSRodney W. Grimes } 17271e4ad9ceSGarrett Wollman 172882c23ebaSBill Fenner /* 1729050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1730050ac265SRobert Watson * front. 1731a34b7046SRobert Watson */ 1732a34b7046SRobert Watson void 1733050ac265SRobert Watson sbdroprecord(struct sockbuf *sb) 1734a34b7046SRobert Watson { 1735a34b7046SRobert Watson 1736a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1737a34b7046SRobert Watson sbdroprecord_locked(sb); 1738a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1739a34b7046SRobert Watson } 1740a34b7046SRobert Watson 174120d9e5e8SRobert Watson /* 17428c799760SRobert Watson * Create a "control" mbuf containing the specified data with the specified 17438c799760SRobert Watson * type for presentation on a socket buffer. 
174420d9e5e8SRobert Watson */ 174520d9e5e8SRobert Watson struct mbuf * 17466890b588SGleb Smirnoff sbcreatecontrol(const void *p, u_int size, int type, int level, int wait) 174720d9e5e8SRobert Watson { 1748d19e16a7SRobert Watson struct cmsghdr *cp; 174920d9e5e8SRobert Watson struct mbuf *m; 175020d9e5e8SRobert Watson 17513c0e5685SJohn Baldwin MBUF_CHECKSLEEP(wait); 17526890b588SGleb Smirnoff 17536890b588SGleb Smirnoff if (wait == M_NOWAIT) { 17546890b588SGleb Smirnoff if (CMSG_SPACE(size) > MCLBYTES) 17556890b588SGleb Smirnoff return (NULL); 17566890b588SGleb Smirnoff } else 1757ad51c47fSGleb Smirnoff KASSERT(CMSG_SPACE(size) <= MCLBYTES, 1758ad51c47fSGleb Smirnoff ("%s: passed CMSG_SPACE(%u) > MCLBYTES", __func__, size)); 17596890b588SGleb Smirnoff 17606890b588SGleb Smirnoff if (CMSG_SPACE(size) > MLEN) 17613c0e5685SJohn Baldwin m = m_getcl(wait, MT_CONTROL, 0); 176220d9e5e8SRobert Watson else 17633c0e5685SJohn Baldwin m = m_get(wait, MT_CONTROL); 176420d9e5e8SRobert Watson if (m == NULL) 17656890b588SGleb Smirnoff return (NULL); 17666890b588SGleb Smirnoff 17676890b588SGleb Smirnoff KASSERT(CMSG_SPACE(size) <= M_TRAILINGSPACE(m), 176820d9e5e8SRobert Watson ("sbcreatecontrol: short mbuf")); 17692827952eSXin LI /* 17702827952eSXin LI * Don't leave the padding between the msg header and the 17712827952eSXin LI * cmsg data and the padding after the cmsg data un-initialized. 
17722827952eSXin LI */ 17736890b588SGleb Smirnoff cp = mtod(m, struct cmsghdr *); 17746890b588SGleb Smirnoff bzero(cp, CMSG_SPACE(size)); 177520d9e5e8SRobert Watson if (p != NULL) 177620d9e5e8SRobert Watson (void)memcpy(CMSG_DATA(cp), p, size); 177720d9e5e8SRobert Watson m->m_len = CMSG_SPACE(size); 177820d9e5e8SRobert Watson cp->cmsg_len = CMSG_LEN(size); 177920d9e5e8SRobert Watson cp->cmsg_level = level; 178020d9e5e8SRobert Watson cp->cmsg_type = type; 178120d9e5e8SRobert Watson return (m); 178220d9e5e8SRobert Watson } 178320d9e5e8SRobert Watson 178420d9e5e8SRobert Watson /* 17858c799760SRobert Watson * This does the same for socket buffers that sotoxsocket does for sockets: 17868c799760SRobert Watson * generate an user-format data structure describing the socket buffer. Note 17878c799760SRobert Watson * that the xsockbuf structure, since it is always embedded in a socket, does 17888c799760SRobert Watson * not include a self pointer nor a length. We make this entry point public 17898c799760SRobert Watson * in case some other mechanism needs it. 179020d9e5e8SRobert Watson */ 179120d9e5e8SRobert Watson void 179220d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) 179320d9e5e8SRobert Watson { 1794d19e16a7SRobert Watson 17950f9d0a73SGleb Smirnoff xsb->sb_cc = sb->sb_ccc; 179620d9e5e8SRobert Watson xsb->sb_hiwat = sb->sb_hiwat; 179720d9e5e8SRobert Watson xsb->sb_mbcnt = sb->sb_mbcnt; 179820d9e5e8SRobert Watson xsb->sb_mbmax = sb->sb_mbmax; 179920d9e5e8SRobert Watson xsb->sb_lowat = sb->sb_lowat; 180020d9e5e8SRobert Watson xsb->sb_flags = sb->sb_flags; 180120d9e5e8SRobert Watson xsb->sb_timeo = sb->sb_timeo; 180220d9e5e8SRobert Watson } 180320d9e5e8SRobert Watson 1804639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. 
*/
/* Backing storage for the placeholder kern.dummy node declared below. */
static int dummy;
SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, "");
/*
 * kern.ipc.maxsockbuf: unsigned long backed by sb_max; reads and writes go
 * through sysctl_handle_sb_max rather than a plain integer handler.
 */
SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf,
    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &sb_max, 0,
    sysctl_handle_sb_max, "LU",
    "Maximum socket buffer size");
/* kern.ipc.sockbuf_waste_factor: read-write unsigned long, sb_efficiency. */
SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
    &sb_efficiency, 0, "Socket buffer size waste factor");