19454b2d8SWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1569a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. 
Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 34677b542eSDavid E. O'Brien #include <sys/cdefs.h> 35677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 36677b542eSDavid E. O'Brien 37b2e60773SJohn Baldwin #include "opt_kern_tls.h" 385b86eac4SJesper Skriver #include "opt_param.h" 39335654d7SRobert Watson 40df8bae1dSRodney W. Grimes #include <sys/param.h> 41960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */ 42ff5c09daSGarrett Wollman #include <sys/kernel.h> 43b2e60773SJohn Baldwin #include <sys/ktls.h> 44fb919e4dSMark Murray #include <sys/lock.h> 458ec07310SGleb Smirnoff #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47960ed29cSSeigo Tanimura #include <sys/mutex.h> 48fb919e4dSMark Murray #include <sys/proc.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 502f9a2132SBrian Feldman #include <sys/resourcevar.h> 51960ed29cSSeigo Tanimura #include <sys/signalvar.h> 52df8bae1dSRodney W. Grimes #include <sys/socket.h> 53df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 547abab911SRobert Watson #include <sys/sx.h> 55ff5c09daSGarrett Wollman #include <sys/sysctl.h> 5626f9a767SRodney W. Grimes 57f14cce87SRobert Watson /* 58f14cce87SRobert Watson * Function pointer set by the AIO routines so that the socket buffer code 59f14cce87SRobert Watson * can call back into the AIO module if it is loaded. 
60f14cce87SRobert Watson */ 6121d56e9cSAlfred Perlstein void (*aio_swake)(struct socket *, struct sockbuf *); 6221d56e9cSAlfred Perlstein 63df8bae1dSRodney W. Grimes /* 64f14cce87SRobert Watson * Primitive routines for operating on socket buffers 65df8bae1dSRodney W. Grimes */ 66df8bae1dSRodney W. Grimes 6779cb7eb4SDavid Greenman u_long sb_max = SB_MAX; 6858d14daeSMohan Srinivasan u_long sb_max_adj = 69b233773bSBjoern A. Zeeb (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 70df8bae1dSRodney W. Grimes 714b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 724b29bc4fSGarrett Wollman 73*3c0e5685SJohn Baldwin static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, 74*3c0e5685SJohn Baldwin struct mbuf *n); 751d2df300SGleb Smirnoff static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); 76050ac265SRobert Watson static void sbflush_internal(struct sockbuf *sb); 77eaa6dfbcSRobert Watson 78df8bae1dSRodney W. Grimes /* 79829fae90SGleb Smirnoff * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. 80829fae90SGleb Smirnoff */ 81829fae90SGleb Smirnoff static void 82829fae90SGleb Smirnoff sbm_clrprotoflags(struct mbuf *m, int flags) 83829fae90SGleb Smirnoff { 84829fae90SGleb Smirnoff int mask; 85829fae90SGleb Smirnoff 86829fae90SGleb Smirnoff mask = ~M_PROTOFLAGS; 87829fae90SGleb Smirnoff if (flags & PRUS_NOTREADY) 88829fae90SGleb Smirnoff mask |= M_NOTREADY; 89829fae90SGleb Smirnoff while (m) { 90829fae90SGleb Smirnoff m->m_flags &= mask; 91829fae90SGleb Smirnoff m = m->m_next; 92829fae90SGleb Smirnoff } 93829fae90SGleb Smirnoff } 94829fae90SGleb Smirnoff 95829fae90SGleb Smirnoff /* 963807631bSJohn Baldwin * Compress M_NOTREADY mbufs after they have been readied by sbready(). 973807631bSJohn Baldwin * 983807631bSJohn Baldwin * sbcompress() skips M_NOTREADY mbufs since the data is not available to 993807631bSJohn Baldwin * be copied at the time of sbcompress(). 
This function combines small 1003807631bSJohn Baldwin * mbufs similar to sbcompress() once mbufs are ready. 'm0' is the first 1013807631bSJohn Baldwin * mbuf sbready() marked ready, and 'end' is the first mbuf still not 1023807631bSJohn Baldwin * ready. 1033807631bSJohn Baldwin */ 1043807631bSJohn Baldwin static void 1053807631bSJohn Baldwin sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end) 1063807631bSJohn Baldwin { 1073807631bSJohn Baldwin struct mbuf *m, *n; 1083807631bSJohn Baldwin int ext_size; 1093807631bSJohn Baldwin 1103807631bSJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 1113807631bSJohn Baldwin 1123807631bSJohn Baldwin if ((sb->sb_flags & SB_NOCOALESCE) != 0) 1133807631bSJohn Baldwin return; 1143807631bSJohn Baldwin 1153807631bSJohn Baldwin for (m = m0; m != end; m = m->m_next) { 1163807631bSJohn Baldwin MPASS((m->m_flags & M_NOTREADY) == 0); 117c4ad247bSAndrew Gallatin /* 118c4ad247bSAndrew Gallatin * NB: In sbcompress(), 'n' is the last mbuf in the 119c4ad247bSAndrew Gallatin * socket buffer and 'm' is the new mbuf being copied 120c4ad247bSAndrew Gallatin * into the trailing space of 'n'. Here, the roles 121c4ad247bSAndrew Gallatin * are reversed and 'n' is the next mbuf after 'm' 122c4ad247bSAndrew Gallatin * that is being copied into the trailing space of 123c4ad247bSAndrew Gallatin * 'm'. 124c4ad247bSAndrew Gallatin */ 125c4ad247bSAndrew Gallatin n = m->m_next; 126c4ad247bSAndrew Gallatin #ifdef KERN_TLS 127c4ad247bSAndrew Gallatin /* Try to coalesce adjacent ktls mbuf hdr/trailers. 
*/ 128c4ad247bSAndrew Gallatin if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && 1296edfd179SGleb Smirnoff (m->m_flags & M_EXTPG) && 1306edfd179SGleb Smirnoff (n->m_flags & M_EXTPG) && 131c4ad247bSAndrew Gallatin !mbuf_has_tls_session(m) && 132c4ad247bSAndrew Gallatin !mbuf_has_tls_session(n)) { 133c4ad247bSAndrew Gallatin int hdr_len, trail_len; 134c4ad247bSAndrew Gallatin 1357b6c99d0SGleb Smirnoff hdr_len = n->m_epg_hdrlen; 1367b6c99d0SGleb Smirnoff trail_len = m->m_epg_trllen; 137c4ad247bSAndrew Gallatin if (trail_len != 0 && hdr_len != 0 && 138c4ad247bSAndrew Gallatin trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) { 139c4ad247bSAndrew Gallatin /* copy n's header to m's trailer */ 14023feb563SAndrew Gallatin memcpy(&m->m_epg_trail[trail_len], 14123feb563SAndrew Gallatin n->m_epg_hdr, hdr_len); 1427b6c99d0SGleb Smirnoff m->m_epg_trllen += hdr_len; 143c4ad247bSAndrew Gallatin m->m_len += hdr_len; 1447b6c99d0SGleb Smirnoff n->m_epg_hdrlen = 0; 145c4ad247bSAndrew Gallatin n->m_len -= hdr_len; 146c4ad247bSAndrew Gallatin } 147c4ad247bSAndrew Gallatin } 148c4ad247bSAndrew Gallatin #endif 1493807631bSJohn Baldwin 1503807631bSJohn Baldwin /* Compress small unmapped mbufs into plain mbufs. 
*/ 1516edfd179SGleb Smirnoff if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN && 152b2e60773SJohn Baldwin !mbuf_has_tls_session(m)) { 1533807631bSJohn Baldwin ext_size = m->m_ext.ext_size; 1543807631bSJohn Baldwin if (mb_unmapped_compress(m) == 0) { 1553807631bSJohn Baldwin sb->sb_mbcnt -= ext_size; 1563807631bSJohn Baldwin sb->sb_ccnt -= 1; 1573807631bSJohn Baldwin } 1583807631bSJohn Baldwin } 1593807631bSJohn Baldwin 1603807631bSJohn Baldwin while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && 1613807631bSJohn Baldwin M_WRITABLE(m) && 1626edfd179SGleb Smirnoff (m->m_flags & M_EXTPG) == 0 && 163b2e60773SJohn Baldwin !mbuf_has_tls_session(n) && 164b2e60773SJohn Baldwin !mbuf_has_tls_session(m) && 1653807631bSJohn Baldwin n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 1663807631bSJohn Baldwin n->m_len <= M_TRAILINGSPACE(m) && 1673807631bSJohn Baldwin m->m_type == n->m_type) { 1683807631bSJohn Baldwin KASSERT(sb->sb_lastrecord != n, 1693807631bSJohn Baldwin ("%s: merging start of record (%p) into previous mbuf (%p)", 1703807631bSJohn Baldwin __func__, n, m)); 1713807631bSJohn Baldwin m_copydata(n, 0, n->m_len, mtodo(m, m->m_len)); 1723807631bSJohn Baldwin m->m_len += n->m_len; 1733807631bSJohn Baldwin m->m_next = n->m_next; 1743807631bSJohn Baldwin m->m_flags |= n->m_flags & M_EOR; 1753807631bSJohn Baldwin if (sb->sb_mbtail == n) 1763807631bSJohn Baldwin sb->sb_mbtail = m; 1773807631bSJohn Baldwin 1783807631bSJohn Baldwin sb->sb_mbcnt -= MSIZE; 1793807631bSJohn Baldwin sb->sb_mcnt -= 1; 1803807631bSJohn Baldwin if (n->m_flags & M_EXT) { 1813807631bSJohn Baldwin sb->sb_mbcnt -= n->m_ext.ext_size; 1823807631bSJohn Baldwin sb->sb_ccnt -= 1; 1833807631bSJohn Baldwin } 1843807631bSJohn Baldwin m_free(n); 1853807631bSJohn Baldwin n = m->m_next; 1863807631bSJohn Baldwin } 1873807631bSJohn Baldwin } 1883807631bSJohn Baldwin SBLASTRECORDCHK(sb); 1893807631bSJohn Baldwin SBLASTMBUFCHK(sb); 1903807631bSJohn Baldwin } 1913807631bSJohn Baldwin 
1923807631bSJohn Baldwin /* 19382334850SJohn Baldwin * Mark ready "count" units of I/O starting with "m". Most mbufs 19461664ee7SGleb Smirnoff * count as a single unit of I/O except for M_EXTPG mbufs which 19561664ee7SGleb Smirnoff * are backed by multiple pages. 1960f9d0a73SGleb Smirnoff */ 1970f9d0a73SGleb Smirnoff int 19882334850SJohn Baldwin sbready(struct sockbuf *sb, struct mbuf *m0, int count) 1990f9d0a73SGleb Smirnoff { 20082334850SJohn Baldwin struct mbuf *m; 2010f9d0a73SGleb Smirnoff u_int blocker; 2020f9d0a73SGleb Smirnoff 2030f9d0a73SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2040f9d0a73SGleb Smirnoff KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); 20582334850SJohn Baldwin KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); 2060f9d0a73SGleb Smirnoff 20782334850SJohn Baldwin m = m0; 2080f9d0a73SGleb Smirnoff blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; 2090f9d0a73SGleb Smirnoff 21082334850SJohn Baldwin while (count > 0) { 2110f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 2120f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 21361664ee7SGleb Smirnoff if ((m->m_flags & M_EXTPG) != 0) { 2147b6c99d0SGleb Smirnoff if (count < m->m_epg_nrdy) { 2157b6c99d0SGleb Smirnoff m->m_epg_nrdy -= count; 21682334850SJohn Baldwin count = 0; 21782334850SJohn Baldwin break; 21882334850SJohn Baldwin } 2197b6c99d0SGleb Smirnoff count -= m->m_epg_nrdy; 2207b6c99d0SGleb Smirnoff m->m_epg_nrdy = 0; 22182334850SJohn Baldwin } else 22282334850SJohn Baldwin count--; 22382334850SJohn Baldwin 2240f9d0a73SGleb Smirnoff m->m_flags &= ~(M_NOTREADY | blocker); 2250f9d0a73SGleb Smirnoff if (blocker) 2260f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 22782334850SJohn Baldwin m = m->m_next; 2280f9d0a73SGleb Smirnoff } 2290f9d0a73SGleb Smirnoff 23082334850SJohn Baldwin /* 23182334850SJohn Baldwin * If the first mbuf is still not fully ready because only 23282334850SJohn Baldwin * some of its backing pages were readied, no further progress 
23382334850SJohn Baldwin * can be made. 23482334850SJohn Baldwin */ 23582334850SJohn Baldwin if (m0 == m) { 23682334850SJohn Baldwin MPASS(m->m_flags & M_NOTREADY); 2370f9d0a73SGleb Smirnoff return (EINPROGRESS); 23882334850SJohn Baldwin } 23982334850SJohn Baldwin 24082334850SJohn Baldwin if (!blocker) { 2413807631bSJohn Baldwin sbready_compress(sb, m0, m); 24282334850SJohn Baldwin return (EINPROGRESS); 24382334850SJohn Baldwin } 2440f9d0a73SGleb Smirnoff 2450f9d0a73SGleb Smirnoff /* This one was blocking all the queue. */ 2460f9d0a73SGleb Smirnoff for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { 2470f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_BLOCKED, 2480f9d0a73SGleb Smirnoff ("%s: m %p !M_BLOCKED", __func__, m)); 2490f9d0a73SGleb Smirnoff m->m_flags &= ~M_BLOCKED; 2500f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 2510f9d0a73SGleb Smirnoff } 2520f9d0a73SGleb Smirnoff 2530f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 2543807631bSJohn Baldwin sbready_compress(sb, m0, m); 2550f9d0a73SGleb Smirnoff 2560f9d0a73SGleb Smirnoff return (0); 2570f9d0a73SGleb Smirnoff } 2580f9d0a73SGleb Smirnoff 2590f9d0a73SGleb Smirnoff /* 2608967b220SGleb Smirnoff * Adjust sockbuf state reflecting allocation of m. 
2618967b220SGleb Smirnoff */ 2628967b220SGleb Smirnoff void 2638967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m) 2648967b220SGleb Smirnoff { 2658967b220SGleb Smirnoff 2668967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2678967b220SGleb Smirnoff 2680f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 2690f9d0a73SGleb Smirnoff 2700f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) { 2710f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY) 2720f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 2730f9d0a73SGleb Smirnoff else 2740f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 2750f9d0a73SGleb Smirnoff } else 2760f9d0a73SGleb Smirnoff m->m_flags |= M_BLOCKED; 2778967b220SGleb Smirnoff 2788967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 2798967b220SGleb Smirnoff sb->sb_ctl += m->m_len; 2808967b220SGleb Smirnoff 2818967b220SGleb Smirnoff sb->sb_mbcnt += MSIZE; 2828967b220SGleb Smirnoff sb->sb_mcnt += 1; 2838967b220SGleb Smirnoff 2848967b220SGleb Smirnoff if (m->m_flags & M_EXT) { 2858967b220SGleb Smirnoff sb->sb_mbcnt += m->m_ext.ext_size; 2868967b220SGleb Smirnoff sb->sb_ccnt += 1; 2878967b220SGleb Smirnoff } 2888967b220SGleb Smirnoff } 2898967b220SGleb Smirnoff 2908967b220SGleb Smirnoff /* 2918967b220SGleb Smirnoff * Adjust sockbuf state reflecting freeing of m. 2928967b220SGleb Smirnoff */ 2938967b220SGleb Smirnoff void 2948967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m) 2958967b220SGleb Smirnoff { 2968967b220SGleb Smirnoff 2978967b220SGleb Smirnoff #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. 
*/ 2988967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2998967b220SGleb Smirnoff #endif 3008967b220SGleb Smirnoff 3010f9d0a73SGleb Smirnoff sb->sb_ccc -= m->m_len; 3020f9d0a73SGleb Smirnoff 3030f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) 3040f9d0a73SGleb Smirnoff sb->sb_acc -= m->m_len; 3050f9d0a73SGleb Smirnoff 3060f9d0a73SGleb Smirnoff if (m == sb->sb_fnrdy) { 3070f9d0a73SGleb Smirnoff struct mbuf *n; 3080f9d0a73SGleb Smirnoff 3090f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 3100f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 3110f9d0a73SGleb Smirnoff 3120f9d0a73SGleb Smirnoff n = m->m_next; 3130f9d0a73SGleb Smirnoff while (n != NULL && !(n->m_flags & M_NOTREADY)) { 3140f9d0a73SGleb Smirnoff n->m_flags &= ~M_BLOCKED; 3150f9d0a73SGleb Smirnoff sb->sb_acc += n->m_len; 3160f9d0a73SGleb Smirnoff n = n->m_next; 3170f9d0a73SGleb Smirnoff } 3180f9d0a73SGleb Smirnoff sb->sb_fnrdy = n; 3190f9d0a73SGleb Smirnoff } 3208967b220SGleb Smirnoff 3218967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 3228967b220SGleb Smirnoff sb->sb_ctl -= m->m_len; 3238967b220SGleb Smirnoff 3248967b220SGleb Smirnoff sb->sb_mbcnt -= MSIZE; 3258967b220SGleb Smirnoff sb->sb_mcnt -= 1; 3268967b220SGleb Smirnoff if (m->m_flags & M_EXT) { 3278967b220SGleb Smirnoff sb->sb_mbcnt -= m->m_ext.ext_size; 3288967b220SGleb Smirnoff sb->sb_ccnt -= 1; 3298967b220SGleb Smirnoff } 3308967b220SGleb Smirnoff 3318967b220SGleb Smirnoff if (sb->sb_sndptr == m) { 3328967b220SGleb Smirnoff sb->sb_sndptr = NULL; 3338967b220SGleb Smirnoff sb->sb_sndptroff = 0; 3348967b220SGleb Smirnoff } 3358967b220SGleb Smirnoff if (sb->sb_sndptroff != 0) 3368967b220SGleb Smirnoff sb->sb_sndptroff -= m->m_len; 3378967b220SGleb Smirnoff } 3388967b220SGleb Smirnoff 339*3c0e5685SJohn Baldwin #ifdef KERN_TLS 340*3c0e5685SJohn Baldwin /* 341*3c0e5685SJohn Baldwin * Similar to sballoc/sbfree but does not adjust state associated with 342*3c0e5685SJohn Baldwin * the sb_mb chain such as 
sb_fnrdy or sb_sndptr*. Also assumes mbufs 343*3c0e5685SJohn Baldwin * are not ready. 344*3c0e5685SJohn Baldwin */ 345*3c0e5685SJohn Baldwin void 346*3c0e5685SJohn Baldwin sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m) 347*3c0e5685SJohn Baldwin { 348*3c0e5685SJohn Baldwin 349*3c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 350*3c0e5685SJohn Baldwin 351*3c0e5685SJohn Baldwin sb->sb_ccc += m->m_len; 352*3c0e5685SJohn Baldwin sb->sb_tlscc += m->m_len; 353*3c0e5685SJohn Baldwin 354*3c0e5685SJohn Baldwin sb->sb_mbcnt += MSIZE; 355*3c0e5685SJohn Baldwin sb->sb_mcnt += 1; 356*3c0e5685SJohn Baldwin 357*3c0e5685SJohn Baldwin if (m->m_flags & M_EXT) { 358*3c0e5685SJohn Baldwin sb->sb_mbcnt += m->m_ext.ext_size; 359*3c0e5685SJohn Baldwin sb->sb_ccnt += 1; 360*3c0e5685SJohn Baldwin } 361*3c0e5685SJohn Baldwin } 362*3c0e5685SJohn Baldwin 363*3c0e5685SJohn Baldwin void 364*3c0e5685SJohn Baldwin sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m) 365*3c0e5685SJohn Baldwin { 366*3c0e5685SJohn Baldwin 367*3c0e5685SJohn Baldwin #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. 
*/ 368*3c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 369*3c0e5685SJohn Baldwin #endif 370*3c0e5685SJohn Baldwin 371*3c0e5685SJohn Baldwin sb->sb_ccc -= m->m_len; 372*3c0e5685SJohn Baldwin sb->sb_tlscc -= m->m_len; 373*3c0e5685SJohn Baldwin 374*3c0e5685SJohn Baldwin sb->sb_mbcnt -= MSIZE; 375*3c0e5685SJohn Baldwin sb->sb_mcnt -= 1; 376*3c0e5685SJohn Baldwin 377*3c0e5685SJohn Baldwin if (m->m_flags & M_EXT) { 378*3c0e5685SJohn Baldwin sb->sb_mbcnt -= m->m_ext.ext_size; 379*3c0e5685SJohn Baldwin sb->sb_ccnt -= 1; 380*3c0e5685SJohn Baldwin } 381*3c0e5685SJohn Baldwin } 382*3c0e5685SJohn Baldwin #endif 383*3c0e5685SJohn Baldwin 3848967b220SGleb Smirnoff /* 385050ac265SRobert Watson * Socantsendmore indicates that no more data will be sent on the socket; it 386050ac265SRobert Watson * would normally be applied to a socket when the user informs the system 387050ac265SRobert Watson * that no more data is to be sent, by the protocol code (in case 388050ac265SRobert Watson * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be 389050ac265SRobert Watson * received, and will normally be applied to the socket by a protocol when it 390050ac265SRobert Watson * detects that the peer will send no more data. Data queued for reading in 391050ac265SRobert Watson * the socket may yet be read. 392df8bae1dSRodney W. Grimes */ 393a34b7046SRobert Watson void 394050ac265SRobert Watson socantsendmore_locked(struct socket *so) 395a34b7046SRobert Watson { 396a34b7046SRobert Watson 397a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_snd); 398a34b7046SRobert Watson 399a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE; 400a34b7046SRobert Watson sowwakeup_locked(so); 401a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 402a34b7046SRobert Watson } 403df8bae1dSRodney W. Grimes 40426f9a767SRodney W. Grimes void 405050ac265SRobert Watson socantsendmore(struct socket *so) 406df8bae1dSRodney W. Grimes { 407df8bae1dSRodney W. 
Grimes 408a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_snd); 409a34b7046SRobert Watson socantsendmore_locked(so); 410a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 411a34b7046SRobert Watson } 412a34b7046SRobert Watson 413a34b7046SRobert Watson void 414050ac265SRobert Watson socantrcvmore_locked(struct socket *so) 415a34b7046SRobert Watson { 416a34b7046SRobert Watson 417a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_rcv); 418a34b7046SRobert Watson 419a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE; 420*3c0e5685SJohn Baldwin #ifdef KERN_TLS 421*3c0e5685SJohn Baldwin if (so->so_rcv.sb_flags & SB_TLS_RX) 422*3c0e5685SJohn Baldwin ktls_check_rx(&so->so_rcv); 423*3c0e5685SJohn Baldwin #endif 424a34b7046SRobert Watson sorwakeup_locked(so); 425a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 426df8bae1dSRodney W. Grimes } 427df8bae1dSRodney W. Grimes 42826f9a767SRodney W. Grimes void 429050ac265SRobert Watson socantrcvmore(struct socket *so) 430df8bae1dSRodney W. Grimes { 431df8bae1dSRodney W. Grimes 432a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 433a34b7046SRobert Watson socantrcvmore_locked(so); 434a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 435df8bae1dSRodney W. Grimes } 436df8bae1dSRodney W. Grimes 437df8bae1dSRodney W. Grimes /* 438df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer. 439df8bae1dSRodney W. Grimes */ 44026f9a767SRodney W. Grimes int 441050ac265SRobert Watson sbwait(struct sockbuf *sb) 442df8bae1dSRodney W. Grimes { 443df8bae1dSRodney W. Grimes 44431f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 44531f555a1SRobert Watson 446df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT; 4470f9d0a73SGleb Smirnoff return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, 44847daf5d5SBruce Evans (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", 4497729cbf1SDavide Italiano sb->sb_timeo, 0, 0)); 450df8bae1dSRodney W. 
Grimes } 451df8bae1dSRodney W. Grimes 45226f9a767SRodney W. Grimes int 4537abab911SRobert Watson sblock(struct sockbuf *sb, int flags) 454df8bae1dSRodney W. Grimes { 455df8bae1dSRodney W. Grimes 456265de5bbSRobert Watson KASSERT((flags & SBL_VALID) == flags, 457265de5bbSRobert Watson ("sblock: flags invalid (0x%x)", flags)); 458265de5bbSRobert Watson 459265de5bbSRobert Watson if (flags & SBL_WAIT) { 460265de5bbSRobert Watson if ((sb->sb_flags & SB_NOINTR) || 461265de5bbSRobert Watson (flags & SBL_NOINTR)) { 4627abab911SRobert Watson sx_xlock(&sb->sb_sx); 463df8bae1dSRodney W. Grimes return (0); 464049c3b6cSRobert Watson } 465049c3b6cSRobert Watson return (sx_xlock_sig(&sb->sb_sx)); 4667abab911SRobert Watson } else { 4677abab911SRobert Watson if (sx_try_xlock(&sb->sb_sx) == 0) 4687abab911SRobert Watson return (EWOULDBLOCK); 4697abab911SRobert Watson return (0); 4707abab911SRobert Watson } 4717abab911SRobert Watson } 4727abab911SRobert Watson 4737abab911SRobert Watson void 4747abab911SRobert Watson sbunlock(struct sockbuf *sb) 4757abab911SRobert Watson { 4767abab911SRobert Watson 4777abab911SRobert Watson sx_xunlock(&sb->sb_sx); 478df8bae1dSRodney W. Grimes } 479df8bae1dSRodney W. Grimes 480df8bae1dSRodney W. Grimes /* 481050ac265SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous notification 482050ac265SRobert Watson * via SIGIO if the socket has the SS_ASYNC flag set. 483a34b7046SRobert Watson * 484a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end 485a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock 486a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through 487a34b7046SRobert Watson * to the point where it's no longer required. 
We currently hold the lock 488a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and 489a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's 490a34b7046SRobert Watson * correct. 491df8bae1dSRodney W. Grimes */ 49226f9a767SRodney W. Grimes void 493050ac265SRobert Watson sowakeup(struct socket *so, struct sockbuf *sb) 494df8bae1dSRodney W. Grimes { 49574fb0ba7SJohn Baldwin int ret; 496d48d4b25SSeigo Tanimura 497a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 498a34b7046SRobert Watson 499779f106aSGleb Smirnoff selwakeuppri(sb->sb_sel, PSOCK); 500779f106aSGleb Smirnoff if (!SEL_WAITING(sb->sb_sel)) 501df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_SEL; 502df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) { 503df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT; 5040f9d0a73SGleb Smirnoff wakeup(&sb->sb_acc); 505df8bae1dSRodney W. Grimes } 506779f106aSGleb Smirnoff KNOTE_LOCKED(&sb->sb_sel->si_note, 0); 50798c92369SNavdeep Parhar if (sb->sb_upcall != NULL) { 508eb1b1807SGleb Smirnoff ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); 50974fb0ba7SJohn Baldwin if (ret == SU_ISCONNECTED) { 51074fb0ba7SJohn Baldwin KASSERT(sb == &so->so_rcv, 51174fb0ba7SJohn Baldwin ("SO_SND upcall returned SU_ISCONNECTED")); 51274fb0ba7SJohn Baldwin soupcall_clear(so, SO_RCV); 51374fb0ba7SJohn Baldwin } 51474fb0ba7SJohn Baldwin } else 51574fb0ba7SJohn Baldwin ret = SU_OK; 5164cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO) 517f3215338SJohn Baldwin sowakeup_aio(so, sb); 51874fb0ba7SJohn Baldwin SOCKBUF_UNLOCK(sb); 519555b3e2fSGleb Smirnoff if (ret == SU_ISCONNECTED) 52074fb0ba7SJohn Baldwin soisconnected(so); 52174fb0ba7SJohn Baldwin if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) 52274fb0ba7SJohn Baldwin pgsigio(&so->so_sigio, SIGIO, 0); 523a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); 524df8bae1dSRodney W. Grimes } 525df8bae1dSRodney W. Grimes 526df8bae1dSRodney W. 
Grimes /* 527df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines. 528df8bae1dSRodney W. Grimes * 529050ac265SRobert Watson * Each socket contains two socket buffers: one for sending data and one for 530050ac265SRobert Watson * receiving data. Each buffer contains a queue of mbufs, information about 531050ac265SRobert Watson * the number of mbufs and amount of data in the queue, and other fields 532050ac265SRobert Watson * allowing select() statements and notification on data availability to be 533050ac265SRobert Watson * implemented. 534df8bae1dSRodney W. Grimes * 535050ac265SRobert Watson * Data stored in a socket buffer is maintained as a list of records. Each 536050ac265SRobert Watson * record is a list of mbufs chained together with the m_next field. Records 537050ac265SRobert Watson * are chained together with the m_nextpkt field. The upper level routine 538050ac265SRobert Watson * soreceive() expects the following conventions to be observed when placing 539050ac265SRobert Watson * information in the receive buffer: 540df8bae1dSRodney W. Grimes * 541050ac265SRobert Watson * 1. If the protocol requires each message be preceded by the sender's name, 542050ac265SRobert Watson * then a record containing that name must be present before any 543050ac265SRobert Watson * associated data (mbuf's must be of type MT_SONAME). 544050ac265SRobert Watson * 2. If the protocol supports the exchange of ``access rights'' (really just 545050ac265SRobert Watson * additional data associated with the message), and there are ``rights'' 546050ac265SRobert Watson * to be received, then a record containing this data should be present 547050ac265SRobert Watson * (mbuf's must be of type MT_RIGHTS). 548050ac265SRobert Watson * 3. If a name or rights record exists, then it must be followed by a data 549050ac265SRobert Watson * record, perhaps of zero length. 550df8bae1dSRodney W. Grimes * 551df8bae1dSRodney W. 
Grimes * Before using a new socket structure it is first necessary to reserve 552df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit 553df8bae1dSRodney W. Grimes * some of the available buffer space in the system buffer pool for the 554050ac265SRobert Watson * socket (currently, it does nothing but enforce limits). The space should 555050ac265SRobert Watson * be released by calling sbrelease() when the socket is destroyed. 556df8bae1dSRodney W. Grimes */ 55726f9a767SRodney W. Grimes int 558050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 559df8bae1dSRodney W. Grimes { 560b40ce416SJulian Elischer struct thread *td = curthread; 561df8bae1dSRodney W. Grimes 5623f11a2f3SRobert Watson SOCKBUF_LOCK(&so->so_snd); 5639535efc0SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 5643f11a2f3SRobert Watson if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) 5653f11a2f3SRobert Watson goto bad; 5663f11a2f3SRobert Watson if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) 5673f11a2f3SRobert Watson goto bad2; 568df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0) 569df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1; 570df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0) 571df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES; 572df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 573df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 5743f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 5759535efc0SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 576df8bae1dSRodney W. Grimes return (0); 577df8bae1dSRodney W. Grimes bad2: 5783f11a2f3SRobert Watson sbrelease_locked(&so->so_snd, so); 579df8bae1dSRodney W. Grimes bad: 5803f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 5813f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 582df8bae1dSRodney W. Grimes return (ENOBUFS); 583df8bae1dSRodney W. Grimes } 584df8bae1dSRodney W. 
Grimes 58579cb7eb4SDavid Greenman static int 58679cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 58779cb7eb4SDavid Greenman { 58879cb7eb4SDavid Greenman int error = 0; 58986a93d51SJohn Baldwin u_long tmp_sb_max = sb_max; 59079cb7eb4SDavid Greenman 59186a93d51SJohn Baldwin error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); 59279cb7eb4SDavid Greenman if (error || !req->newptr) 59379cb7eb4SDavid Greenman return (error); 59486a93d51SJohn Baldwin if (tmp_sb_max < MSIZE + MCLBYTES) 59579cb7eb4SDavid Greenman return (EINVAL); 59686a93d51SJohn Baldwin sb_max = tmp_sb_max; 59779cb7eb4SDavid Greenman sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 59879cb7eb4SDavid Greenman return (0); 59979cb7eb4SDavid Greenman } 60079cb7eb4SDavid Greenman 601df8bae1dSRodney W. Grimes /* 602050ac265SRobert Watson * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't 603050ac265SRobert Watson * become limiting if buffering efficiency is near the normal case. 604df8bae1dSRodney W. Grimes */ 60526f9a767SRodney W. Grimes int 606050ac265SRobert Watson sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, 607050ac265SRobert Watson struct thread *td) 608df8bae1dSRodney W. Grimes { 60991d5354aSJohn Baldwin rlim_t sbsize_limit; 610ecf72308SBrian Feldman 6113f11a2f3SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 6123f11a2f3SRobert Watson 613ecf72308SBrian Feldman /* 6147978014dSRobert Watson * When a thread is passed, we take into account the thread's socket 6157978014dSRobert Watson * buffer size limit. The caller will generally pass curthread, but 6167978014dSRobert Watson * in the TCP input path, NULL will be passed to indicate that no 6177978014dSRobert Watson * appropriate thread resource limits are available. In that case, 6187978014dSRobert Watson * we don't apply a process limit. 619ecf72308SBrian Feldman */ 62079cb7eb4SDavid Greenman if (cc > sb_max_adj) 621df8bae1dSRodney W. 
Grimes return (0); 62291d5354aSJohn Baldwin if (td != NULL) { 623f6f6d240SMateusz Guzik sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); 62491d5354aSJohn Baldwin } else 62591d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY; 626f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 62791d5354aSJohn Baldwin sbsize_limit)) 628ecf72308SBrian Feldman return (0); 6294b29bc4fSGarrett Wollman sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 630df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat) 631df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat; 632df8bae1dSRodney W. Grimes return (1); 633df8bae1dSRodney W. Grimes } 634df8bae1dSRodney W. Grimes 6353f11a2f3SRobert Watson int 63664290befSGleb Smirnoff sbsetopt(struct socket *so, int cmd, u_long cc) 6373f11a2f3SRobert Watson { 63864290befSGleb Smirnoff struct sockbuf *sb; 63964290befSGleb Smirnoff short *flags; 64064290befSGleb Smirnoff u_int *hiwat, *lowat; 6413f11a2f3SRobert Watson int error; 6423f11a2f3SRobert Watson 643b2037136SMatt Macy sb = NULL; 64464290befSGleb Smirnoff SOCK_LOCK(so); 64564290befSGleb Smirnoff if (SOLISTENING(so)) { 64664290befSGleb Smirnoff switch (cmd) { 64764290befSGleb Smirnoff case SO_SNDLOWAT: 64864290befSGleb Smirnoff case SO_SNDBUF: 64964290befSGleb Smirnoff lowat = &so->sol_sbsnd_lowat; 65064290befSGleb Smirnoff hiwat = &so->sol_sbsnd_hiwat; 65164290befSGleb Smirnoff flags = &so->sol_sbsnd_flags; 65264290befSGleb Smirnoff break; 65364290befSGleb Smirnoff case SO_RCVLOWAT: 65464290befSGleb Smirnoff case SO_RCVBUF: 65564290befSGleb Smirnoff lowat = &so->sol_sbrcv_lowat; 65664290befSGleb Smirnoff hiwat = &so->sol_sbrcv_hiwat; 65764290befSGleb Smirnoff flags = &so->sol_sbrcv_flags; 65864290befSGleb Smirnoff break; 65964290befSGleb Smirnoff } 66064290befSGleb Smirnoff } else { 66164290befSGleb Smirnoff switch (cmd) { 66264290befSGleb Smirnoff case SO_SNDLOWAT: 66364290befSGleb Smirnoff case SO_SNDBUF: 66464290befSGleb Smirnoff sb = &so->so_snd; 66564290befSGleb 
Smirnoff break; 66664290befSGleb Smirnoff case SO_RCVLOWAT: 66764290befSGleb Smirnoff case SO_RCVBUF: 66864290befSGleb Smirnoff sb = &so->so_rcv; 66964290befSGleb Smirnoff break; 67064290befSGleb Smirnoff } 67164290befSGleb Smirnoff flags = &sb->sb_flags; 67264290befSGleb Smirnoff hiwat = &sb->sb_hiwat; 67364290befSGleb Smirnoff lowat = &sb->sb_lowat; 6743f11a2f3SRobert Watson SOCKBUF_LOCK(sb); 67564290befSGleb Smirnoff } 67664290befSGleb Smirnoff 67764290befSGleb Smirnoff error = 0; 67864290befSGleb Smirnoff switch (cmd) { 67964290befSGleb Smirnoff case SO_SNDBUF: 68064290befSGleb Smirnoff case SO_RCVBUF: 68164290befSGleb Smirnoff if (SOLISTENING(so)) { 68264290befSGleb Smirnoff if (cc > sb_max_adj) { 68364290befSGleb Smirnoff error = ENOBUFS; 68464290befSGleb Smirnoff break; 68564290befSGleb Smirnoff } 68664290befSGleb Smirnoff *hiwat = cc; 68764290befSGleb Smirnoff if (*lowat > *hiwat) 68864290befSGleb Smirnoff *lowat = *hiwat; 68964290befSGleb Smirnoff } else { 69064290befSGleb Smirnoff if (!sbreserve_locked(sb, cc, so, curthread)) 69164290befSGleb Smirnoff error = ENOBUFS; 69264290befSGleb Smirnoff } 69364290befSGleb Smirnoff if (error == 0) 69464290befSGleb Smirnoff *flags &= ~SB_AUTOSIZE; 69564290befSGleb Smirnoff break; 69664290befSGleb Smirnoff case SO_SNDLOWAT: 69764290befSGleb Smirnoff case SO_RCVLOWAT: 69864290befSGleb Smirnoff /* 69964290befSGleb Smirnoff * Make sure the low-water is never greater than the 70064290befSGleb Smirnoff * high-water. 70164290befSGleb Smirnoff */ 70264290befSGleb Smirnoff *lowat = (cc > *hiwat) ? *hiwat : cc; 70364290befSGleb Smirnoff break; 70464290befSGleb Smirnoff } 70564290befSGleb Smirnoff 70664290befSGleb Smirnoff if (!SOLISTENING(so)) 7073f11a2f3SRobert Watson SOCKBUF_UNLOCK(sb); 70864290befSGleb Smirnoff SOCK_UNLOCK(so); 7093f11a2f3SRobert Watson return (error); 7103f11a2f3SRobert Watson } 7113f11a2f3SRobert Watson 712df8bae1dSRodney W. Grimes /* 713df8bae1dSRodney W. 
Grimes * Free mbufs held by a socket, and reserved mbuf space. 714df8bae1dSRodney W. Grimes */ 7153f0bfcccSRobert Watson void 716050ac265SRobert Watson sbrelease_internal(struct sockbuf *sb, struct socket *so) 717eaa6dfbcSRobert Watson { 718eaa6dfbcSRobert Watson 719eaa6dfbcSRobert Watson sbflush_internal(sb); 720eaa6dfbcSRobert Watson (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 721eaa6dfbcSRobert Watson RLIM_INFINITY); 722eaa6dfbcSRobert Watson sb->sb_mbmax = 0; 723eaa6dfbcSRobert Watson } 724eaa6dfbcSRobert Watson 72526f9a767SRodney W. Grimes void 726050ac265SRobert Watson sbrelease_locked(struct sockbuf *sb, struct socket *so) 727df8bae1dSRodney W. Grimes { 728df8bae1dSRodney W. Grimes 729a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 730a34b7046SRobert Watson 731eaa6dfbcSRobert Watson sbrelease_internal(sb, so); 732df8bae1dSRodney W. Grimes } 733df8bae1dSRodney W. Grimes 734a34b7046SRobert Watson void 735050ac265SRobert Watson sbrelease(struct sockbuf *sb, struct socket *so) 736a34b7046SRobert Watson { 737a34b7046SRobert Watson 738a34b7046SRobert Watson SOCKBUF_LOCK(sb); 739a34b7046SRobert Watson sbrelease_locked(sb, so); 740a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 741a34b7046SRobert Watson } 742eaa6dfbcSRobert Watson 743eaa6dfbcSRobert Watson void 744050ac265SRobert Watson sbdestroy(struct sockbuf *sb, struct socket *so) 745eaa6dfbcSRobert Watson { 746eaa6dfbcSRobert Watson 747eaa6dfbcSRobert Watson sbrelease_internal(sb, so); 748b2e60773SJohn Baldwin #ifdef KERN_TLS 749b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL) 750b2e60773SJohn Baldwin ktls_free(sb->sb_tls_info); 751b2e60773SJohn Baldwin sb->sb_tls_info = NULL; 752b2e60773SJohn Baldwin #endif 753eaa6dfbcSRobert Watson } 754eaa6dfbcSRobert Watson 755df8bae1dSRodney W. Grimes /* 756050ac265SRobert Watson * Routines to add and remove data from an mbuf queue. 757df8bae1dSRodney W. 
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available by comparing the value of sbspace() with the amount of data to
 * be added.  sbappendrecord() differs from sbappend() in that data supplied
 * is treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendcontrol() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and is then removed from the
 * socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
Grimes */ 777395bb186SSam Leffler #ifdef SOCKBUF_DEBUG 778395bb186SSam Leffler void 779395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line) 780395bb186SSam Leffler { 781395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 782395bb186SSam Leffler 783a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 784a34b7046SRobert Watson 785395bb186SSam Leffler while (m && m->m_nextpkt) 786395bb186SSam Leffler m = m->m_nextpkt; 787395bb186SSam Leffler 788395bb186SSam Leffler if (m != sb->sb_lastrecord) { 789395bb186SSam Leffler printf("%s: sb_mb %p sb_lastrecord %p last %p\n", 790395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_lastrecord, m); 791395bb186SSam Leffler printf("packet chain:\n"); 792395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 793395bb186SSam Leffler printf("\t%p\n", m); 794395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 795395bb186SSam Leffler } 796395bb186SSam Leffler } 797395bb186SSam Leffler 798395bb186SSam Leffler void 799395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line) 800395bb186SSam Leffler { 801395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 802395bb186SSam Leffler struct mbuf *n; 803395bb186SSam Leffler 804a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 805a34b7046SRobert Watson 806395bb186SSam Leffler while (m && m->m_nextpkt) 807395bb186SSam Leffler m = m->m_nextpkt; 808395bb186SSam Leffler 809395bb186SSam Leffler while (m && m->m_next) 810395bb186SSam Leffler m = m->m_next; 811395bb186SSam Leffler 812395bb186SSam Leffler if (m != sb->sb_mbtail) { 813395bb186SSam Leffler printf("%s: sb_mb %p sb_mbtail %p last %p\n", 814395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_mbtail, m); 815395bb186SSam Leffler printf("packet tree:\n"); 816395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 817395bb186SSam Leffler printf("\t"); 818395bb186SSam Leffler for (n = m; n != NULL; n = n->m_next) 819395bb186SSam Leffler printf("%p ", n); 
820395bb186SSam Leffler printf("\n"); 821395bb186SSam Leffler } 822395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 823395bb186SSam Leffler } 824*3c0e5685SJohn Baldwin 825*3c0e5685SJohn Baldwin #ifdef KERN_TLS 826*3c0e5685SJohn Baldwin m = sb->sb_mtls; 827*3c0e5685SJohn Baldwin while (m && m->m_next) 828*3c0e5685SJohn Baldwin m = m->m_next; 829*3c0e5685SJohn Baldwin 830*3c0e5685SJohn Baldwin if (m != sb->sb_mtlstail) { 831*3c0e5685SJohn Baldwin printf("%s: sb_mtls %p sb_mtlstail %p last %p\n", 832*3c0e5685SJohn Baldwin __func__, sb->sb_mtls, sb->sb_mtlstail, m); 833*3c0e5685SJohn Baldwin printf("TLS packet tree:\n"); 834*3c0e5685SJohn Baldwin printf("\t"); 835*3c0e5685SJohn Baldwin for (m = sb->sb_mtls; m != NULL; m = m->m_next) { 836*3c0e5685SJohn Baldwin printf("%p ", m); 837*3c0e5685SJohn Baldwin } 838*3c0e5685SJohn Baldwin printf("\n"); 839*3c0e5685SJohn Baldwin panic("%s from %s:%u", __func__, file, line); 840*3c0e5685SJohn Baldwin } 841*3c0e5685SJohn Baldwin #endif 842395bb186SSam Leffler } 843395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */ 844395bb186SSam Leffler 845395bb186SSam Leffler #define SBLINKRECORD(sb, m0) do { \ 846a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); \ 847395bb186SSam Leffler if ((sb)->sb_lastrecord != NULL) \ 848395bb186SSam Leffler (sb)->sb_lastrecord->m_nextpkt = (m0); \ 849395bb186SSam Leffler else \ 850395bb186SSam Leffler (sb)->sb_mb = (m0); \ 851395bb186SSam Leffler (sb)->sb_lastrecord = (m0); \ 852395bb186SSam Leffler } while (/*CONSTCOND*/0) 853395bb186SSam Leffler 854df8bae1dSRodney W. Grimes /* 855050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 856050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 857050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 858df8bae1dSRodney W. Grimes */ 85926f9a767SRodney W. 
Grimes void 860829fae90SGleb Smirnoff sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) 861df8bae1dSRodney W. Grimes { 862050ac265SRobert Watson struct mbuf *n; 863df8bae1dSRodney W. Grimes 864a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 865a34b7046SRobert Watson 866b85f65afSPedro F. Giffuni if (m == NULL) 867df8bae1dSRodney W. Grimes return; 868829fae90SGleb Smirnoff sbm_clrprotoflags(m, flags); 869395bb186SSam Leffler SBLASTRECORDCHK(sb); 870797f2d22SPoul-Henning Kamp n = sb->sb_mb; 871797f2d22SPoul-Henning Kamp if (n) { 872df8bae1dSRodney W. Grimes while (n->m_nextpkt) 873df8bae1dSRodney W. Grimes n = n->m_nextpkt; 874df8bae1dSRodney W. Grimes do { 875df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) { 876a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 877df8bae1dSRodney W. Grimes return; 878df8bae1dSRodney W. Grimes } 879df8bae1dSRodney W. Grimes } while (n->m_next && (n = n->m_next)); 880395bb186SSam Leffler } else { 881395bb186SSam Leffler /* 882395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but 883395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that 884395bb186SSam Leffler * XXX way. 885395bb186SSam Leffler */ 886395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) { 887395bb186SSam Leffler do { 888395bb186SSam Leffler if (n->m_flags & M_EOR) { 889a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 890395bb186SSam Leffler return; 891395bb186SSam Leffler } 892395bb186SSam Leffler } while (n->m_next && (n = n->m_next)); 893395bb186SSam Leffler } else { 894395bb186SSam Leffler /* 895395bb186SSam Leffler * If this is the first record in the socket buffer, 896395bb186SSam Leffler * it's also the last record. 897395bb186SSam Leffler */ 898395bb186SSam Leffler sb->sb_lastrecord = m; 899395bb186SSam Leffler } 900df8bae1dSRodney W. Grimes } 901df8bae1dSRodney W. 
Grimes sbcompress(sb, m, n); 902395bb186SSam Leffler SBLASTRECORDCHK(sb); 903395bb186SSam Leffler } 904395bb186SSam Leffler 905395bb186SSam Leffler /* 906050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 907050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 908050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 909a34b7046SRobert Watson */ 910a34b7046SRobert Watson void 911829fae90SGleb Smirnoff sbappend(struct sockbuf *sb, struct mbuf *m, int flags) 912a34b7046SRobert Watson { 913a34b7046SRobert Watson 914a34b7046SRobert Watson SOCKBUF_LOCK(sb); 915829fae90SGleb Smirnoff sbappend_locked(sb, m, flags); 916a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 917a34b7046SRobert Watson } 918a34b7046SRobert Watson 919*3c0e5685SJohn Baldwin #ifdef KERN_TLS 920*3c0e5685SJohn Baldwin /* 921*3c0e5685SJohn Baldwin * Append an mbuf containing encrypted TLS data. The data 922*3c0e5685SJohn Baldwin * is marked M_NOTREADY until it has been decrypted and 923*3c0e5685SJohn Baldwin * stored as a TLS record. 924*3c0e5685SJohn Baldwin */ 925*3c0e5685SJohn Baldwin static void 926*3c0e5685SJohn Baldwin sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) 927*3c0e5685SJohn Baldwin { 928*3c0e5685SJohn Baldwin struct mbuf *n; 929*3c0e5685SJohn Baldwin 930*3c0e5685SJohn Baldwin SBLASTMBUFCHK(sb); 931*3c0e5685SJohn Baldwin 932*3c0e5685SJohn Baldwin /* Remove all packet headers and mbuf tags to get a pure data chain. 
*/ 933*3c0e5685SJohn Baldwin m_demote(m, 1, 0); 934*3c0e5685SJohn Baldwin 935*3c0e5685SJohn Baldwin for (n = m; n != NULL; n = n->m_next) 936*3c0e5685SJohn Baldwin n->m_flags |= M_NOTREADY; 937*3c0e5685SJohn Baldwin sbcompress_ktls_rx(sb, m, sb->sb_mtlstail); 938*3c0e5685SJohn Baldwin ktls_check_rx(sb); 939*3c0e5685SJohn Baldwin } 940*3c0e5685SJohn Baldwin #endif 941*3c0e5685SJohn Baldwin 942a34b7046SRobert Watson /* 943050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 944050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 945050ac265SRobert Watson * that is, a stream protocol (such as TCP). 946395bb186SSam Leffler */ 947395bb186SSam Leffler void 948651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) 949395bb186SSam Leffler { 950a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 951395bb186SSam Leffler 952395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); 953*3c0e5685SJohn Baldwin 954*3c0e5685SJohn Baldwin #ifdef KERN_TLS 955*3c0e5685SJohn Baldwin /* 956*3c0e5685SJohn Baldwin * Decrypted TLS records are appended as records via 957*3c0e5685SJohn Baldwin * sbappendrecord(). TCP passes encrypted TLS records to this 958*3c0e5685SJohn Baldwin * function which must be scheduled for decryption. 
959*3c0e5685SJohn Baldwin */ 960*3c0e5685SJohn Baldwin if (sb->sb_flags & SB_TLS_RX) { 961*3c0e5685SJohn Baldwin sbappend_ktls_rx(sb, m); 962*3c0e5685SJohn Baldwin return; 963*3c0e5685SJohn Baldwin } 964*3c0e5685SJohn Baldwin #endif 965*3c0e5685SJohn Baldwin 966395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); 967395bb186SSam Leffler 968395bb186SSam Leffler SBLASTMBUFCHK(sb); 969395bb186SSam Leffler 970b2e60773SJohn Baldwin #ifdef KERN_TLS 971b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL) 972b2e60773SJohn Baldwin ktls_seq(sb, m); 973b2e60773SJohn Baldwin #endif 974b2e60773SJohn Baldwin 975844cacd1SGleb Smirnoff /* Remove all packet headers and mbuf tags to get a pure data chain. */ 976651e4e6aSGleb Smirnoff m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); 977844cacd1SGleb Smirnoff 978395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail); 979395bb186SSam Leffler 980395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb; 981395bb186SSam Leffler SBLASTRECORDCHK(sb); 982df8bae1dSRodney W. Grimes } 983df8bae1dSRodney W. Grimes 984a34b7046SRobert Watson /* 985050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 986050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 987050ac265SRobert Watson * that is, a stream protocol (such as TCP). 988a34b7046SRobert Watson */ 989a34b7046SRobert Watson void 990651e4e6aSGleb Smirnoff sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) 991a34b7046SRobert Watson { 992a34b7046SRobert Watson 993a34b7046SRobert Watson SOCKBUF_LOCK(sb); 994651e4e6aSGleb Smirnoff sbappendstream_locked(sb, m, flags); 995a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 996a34b7046SRobert Watson } 997a34b7046SRobert Watson 998df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG 99926f9a767SRodney W. Grimes void 100057f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line) 1001df8bae1dSRodney W. 
Grimes { 10020f9d0a73SGleb Smirnoff struct mbuf *m, *n, *fnrdy; 10030f9d0a73SGleb Smirnoff u_long acc, ccc, mbcnt; 1004*3c0e5685SJohn Baldwin #ifdef KERN_TLS 1005*3c0e5685SJohn Baldwin u_long tlscc; 1006*3c0e5685SJohn Baldwin #endif 1007df8bae1dSRodney W. Grimes 1008a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1009a34b7046SRobert Watson 10100f9d0a73SGleb Smirnoff acc = ccc = mbcnt = 0; 10110f9d0a73SGleb Smirnoff fnrdy = NULL; 101257f43a45SGleb Smirnoff 10130931333fSBill Fenner for (m = sb->sb_mb; m; m = n) { 10140931333fSBill Fenner n = m->m_nextpkt; 10150931333fSBill Fenner for (; m; m = m->m_next) { 101657f43a45SGleb Smirnoff if (m->m_len == 0) { 101757f43a45SGleb Smirnoff printf("sb %p empty mbuf %p\n", sb, m); 101857f43a45SGleb Smirnoff goto fail; 101957f43a45SGleb Smirnoff } 10200f9d0a73SGleb Smirnoff if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { 10210f9d0a73SGleb Smirnoff if (m != sb->sb_fnrdy) { 10220f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p != m %p\n", 10230f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 10240f9d0a73SGleb Smirnoff goto fail; 10250f9d0a73SGleb Smirnoff } 10260f9d0a73SGleb Smirnoff fnrdy = m; 10270f9d0a73SGleb Smirnoff } 10280f9d0a73SGleb Smirnoff if (fnrdy) { 10290f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) { 10300f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p, m %p is avail\n", 10310f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 10320f9d0a73SGleb Smirnoff goto fail; 10330f9d0a73SGleb Smirnoff } 10340f9d0a73SGleb Smirnoff } else 10350f9d0a73SGleb Smirnoff acc += m->m_len; 10360f9d0a73SGleb Smirnoff ccc += m->m_len; 1037df8bae1dSRodney W. Grimes mbcnt += MSIZE; 1038313861b8SJulian Elischer if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 1039df8bae1dSRodney W. Grimes mbcnt += m->m_ext.ext_size; 10400931333fSBill Fenner } 1041df8bae1dSRodney W. 
Grimes } 1042*3c0e5685SJohn Baldwin #ifdef KERN_TLS 1043*3c0e5685SJohn Baldwin /* 1044*3c0e5685SJohn Baldwin * Account for mbufs "detached" by ktls_detach_record() while 1045*3c0e5685SJohn Baldwin * they are decrypted by ktls_decrypt(). tlsdcc gives a count 1046*3c0e5685SJohn Baldwin * of the detached bytes that are included in ccc. The mbufs 1047*3c0e5685SJohn Baldwin * and clusters are not included in the socket buffer 1048*3c0e5685SJohn Baldwin * accounting. 1049*3c0e5685SJohn Baldwin */ 1050*3c0e5685SJohn Baldwin ccc += sb->sb_tlsdcc; 1051*3c0e5685SJohn Baldwin 1052*3c0e5685SJohn Baldwin tlscc = 0; 1053*3c0e5685SJohn Baldwin for (m = sb->sb_mtls; m; m = m->m_next) { 1054*3c0e5685SJohn Baldwin if (m->m_nextpkt != NULL) { 1055*3c0e5685SJohn Baldwin printf("sb %p TLS mbuf %p with nextpkt\n", sb, m); 1056*3c0e5685SJohn Baldwin goto fail; 1057*3c0e5685SJohn Baldwin } 1058*3c0e5685SJohn Baldwin if ((m->m_flags & M_NOTREADY) == 0) { 1059*3c0e5685SJohn Baldwin printf("sb %p TLS mbuf %p ready\n", sb, m); 1060*3c0e5685SJohn Baldwin goto fail; 1061*3c0e5685SJohn Baldwin } 1062*3c0e5685SJohn Baldwin tlscc += m->m_len; 1063*3c0e5685SJohn Baldwin ccc += m->m_len; 1064*3c0e5685SJohn Baldwin mbcnt += MSIZE; 1065*3c0e5685SJohn Baldwin if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 1066*3c0e5685SJohn Baldwin mbcnt += m->m_ext.ext_size; 1067*3c0e5685SJohn Baldwin } 1068*3c0e5685SJohn Baldwin 1069*3c0e5685SJohn Baldwin if (sb->sb_tlscc != tlscc) { 1070*3c0e5685SJohn Baldwin printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, 1071*3c0e5685SJohn Baldwin sb->sb_tlsdcc); 1072*3c0e5685SJohn Baldwin goto fail; 1073*3c0e5685SJohn Baldwin } 1074*3c0e5685SJohn Baldwin #endif 10750f9d0a73SGleb Smirnoff if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { 10760f9d0a73SGleb Smirnoff printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", 10770f9d0a73SGleb Smirnoff acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); 1078*3c0e5685SJohn Baldwin #ifdef KERN_TLS 
1079*3c0e5685SJohn Baldwin printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc, 1080*3c0e5685SJohn Baldwin sb->sb_tlsdcc); 1081*3c0e5685SJohn Baldwin #endif 108257f43a45SGleb Smirnoff goto fail; 1083df8bae1dSRodney W. Grimes } 108457f43a45SGleb Smirnoff return; 108557f43a45SGleb Smirnoff fail: 108657f43a45SGleb Smirnoff panic("%s from %s:%u", __func__, file, line); 1087df8bae1dSRodney W. Grimes } 1088df8bae1dSRodney W. Grimes #endif 1089df8bae1dSRodney W. Grimes 1090df8bae1dSRodney W. Grimes /* 1091050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 1092df8bae1dSRodney W. Grimes */ 109326f9a767SRodney W. Grimes void 1094050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) 1095df8bae1dSRodney W. Grimes { 1096050ac265SRobert Watson struct mbuf *m; 1097df8bae1dSRodney W. Grimes 1098a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1099a34b7046SRobert Watson 1100b85f65afSPedro F. Giffuni if (m0 == NULL) 1101df8bae1dSRodney W. Grimes return; 110253b680caSGleb Smirnoff m_clrprotoflags(m0); 1103df8bae1dSRodney W. Grimes /* 1104050ac265SRobert Watson * Put the first mbuf on the queue. Note this permits zero length 1105050ac265SRobert Watson * records. 1106df8bae1dSRodney W. Grimes */ 1107df8bae1dSRodney W. Grimes sballoc(sb, m0); 1108395bb186SSam Leffler SBLASTRECORDCHK(sb); 1109395bb186SSam Leffler SBLINKRECORD(sb, m0); 1110e72a94adSMaksim Yevmenkin sb->sb_mbtail = m0; 1111df8bae1dSRodney W. Grimes m = m0->m_next; 1112df8bae1dSRodney W. Grimes m0->m_next = 0; 1113df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) { 1114df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR; 1115df8bae1dSRodney W. Grimes m->m_flags |= M_EOR; 1116df8bae1dSRodney W. Grimes } 1117e72a94adSMaksim Yevmenkin /* always call sbcompress() so it can do SBLASTMBUFCHK() */ 1118df8bae1dSRodney W. Grimes sbcompress(sb, m, m0); 1119df8bae1dSRodney W. Grimes } 1120df8bae1dSRodney W. Grimes 1121df8bae1dSRodney W. 
Grimes /* 1122050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 1123a34b7046SRobert Watson */ 1124a34b7046SRobert Watson void 1125050ac265SRobert Watson sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 1126a34b7046SRobert Watson { 1127a34b7046SRobert Watson 1128a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1129a34b7046SRobert Watson sbappendrecord_locked(sb, m0); 1130a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1131a34b7046SRobert Watson } 1132a34b7046SRobert Watson 11338de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */ 11348de34a88SAlan Somers static int 11358de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, 11368de34a88SAlan Somers struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) 1137df8bae1dSRodney W. Grimes { 1138395bb186SSam Leffler struct mbuf *m, *n, *nlast; 1139c43cad1aSScott Long #if MSIZE <= 256 1140df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN) 1141df8bae1dSRodney W. Grimes return (0); 1142c43cad1aSScott Long #endif 1143c8b59ea7SGleb Smirnoff m = m_get(M_NOWAIT, MT_SONAME); 1144c8b59ea7SGleb Smirnoff if (m == NULL) 1145df8bae1dSRodney W. Grimes return (0); 1146df8bae1dSRodney W. Grimes m->m_len = asa->sa_len; 114780208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len); 1148c33a2313SAndrey V. Elsukov if (m0) { 114953b680caSGleb Smirnoff m_clrprotoflags(m0); 115057386f5dSAndrey V. Elsukov m_tag_delete_chain(m0, NULL); 1151c33a2313SAndrey V. Elsukov /* 1152c33a2313SAndrey V. Elsukov * Clear some persistent info from pkthdr. 1153c33a2313SAndrey V. Elsukov * We don't use m_demote(), because some netgraph consumers 1154c33a2313SAndrey V. Elsukov * expect M_PKTHDR presence. 1155c33a2313SAndrey V. Elsukov */ 1156c33a2313SAndrey V. Elsukov m0->m_pkthdr.rcvif = NULL; 1157c33a2313SAndrey V. Elsukov m0->m_pkthdr.flowid = 0; 1158c33a2313SAndrey V. Elsukov m0->m_pkthdr.csum_flags = 0; 1159c33a2313SAndrey V. 
Elsukov m0->m_pkthdr.fibnum = 0; 1160c33a2313SAndrey V. Elsukov m0->m_pkthdr.rsstype = 0; 1161c33a2313SAndrey V. Elsukov } 11628de34a88SAlan Somers if (ctrl_last) 11638de34a88SAlan Somers ctrl_last->m_next = m0; /* concatenate data to control */ 1164df8bae1dSRodney W. Grimes else 1165df8bae1dSRodney W. Grimes control = m0; 1166df8bae1dSRodney W. Grimes m->m_next = control; 1167395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next) 1168df8bae1dSRodney W. Grimes sballoc(sb, n); 1169395bb186SSam Leffler sballoc(sb, n); 1170395bb186SSam Leffler nlast = n; 1171395bb186SSam Leffler SBLINKRECORD(sb, m); 1172395bb186SSam Leffler 1173395bb186SSam Leffler sb->sb_mbtail = nlast; 1174395bb186SSam Leffler SBLASTMBUFCHK(sb); 1175395bb186SSam Leffler 1176395bb186SSam Leffler SBLASTRECORDCHK(sb); 1177df8bae1dSRodney W. Grimes return (1); 1178df8bae1dSRodney W. Grimes } 1179df8bae1dSRodney W. Grimes 1180a34b7046SRobert Watson /* 1181050ac265SRobert Watson * Append address and data, and optionally, control (ancillary) data to the 1182050ac265SRobert Watson * receive queue of a socket. If present, m0 must include a packet header 1183050ac265SRobert Watson * with total length. Returns 0 if no space in sockbuf or insufficient 1184050ac265SRobert Watson * mbufs. 1185a34b7046SRobert Watson */ 118626f9a767SRodney W. 
Grimes int 11878de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, 11888de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 11898de34a88SAlan Somers { 11908de34a88SAlan Somers struct mbuf *ctrl_last; 11918de34a88SAlan Somers int space = asa->sa_len; 11928de34a88SAlan Somers 11938de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 11948de34a88SAlan Somers 11958de34a88SAlan Somers if (m0 && (m0->m_flags & M_PKTHDR) == 0) 11968de34a88SAlan Somers panic("sbappendaddr_locked"); 11978de34a88SAlan Somers if (m0) 11988de34a88SAlan Somers space += m0->m_pkthdr.len; 11998de34a88SAlan Somers space += m_length(control, &ctrl_last); 12008de34a88SAlan Somers 12018de34a88SAlan Somers if (space > sbspace(sb)) 12028de34a88SAlan Somers return (0); 12038de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 12048de34a88SAlan Somers } 12058de34a88SAlan Somers 12068de34a88SAlan Somers /* 12078de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 12088de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 12098de34a88SAlan Somers * with total length. Returns 0 if insufficient mbufs. Does not validate space 12108de34a88SAlan Somers * on the receiving sockbuf. 12118de34a88SAlan Somers */ 12128de34a88SAlan Somers int 12138de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, 12148de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 12158de34a88SAlan Somers { 12168de34a88SAlan Somers struct mbuf *ctrl_last; 12178de34a88SAlan Somers 12188de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 12198de34a88SAlan Somers 12208de34a88SAlan Somers ctrl_last = (control == NULL) ? 
NULL : m_last(control); 12218de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 12228de34a88SAlan Somers } 12238de34a88SAlan Somers 12248de34a88SAlan Somers /* 12258de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 12268de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 12278de34a88SAlan Somers * with total length. Returns 0 if no space in sockbuf or insufficient 12288de34a88SAlan Somers * mbufs. 12298de34a88SAlan Somers */ 12308de34a88SAlan Somers int 1231050ac265SRobert Watson sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, 1232050ac265SRobert Watson struct mbuf *m0, struct mbuf *control) 1233a34b7046SRobert Watson { 1234a34b7046SRobert Watson int retval; 1235a34b7046SRobert Watson 1236a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1237a34b7046SRobert Watson retval = sbappendaddr_locked(sb, asa, m0, control); 1238a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1239a34b7046SRobert Watson return (retval); 1240a34b7046SRobert Watson } 1241a34b7046SRobert Watson 12425b0480f2SMark Johnston void 1243050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, 124425f4ddfbSMark Johnston struct mbuf *control, int flags) 1245df8bae1dSRodney W. Grimes { 12465b0480f2SMark Johnston struct mbuf *m, *mlast; 1247df8bae1dSRodney W. Grimes 124825f4ddfbSMark Johnston sbm_clrprotoflags(m0, flags); 12495b0480f2SMark Johnston m_last(control)->m_next = m0; 1250395bb186SSam Leffler 1251395bb186SSam Leffler SBLASTRECORDCHK(sb); 1252395bb186SSam Leffler 1253395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next) 1254df8bae1dSRodney W. 
Grimes sballoc(sb, m); 1255395bb186SSam Leffler sballoc(sb, m); 1256395bb186SSam Leffler mlast = m; 1257395bb186SSam Leffler SBLINKRECORD(sb, control); 1258395bb186SSam Leffler 1259395bb186SSam Leffler sb->sb_mbtail = mlast; 1260395bb186SSam Leffler SBLASTMBUFCHK(sb); 1261395bb186SSam Leffler 1262395bb186SSam Leffler SBLASTRECORDCHK(sb); 1263df8bae1dSRodney W. Grimes } 1264df8bae1dSRodney W. Grimes 12655b0480f2SMark Johnston void 126625f4ddfbSMark Johnston sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, 126725f4ddfbSMark Johnston int flags) 1268a34b7046SRobert Watson { 1269a34b7046SRobert Watson 1270a34b7046SRobert Watson SOCKBUF_LOCK(sb); 127125f4ddfbSMark Johnston sbappendcontrol_locked(sb, m0, control, flags); 1272a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1273a34b7046SRobert Watson } 1274a34b7046SRobert Watson 1275df8bae1dSRodney W. Grimes /* 12767da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 12777da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty. 12787da7362bSRobert Watson * 12797da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of 12807da7362bSRobert Watson * three ways: 12817da7362bSRobert Watson * 12827da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 12837da7362bSRobert Watson * record boundary, and no change in data type). 12847da7362bSRobert Watson * 12857da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 12867da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an 12870f9d0a73SGleb Smirnoff * appropriate mbuf exists, there is room, both mbufs are not marked as 12880f9d0a73SGleb Smirnoff * not ready, and no merging of data types will occur. 12897da7362bSRobert Watson * 12907da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain. 
12917da7362bSRobert Watson * 12927da7362bSRobert Watson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as 12937da7362bSRobert Watson * end-of-record. 1294df8bae1dSRodney W. Grimes */ 129526f9a767SRodney W. Grimes void 1296050ac265SRobert Watson sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 1297df8bae1dSRodney W. Grimes { 1298050ac265SRobert Watson int eor = 0; 1299050ac265SRobert Watson struct mbuf *o; 1300df8bae1dSRodney W. Grimes 1301a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1302a34b7046SRobert Watson 1303df8bae1dSRodney W. Grimes while (m) { 1304df8bae1dSRodney W. Grimes eor |= m->m_flags & M_EOR; 1305df8bae1dSRodney W. Grimes if (m->m_len == 0 && 1306df8bae1dSRodney W. Grimes (eor == 0 || 1307df8bae1dSRodney W. Grimes (((o = m->m_next) || (o = n)) && 1308df8bae1dSRodney W. Grimes o->m_type == m->m_type))) { 1309395bb186SSam Leffler if (sb->sb_lastrecord == m) 1310395bb186SSam Leffler sb->sb_lastrecord = m->m_next; 1311df8bae1dSRodney W. Grimes m = m_free(m); 1312df8bae1dSRodney W. Grimes continue; 1313df8bae1dSRodney W. Grimes } 131432af0d74SDavid Malone if (n && (n->m_flags & M_EOR) == 0 && 131532af0d74SDavid Malone M_WRITABLE(n) && 13165e0f5cfaSKip Macy ((sb->sb_flags & SB_NOCOALESCE) == 0) && 13170f9d0a73SGleb Smirnoff !(m->m_flags & M_NOTREADY) && 13186edfd179SGleb Smirnoff !(n->m_flags & (M_NOTREADY | M_EXTPG)) && 1319b2e60773SJohn Baldwin !mbuf_has_tls_session(m) && 1320b2e60773SJohn Baldwin !mbuf_has_tls_session(n) && 132132af0d74SDavid Malone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 132232af0d74SDavid Malone m->m_len <= M_TRAILINGSPACE(n) && 1323df8bae1dSRodney W. Grimes n->m_type == m->m_type) { 132482334850SJohn Baldwin m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); 1325df8bae1dSRodney W. 
Grimes n->m_len += m->m_len; 13260f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 13270f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) 13280f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 132934333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 1330b3f1af6bSTim J. Robbins /* XXX: Probably don't need.*/ 133104ac9b97SKelly Yancey sb->sb_ctl += m->m_len; 1332df8bae1dSRodney W. Grimes m = m_free(m); 1333df8bae1dSRodney W. Grimes continue; 1334df8bae1dSRodney W. Grimes } 13356edfd179SGleb Smirnoff if (m->m_len <= MLEN && (m->m_flags & M_EXTPG) && 1336b2e60773SJohn Baldwin (m->m_flags & M_NOTREADY) == 0 && 1337b2e60773SJohn Baldwin !mbuf_has_tls_session(m)) 133882334850SJohn Baldwin (void)mb_unmapped_compress(m); 1339df8bae1dSRodney W. Grimes if (n) 1340df8bae1dSRodney W. Grimes n->m_next = m; 1341df8bae1dSRodney W. Grimes else 1342df8bae1dSRodney W. Grimes sb->sb_mb = m; 1343395bb186SSam Leffler sb->sb_mbtail = m; 1344df8bae1dSRodney W. Grimes sballoc(sb, m); 1345df8bae1dSRodney W. Grimes n = m; 1346df8bae1dSRodney W. Grimes m->m_flags &= ~M_EOR; 1347df8bae1dSRodney W. Grimes m = m->m_next; 1348df8bae1dSRodney W. Grimes n->m_next = 0; 1349df8bae1dSRodney W. Grimes } 1350df8bae1dSRodney W. Grimes if (eor) { 13517da7362bSRobert Watson KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); 1352df8bae1dSRodney W. Grimes n->m_flags |= eor; 1353df8bae1dSRodney W. Grimes } 1354395bb186SSam Leffler SBLASTMBUFCHK(sb); 1355df8bae1dSRodney W. Grimes } 1356df8bae1dSRodney W. Grimes 1357*3c0e5685SJohn Baldwin #ifdef KERN_TLS 1358*3c0e5685SJohn Baldwin /* 1359*3c0e5685SJohn Baldwin * A version of sbcompress() for encrypted TLS RX mbufs. These mbufs 1360*3c0e5685SJohn Baldwin * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also 1361*3c0e5685SJohn Baldwin * a bit simpler (no EOR markers, always MT_DATA, etc.). 
1362*3c0e5685SJohn Baldwin */ 1363*3c0e5685SJohn Baldwin static void 1364*3c0e5685SJohn Baldwin sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 1365*3c0e5685SJohn Baldwin { 1366*3c0e5685SJohn Baldwin 1367*3c0e5685SJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 1368*3c0e5685SJohn Baldwin 1369*3c0e5685SJohn Baldwin while (m) { 1370*3c0e5685SJohn Baldwin KASSERT((m->m_flags & M_EOR) == 0, 1371*3c0e5685SJohn Baldwin ("TLS RX mbuf %p with EOR", m)); 1372*3c0e5685SJohn Baldwin KASSERT(m->m_type == MT_DATA, 1373*3c0e5685SJohn Baldwin ("TLS RX mbuf %p is not MT_DATA", m)); 1374*3c0e5685SJohn Baldwin KASSERT((m->m_flags & M_NOTREADY) != 0, 1375*3c0e5685SJohn Baldwin ("TLS RX mbuf %p ready", m)); 1376*3c0e5685SJohn Baldwin KASSERT((m->m_flags & M_EXTPG) == 0, 1377*3c0e5685SJohn Baldwin ("TLS RX mbuf %p unmapped", m)); 1378*3c0e5685SJohn Baldwin 1379*3c0e5685SJohn Baldwin if (m->m_len == 0) { 1380*3c0e5685SJohn Baldwin m = m_free(m); 1381*3c0e5685SJohn Baldwin continue; 1382*3c0e5685SJohn Baldwin } 1383*3c0e5685SJohn Baldwin 1384*3c0e5685SJohn Baldwin /* 1385*3c0e5685SJohn Baldwin * Even though both 'n' and 'm' are NOTREADY, it's ok 1386*3c0e5685SJohn Baldwin * to coalesce the data. 
1387*3c0e5685SJohn Baldwin */ 1388*3c0e5685SJohn Baldwin if (n && 1389*3c0e5685SJohn Baldwin M_WRITABLE(n) && 1390*3c0e5685SJohn Baldwin ((sb->sb_flags & SB_NOCOALESCE) == 0) && 1391*3c0e5685SJohn Baldwin !(n->m_flags & (M_EXTPG)) && 1392*3c0e5685SJohn Baldwin m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 1393*3c0e5685SJohn Baldwin m->m_len <= M_TRAILINGSPACE(n)) { 1394*3c0e5685SJohn Baldwin m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); 1395*3c0e5685SJohn Baldwin n->m_len += m->m_len; 1396*3c0e5685SJohn Baldwin sb->sb_ccc += m->m_len; 1397*3c0e5685SJohn Baldwin sb->sb_tlscc += m->m_len; 1398*3c0e5685SJohn Baldwin m = m_free(m); 1399*3c0e5685SJohn Baldwin continue; 1400*3c0e5685SJohn Baldwin } 1401*3c0e5685SJohn Baldwin if (n) 1402*3c0e5685SJohn Baldwin n->m_next = m; 1403*3c0e5685SJohn Baldwin else 1404*3c0e5685SJohn Baldwin sb->sb_mtls = m; 1405*3c0e5685SJohn Baldwin sb->sb_mtlstail = m; 1406*3c0e5685SJohn Baldwin sballoc_ktls_rx(sb, m); 1407*3c0e5685SJohn Baldwin n = m; 1408*3c0e5685SJohn Baldwin m = m->m_next; 1409*3c0e5685SJohn Baldwin n->m_next = NULL; 1410*3c0e5685SJohn Baldwin } 1411*3c0e5685SJohn Baldwin SBLASTMBUFCHK(sb); 1412*3c0e5685SJohn Baldwin } 1413*3c0e5685SJohn Baldwin #endif 1414*3c0e5685SJohn Baldwin 1415df8bae1dSRodney W. Grimes /* 1416050ac265SRobert Watson * Free all mbufs in a sockbuf. Check that all resources are reclaimed. 1417df8bae1dSRodney W. Grimes */ 1418eaa6dfbcSRobert Watson static void 1419050ac265SRobert Watson sbflush_internal(struct sockbuf *sb) 1420df8bae1dSRodney W. Grimes { 1421df8bae1dSRodney W. Grimes 1422*3c0e5685SJohn Baldwin while (sb->sb_mbcnt || sb->sb_tlsdcc) { 142323f84772SPierre Beyssac /* 1424761a9a1fSGleb Smirnoff * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: 142523f84772SPierre Beyssac * we would loop forever. Panic instead. 
142623f84772SPierre Beyssac */ 14270f9d0a73SGleb Smirnoff if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) 142823f84772SPierre Beyssac break; 14290f9d0a73SGleb Smirnoff m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); 143023f84772SPierre Beyssac } 14310f9d0a73SGleb Smirnoff KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, 14320f9d0a73SGleb Smirnoff ("%s: ccc %u mb %p mbcnt %u", __func__, 14330f9d0a73SGleb Smirnoff sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); 1434a34b7046SRobert Watson } 1435a34b7046SRobert Watson 1436a34b7046SRobert Watson void 1437050ac265SRobert Watson sbflush_locked(struct sockbuf *sb) 1438eaa6dfbcSRobert Watson { 1439eaa6dfbcSRobert Watson 1440eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1441eaa6dfbcSRobert Watson sbflush_internal(sb); 1442eaa6dfbcSRobert Watson } 1443eaa6dfbcSRobert Watson 1444eaa6dfbcSRobert Watson void 1445050ac265SRobert Watson sbflush(struct sockbuf *sb) 1446a34b7046SRobert Watson { 1447a34b7046SRobert Watson 1448a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1449a34b7046SRobert Watson sbflush_locked(sb); 1450a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1451df8bae1dSRodney W. Grimes } 1452df8bae1dSRodney W. Grimes 1453df8bae1dSRodney W. Grimes /* 14541d2df300SGleb Smirnoff * Cut data from (the front of) a sockbuf. 1455df8bae1dSRodney W. Grimes */ 14561d2df300SGleb Smirnoff static struct mbuf * 14571d2df300SGleb Smirnoff sbcut_internal(struct sockbuf *sb, int len) 1458df8bae1dSRodney W. Grimes { 14590f9d0a73SGleb Smirnoff struct mbuf *m, *next, *mfree; 1460*3c0e5685SJohn Baldwin bool is_tls; 1461df8bae1dSRodney W. Grimes 1462f41b2de7SHiren Panchasara KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", 1463b5b023b9SHiren Panchasara __func__, len)); 1464b5b023b9SHiren Panchasara KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", 1465b5b023b9SHiren Panchasara __func__, len, sb->sb_ccc)); 1466b5b023b9SHiren Panchasara 1467df8bae1dSRodney W. Grimes next = (m = sb->sb_mb) ? 
m->m_nextpkt : 0; 1468*3c0e5685SJohn Baldwin is_tls = false; 14691d2df300SGleb Smirnoff mfree = NULL; 14701d2df300SGleb Smirnoff 1471df8bae1dSRodney W. Grimes while (len > 0) { 14728146bcfeSGleb Smirnoff if (m == NULL) { 1473*3c0e5685SJohn Baldwin #ifdef KERN_TLS 1474*3c0e5685SJohn Baldwin if (next == NULL && !is_tls) { 1475*3c0e5685SJohn Baldwin if (sb->sb_tlsdcc != 0) { 1476*3c0e5685SJohn Baldwin MPASS(len >= sb->sb_tlsdcc); 1477*3c0e5685SJohn Baldwin len -= sb->sb_tlsdcc; 1478*3c0e5685SJohn Baldwin sb->sb_ccc -= sb->sb_tlsdcc; 1479*3c0e5685SJohn Baldwin sb->sb_tlsdcc = 0; 1480*3c0e5685SJohn Baldwin if (len == 0) 1481*3c0e5685SJohn Baldwin break; 1482*3c0e5685SJohn Baldwin } 1483*3c0e5685SJohn Baldwin next = sb->sb_mtls; 1484*3c0e5685SJohn Baldwin is_tls = true; 1485*3c0e5685SJohn Baldwin } 1486*3c0e5685SJohn Baldwin #endif 14878146bcfeSGleb Smirnoff KASSERT(next, ("%s: no next, len %d", __func__, len)); 1488df8bae1dSRodney W. Grimes m = next; 1489df8bae1dSRodney W. Grimes next = m->m_nextpkt; 1490df8bae1dSRodney W. Grimes } 1491df8bae1dSRodney W. Grimes if (m->m_len > len) { 14920f9d0a73SGleb Smirnoff KASSERT(!(m->m_flags & M_NOTAVAIL), 14930f9d0a73SGleb Smirnoff ("%s: m %p M_NOTAVAIL", __func__, m)); 1494df8bae1dSRodney W. Grimes m->m_len -= len; 1495df8bae1dSRodney W. Grimes m->m_data += len; 14960f9d0a73SGleb Smirnoff sb->sb_ccc -= len; 14970f9d0a73SGleb Smirnoff sb->sb_acc -= len; 14984e023759SAndre Oppermann if (sb->sb_sndptroff != 0) 14994e023759SAndre Oppermann sb->sb_sndptroff -= len; 150034333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 150104ac9b97SKelly Yancey sb->sb_ctl -= len; 1502df8bae1dSRodney W. Grimes break; 1503df8bae1dSRodney W. Grimes } 1504df8bae1dSRodney W. Grimes len -= m->m_len; 1505*3c0e5685SJohn Baldwin #ifdef KERN_TLS 1506*3c0e5685SJohn Baldwin if (is_tls) 1507*3c0e5685SJohn Baldwin sbfree_ktls_rx(sb, m); 1508*3c0e5685SJohn Baldwin else 1509*3c0e5685SJohn Baldwin #endif 1510df8bae1dSRodney W. 
Grimes sbfree(sb, m); 15110f9d0a73SGleb Smirnoff /* 15120f9d0a73SGleb Smirnoff * Do not put M_NOTREADY buffers to the free list, they 15130f9d0a73SGleb Smirnoff * are referenced from outside. 15140f9d0a73SGleb Smirnoff */ 1515*3c0e5685SJohn Baldwin if (m->m_flags & M_NOTREADY && !is_tls) 15160f9d0a73SGleb Smirnoff m = m->m_next; 15170f9d0a73SGleb Smirnoff else { 15180f9d0a73SGleb Smirnoff struct mbuf *n; 15190f9d0a73SGleb Smirnoff 15201d2df300SGleb Smirnoff n = m->m_next; 15211d2df300SGleb Smirnoff m->m_next = mfree; 15221d2df300SGleb Smirnoff mfree = m; 15231d2df300SGleb Smirnoff m = n; 1524df8bae1dSRodney W. Grimes } 15250f9d0a73SGleb Smirnoff } 1526e834a840SGleb Smirnoff /* 1527e834a840SGleb Smirnoff * Free any zero-length mbufs from the buffer. 1528e834a840SGleb Smirnoff * For SOCK_DGRAM sockets such mbufs represent empty records. 1529e834a840SGleb Smirnoff * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, 1530e834a840SGleb Smirnoff * when sosend_generic() needs to send only control data. 1531e834a840SGleb Smirnoff */ 1532e834a840SGleb Smirnoff while (m && m->m_len == 0) { 1533e834a840SGleb Smirnoff struct mbuf *n; 1534e834a840SGleb Smirnoff 1535e834a840SGleb Smirnoff sbfree(sb, m); 1536e834a840SGleb Smirnoff n = m->m_next; 1537e834a840SGleb Smirnoff m->m_next = mfree; 1538e834a840SGleb Smirnoff mfree = m; 1539e834a840SGleb Smirnoff m = n; 1540e834a840SGleb Smirnoff } 1541*3c0e5685SJohn Baldwin #ifdef KERN_TLS 1542*3c0e5685SJohn Baldwin if (is_tls) { 1543*3c0e5685SJohn Baldwin sb->sb_mb = NULL; 1544*3c0e5685SJohn Baldwin sb->sb_mtls = m; 1545*3c0e5685SJohn Baldwin if (m == NULL) 1546*3c0e5685SJohn Baldwin sb->sb_mtlstail = NULL; 1547*3c0e5685SJohn Baldwin } else 1548*3c0e5685SJohn Baldwin #endif 1549df8bae1dSRodney W. Grimes if (m) { 1550df8bae1dSRodney W. Grimes sb->sb_mb = m; 1551df8bae1dSRodney W. Grimes m->m_nextpkt = next; 1552df8bae1dSRodney W. Grimes } else 1553df8bae1dSRodney W. 
Grimes sb->sb_mb = next; 1554395bb186SSam Leffler /* 1555050ac265SRobert Watson * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure 1556050ac265SRobert Watson * sb_lastrecord is up-to-date if we dropped part of the last record. 1557395bb186SSam Leffler */ 1558395bb186SSam Leffler m = sb->sb_mb; 1559395bb186SSam Leffler if (m == NULL) { 1560395bb186SSam Leffler sb->sb_mbtail = NULL; 1561395bb186SSam Leffler sb->sb_lastrecord = NULL; 1562395bb186SSam Leffler } else if (m->m_nextpkt == NULL) { 1563395bb186SSam Leffler sb->sb_lastrecord = m; 1564395bb186SSam Leffler } 15651d2df300SGleb Smirnoff 15661d2df300SGleb Smirnoff return (mfree); 1567df8bae1dSRodney W. Grimes } 1568df8bae1dSRodney W. Grimes 1569df8bae1dSRodney W. Grimes /* 1570a34b7046SRobert Watson * Drop data from (the front of) a sockbuf. 1571a34b7046SRobert Watson */ 1572a34b7046SRobert Watson void 1573050ac265SRobert Watson sbdrop_locked(struct sockbuf *sb, int len) 1574eaa6dfbcSRobert Watson { 1575eaa6dfbcSRobert Watson 1576eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 15771d2df300SGleb Smirnoff m_freem(sbcut_internal(sb, len)); 15781d2df300SGleb Smirnoff } 1579eaa6dfbcSRobert Watson 15801d2df300SGleb Smirnoff /* 15811d2df300SGleb Smirnoff * Drop data from (the front of) a sockbuf, 15821d2df300SGleb Smirnoff * and return it to caller. 
15831d2df300SGleb Smirnoff */ 15841d2df300SGleb Smirnoff struct mbuf * 15851d2df300SGleb Smirnoff sbcut_locked(struct sockbuf *sb, int len) 15861d2df300SGleb Smirnoff { 15871d2df300SGleb Smirnoff 15881d2df300SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 15891d2df300SGleb Smirnoff return (sbcut_internal(sb, len)); 1590eaa6dfbcSRobert Watson } 1591eaa6dfbcSRobert Watson 1592eaa6dfbcSRobert Watson void 1593050ac265SRobert Watson sbdrop(struct sockbuf *sb, int len) 1594a34b7046SRobert Watson { 15951d2df300SGleb Smirnoff struct mbuf *mfree; 1596a34b7046SRobert Watson 1597a34b7046SRobert Watson SOCKBUF_LOCK(sb); 15981d2df300SGleb Smirnoff mfree = sbcut_internal(sb, len); 1599a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 16001d2df300SGleb Smirnoff 16011d2df300SGleb Smirnoff m_freem(mfree); 1602a34b7046SRobert Watson } 1603a34b7046SRobert Watson 160489e560f4SRandall Stewart struct mbuf * 160589e560f4SRandall Stewart sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) 160689e560f4SRandall Stewart { 160789e560f4SRandall Stewart struct mbuf *m; 160889e560f4SRandall Stewart 160989e560f4SRandall Stewart KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 161089e560f4SRandall Stewart if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 161189e560f4SRandall Stewart *moff = off; 161289e560f4SRandall Stewart if (sb->sb_sndptr == NULL) { 161389e560f4SRandall Stewart sb->sb_sndptr = sb->sb_mb; 161489e560f4SRandall Stewart sb->sb_sndptroff = 0; 161589e560f4SRandall Stewart } 161689e560f4SRandall Stewart return (sb->sb_mb); 161789e560f4SRandall Stewart } else { 161889e560f4SRandall Stewart m = sb->sb_sndptr; 161989e560f4SRandall Stewart off -= sb->sb_sndptroff; 162089e560f4SRandall Stewart } 162189e560f4SRandall Stewart *moff = off; 162289e560f4SRandall Stewart return (m); 162389e560f4SRandall Stewart } 162489e560f4SRandall Stewart 162589e560f4SRandall Stewart void 162689e560f4SRandall Stewart sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) 
162789e560f4SRandall Stewart { 162889e560f4SRandall Stewart /* 162989e560f4SRandall Stewart * A small copy was done, advance forward the sb_sbsndptr to cover 163089e560f4SRandall Stewart * it. 163189e560f4SRandall Stewart */ 163289e560f4SRandall Stewart struct mbuf *m; 163389e560f4SRandall Stewart 163489e560f4SRandall Stewart if (mb != sb->sb_sndptr) { 163589e560f4SRandall Stewart /* Did not copyout at the same mbuf */ 163689e560f4SRandall Stewart return; 163789e560f4SRandall Stewart } 163889e560f4SRandall Stewart m = mb; 163989e560f4SRandall Stewart while (m && (len > 0)) { 164089e560f4SRandall Stewart if (len >= m->m_len) { 164189e560f4SRandall Stewart len -= m->m_len; 164289e560f4SRandall Stewart if (m->m_next) { 164389e560f4SRandall Stewart sb->sb_sndptroff += m->m_len; 164489e560f4SRandall Stewart sb->sb_sndptr = m->m_next; 164589e560f4SRandall Stewart } 164689e560f4SRandall Stewart m = m->m_next; 164789e560f4SRandall Stewart } else { 164889e560f4SRandall Stewart len = 0; 164989e560f4SRandall Stewart } 165089e560f4SRandall Stewart } 165189e560f4SRandall Stewart } 165289e560f4SRandall Stewart 1653a34b7046SRobert Watson /* 16549fd573c3SHans Petter Selasky * Return the first mbuf and the mbuf data offset for the provided 16559fd573c3SHans Petter Selasky * send offset without changing the "sb_sndptroff" field. 
16569fd573c3SHans Petter Selasky */ 16579fd573c3SHans Petter Selasky struct mbuf * 16589fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) 16599fd573c3SHans Petter Selasky { 16609fd573c3SHans Petter Selasky struct mbuf *m; 16619fd573c3SHans Petter Selasky 16629fd573c3SHans Petter Selasky KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 16639fd573c3SHans Petter Selasky 16649fd573c3SHans Petter Selasky /* 16659fd573c3SHans Petter Selasky * If the "off" is below the stored offset, which happens on 16669fd573c3SHans Petter Selasky * retransmits, just use "sb_mb": 16679fd573c3SHans Petter Selasky */ 16689fd573c3SHans Petter Selasky if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 16699fd573c3SHans Petter Selasky m = sb->sb_mb; 16709fd573c3SHans Petter Selasky } else { 16719fd573c3SHans Petter Selasky m = sb->sb_sndptr; 16729fd573c3SHans Petter Selasky off -= sb->sb_sndptroff; 16739fd573c3SHans Petter Selasky } 16749fd573c3SHans Petter Selasky while (off > 0 && m != NULL) { 16759fd573c3SHans Petter Selasky if (off < m->m_len) 16769fd573c3SHans Petter Selasky break; 16779fd573c3SHans Petter Selasky off -= m->m_len; 16789fd573c3SHans Petter Selasky m = m->m_next; 16799fd573c3SHans Petter Selasky } 16809fd573c3SHans Petter Selasky *moff = off; 16819fd573c3SHans Petter Selasky return (m); 16829fd573c3SHans Petter Selasky } 16839fd573c3SHans Petter Selasky 16849fd573c3SHans Petter Selasky /* 1685050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1686050ac265SRobert Watson * front. 1687df8bae1dSRodney W. Grimes */ 168826f9a767SRodney W. Grimes void 1689050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb) 1690df8bae1dSRodney W. Grimes { 1691050ac265SRobert Watson struct mbuf *m; 1692df8bae1dSRodney W. Grimes 1693a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1694a34b7046SRobert Watson 1695df8bae1dSRodney W. Grimes m = sb->sb_mb; 1696df8bae1dSRodney W. 
Grimes if (m) { 1697df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt; 1698df8bae1dSRodney W. Grimes do { 1699df8bae1dSRodney W. Grimes sbfree(sb, m); 1700ecde8f7cSMatthew Dillon m = m_free(m); 1701797f2d22SPoul-Henning Kamp } while (m); 1702df8bae1dSRodney W. Grimes } 1703395bb186SSam Leffler SB_EMPTY_FIXUP(sb); 1704df8bae1dSRodney W. Grimes } 17051e4ad9ceSGarrett Wollman 170682c23ebaSBill Fenner /* 1707050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1708050ac265SRobert Watson * front. 1709a34b7046SRobert Watson */ 1710a34b7046SRobert Watson void 1711050ac265SRobert Watson sbdroprecord(struct sockbuf *sb) 1712a34b7046SRobert Watson { 1713a34b7046SRobert Watson 1714a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1715a34b7046SRobert Watson sbdroprecord_locked(sb); 1716a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1717a34b7046SRobert Watson } 1718a34b7046SRobert Watson 171920d9e5e8SRobert Watson /* 17208c799760SRobert Watson * Create a "control" mbuf containing the specified data with the specified 17218c799760SRobert Watson * type for presentation on a socket buffer. 
172220d9e5e8SRobert Watson */ 172320d9e5e8SRobert Watson struct mbuf * 1724*3c0e5685SJohn Baldwin sbcreatecontrol_how(void *p, int size, int type, int level, int wait) 172520d9e5e8SRobert Watson { 1726d19e16a7SRobert Watson struct cmsghdr *cp; 172720d9e5e8SRobert Watson struct mbuf *m; 172820d9e5e8SRobert Watson 1729*3c0e5685SJohn Baldwin MBUF_CHECKSLEEP(wait); 173020d9e5e8SRobert Watson if (CMSG_SPACE((u_int)size) > MCLBYTES) 173120d9e5e8SRobert Watson return ((struct mbuf *) NULL); 173220d9e5e8SRobert Watson if (CMSG_SPACE((u_int)size) > MLEN) 1733*3c0e5685SJohn Baldwin m = m_getcl(wait, MT_CONTROL, 0); 173420d9e5e8SRobert Watson else 1735*3c0e5685SJohn Baldwin m = m_get(wait, MT_CONTROL); 173620d9e5e8SRobert Watson if (m == NULL) 173720d9e5e8SRobert Watson return ((struct mbuf *) NULL); 173820d9e5e8SRobert Watson cp = mtod(m, struct cmsghdr *); 173920d9e5e8SRobert Watson m->m_len = 0; 174020d9e5e8SRobert Watson KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), 174120d9e5e8SRobert Watson ("sbcreatecontrol: short mbuf")); 17422827952eSXin LI /* 17432827952eSXin LI * Don't leave the padding between the msg header and the 17442827952eSXin LI * cmsg data and the padding after the cmsg data un-initialized. 
17452827952eSXin LI */ 17462827952eSXin LI bzero(cp, CMSG_SPACE((u_int)size)); 174720d9e5e8SRobert Watson if (p != NULL) 174820d9e5e8SRobert Watson (void)memcpy(CMSG_DATA(cp), p, size); 174920d9e5e8SRobert Watson m->m_len = CMSG_SPACE(size); 175020d9e5e8SRobert Watson cp->cmsg_len = CMSG_LEN(size); 175120d9e5e8SRobert Watson cp->cmsg_level = level; 175220d9e5e8SRobert Watson cp->cmsg_type = type; 175320d9e5e8SRobert Watson return (m); 175420d9e5e8SRobert Watson } 175520d9e5e8SRobert Watson 1756*3c0e5685SJohn Baldwin struct mbuf * 1757*3c0e5685SJohn Baldwin sbcreatecontrol(caddr_t p, int size, int type, int level) 1758*3c0e5685SJohn Baldwin { 1759*3c0e5685SJohn Baldwin 1760*3c0e5685SJohn Baldwin return (sbcreatecontrol_how(p, size, type, level, M_NOWAIT)); 1761*3c0e5685SJohn Baldwin } 1762*3c0e5685SJohn Baldwin 176320d9e5e8SRobert Watson /* 17648c799760SRobert Watson * This does the same for socket buffers that sotoxsocket does for sockets: 17658c799760SRobert Watson * generate an user-format data structure describing the socket buffer. Note 17668c799760SRobert Watson * that the xsockbuf structure, since it is always embedded in a socket, does 17678c799760SRobert Watson * not include a self pointer nor a length. We make this entry point public 17688c799760SRobert Watson * in case some other mechanism needs it. 176920d9e5e8SRobert Watson */ 177020d9e5e8SRobert Watson void 177120d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) 177220d9e5e8SRobert Watson { 1773d19e16a7SRobert Watson 17740f9d0a73SGleb Smirnoff xsb->sb_cc = sb->sb_ccc; 177520d9e5e8SRobert Watson xsb->sb_hiwat = sb->sb_hiwat; 177620d9e5e8SRobert Watson xsb->sb_mbcnt = sb->sb_mbcnt; 177749f287f8SGeorge V. Neville-Neil xsb->sb_mcnt = sb->sb_mcnt; 177849f287f8SGeorge V. 
Neville-Neil xsb->sb_ccnt = sb->sb_ccnt; 177920d9e5e8SRobert Watson xsb->sb_mbmax = sb->sb_mbmax; 178020d9e5e8SRobert Watson xsb->sb_lowat = sb->sb_lowat; 178120d9e5e8SRobert Watson xsb->sb_flags = sb->sb_flags; 178220d9e5e8SRobert Watson xsb->sb_timeo = sb->sb_timeo; 178320d9e5e8SRobert Watson } 178420d9e5e8SRobert Watson 1785639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ 1786639acc13SGarrett Wollman static int dummy; 1787e8cdbb48SPawel Biernacki SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, ""); 17887029da5cSPawel Biernacki SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, 17897029da5cSPawel Biernacki CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &sb_max, 0, 17907029da5cSPawel Biernacki sysctl_handle_sb_max, "LU", 17917029da5cSPawel Biernacki "Maximum socket buffer size"); 17921b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 17933eb9ab52SEitan Adler &sb_efficiency, 0, "Socket buffer size waste factor"); 1794