19454b2d8SWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1990, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 1569a28758SEd Maste * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. 
Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 34677b542eSDavid E. O'Brien #include <sys/cdefs.h> 35677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 36677b542eSDavid E. O'Brien 37b2e60773SJohn Baldwin #include "opt_kern_tls.h" 385b86eac4SJesper Skriver #include "opt_param.h" 39335654d7SRobert Watson 40df8bae1dSRodney W. Grimes #include <sys/param.h> 41960ed29cSSeigo Tanimura #include <sys/aio.h> /* for aio_swake proto */ 42ff5c09daSGarrett Wollman #include <sys/kernel.h> 43b2e60773SJohn Baldwin #include <sys/ktls.h> 44fb919e4dSMark Murray #include <sys/lock.h> 458ec07310SGleb Smirnoff #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47960ed29cSSeigo Tanimura #include <sys/mutex.h> 48fb919e4dSMark Murray #include <sys/proc.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 502f9a2132SBrian Feldman #include <sys/resourcevar.h> 51960ed29cSSeigo Tanimura #include <sys/signalvar.h> 52df8bae1dSRodney W. Grimes #include <sys/socket.h> 53df8bae1dSRodney W. Grimes #include <sys/socketvar.h> 547abab911SRobert Watson #include <sys/sx.h> 55ff5c09daSGarrett Wollman #include <sys/sysctl.h> 5626f9a767SRodney W. Grimes 57f14cce87SRobert Watson /* 58f14cce87SRobert Watson * Function pointer set by the AIO routines so that the socket buffer code 59f14cce87SRobert Watson * can call back into the AIO module if it is loaded. 
60f14cce87SRobert Watson */ 6121d56e9cSAlfred Perlstein void (*aio_swake)(struct socket *, struct sockbuf *); 6221d56e9cSAlfred Perlstein 63df8bae1dSRodney W. Grimes /* 64f14cce87SRobert Watson * Primitive routines for operating on socket buffers 65df8bae1dSRodney W. Grimes */ 66df8bae1dSRodney W. Grimes 6779cb7eb4SDavid Greenman u_long sb_max = SB_MAX; 6858d14daeSMohan Srinivasan u_long sb_max_adj = 69b233773bSBjoern A. Zeeb (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ 70df8bae1dSRodney W. Grimes 714b29bc4fSGarrett Wollman static u_long sb_efficiency = 8; /* parameter for sbreserve() */ 724b29bc4fSGarrett Wollman 731d2df300SGleb Smirnoff static struct mbuf *sbcut_internal(struct sockbuf *sb, int len); 74050ac265SRobert Watson static void sbflush_internal(struct sockbuf *sb); 75eaa6dfbcSRobert Watson 76df8bae1dSRodney W. Grimes /* 77829fae90SGleb Smirnoff * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY. 78829fae90SGleb Smirnoff */ 79829fae90SGleb Smirnoff static void 80829fae90SGleb Smirnoff sbm_clrprotoflags(struct mbuf *m, int flags) 81829fae90SGleb Smirnoff { 82829fae90SGleb Smirnoff int mask; 83829fae90SGleb Smirnoff 84829fae90SGleb Smirnoff mask = ~M_PROTOFLAGS; 85829fae90SGleb Smirnoff if (flags & PRUS_NOTREADY) 86829fae90SGleb Smirnoff mask |= M_NOTREADY; 87829fae90SGleb Smirnoff while (m) { 88829fae90SGleb Smirnoff m->m_flags &= mask; 89829fae90SGleb Smirnoff m = m->m_next; 90829fae90SGleb Smirnoff } 91829fae90SGleb Smirnoff } 92829fae90SGleb Smirnoff 93829fae90SGleb Smirnoff /* 943807631bSJohn Baldwin * Compress M_NOTREADY mbufs after they have been readied by sbready(). 953807631bSJohn Baldwin * 963807631bSJohn Baldwin * sbcompress() skips M_NOTREADY mbufs since the data is not available to 973807631bSJohn Baldwin * be copied at the time of sbcompress(). This function combines small 983807631bSJohn Baldwin * mbufs similar to sbcompress() once mbufs are ready. 
'm0' is the first 993807631bSJohn Baldwin * mbuf sbready() marked ready, and 'end' is the first mbuf still not 1003807631bSJohn Baldwin * ready. 1013807631bSJohn Baldwin */ 1023807631bSJohn Baldwin static void 1033807631bSJohn Baldwin sbready_compress(struct sockbuf *sb, struct mbuf *m0, struct mbuf *end) 1043807631bSJohn Baldwin { 1053807631bSJohn Baldwin struct mbuf *m, *n; 1063807631bSJohn Baldwin int ext_size; 1073807631bSJohn Baldwin 1083807631bSJohn Baldwin SOCKBUF_LOCK_ASSERT(sb); 1093807631bSJohn Baldwin 1103807631bSJohn Baldwin if ((sb->sb_flags & SB_NOCOALESCE) != 0) 1113807631bSJohn Baldwin return; 1123807631bSJohn Baldwin 1133807631bSJohn Baldwin for (m = m0; m != end; m = m->m_next) { 1143807631bSJohn Baldwin MPASS((m->m_flags & M_NOTREADY) == 0); 115c4ad247bSAndrew Gallatin /* 116c4ad247bSAndrew Gallatin * NB: In sbcompress(), 'n' is the last mbuf in the 117c4ad247bSAndrew Gallatin * socket buffer and 'm' is the new mbuf being copied 118c4ad247bSAndrew Gallatin * into the trailing space of 'n'. Here, the roles 119c4ad247bSAndrew Gallatin * are reversed and 'n' is the next mbuf after 'm' 120c4ad247bSAndrew Gallatin * that is being copied into the trailing space of 121c4ad247bSAndrew Gallatin * 'm'. 122c4ad247bSAndrew Gallatin */ 123c4ad247bSAndrew Gallatin n = m->m_next; 124c4ad247bSAndrew Gallatin #ifdef KERN_TLS 125c4ad247bSAndrew Gallatin /* Try to coalesce adjacent ktls mbuf hdr/trailers. 
*/ 126c4ad247bSAndrew Gallatin if ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && 127c4ad247bSAndrew Gallatin (m->m_flags & M_NOMAP) && 128c4ad247bSAndrew Gallatin (n->m_flags & M_NOMAP) && 129c4ad247bSAndrew Gallatin !mbuf_has_tls_session(m) && 130c4ad247bSAndrew Gallatin !mbuf_has_tls_session(n)) { 131c4ad247bSAndrew Gallatin struct mbuf_ext_pgs *mpgs, *npgs; 132c4ad247bSAndrew Gallatin int hdr_len, trail_len; 133c4ad247bSAndrew Gallatin 134*23feb563SAndrew Gallatin mpgs = &m->m_ext_pgs; 135*23feb563SAndrew Gallatin npgs = &n->m_ext_pgs; 136c4ad247bSAndrew Gallatin hdr_len = npgs->hdr_len; 137c4ad247bSAndrew Gallatin trail_len = mpgs->trail_len; 138c4ad247bSAndrew Gallatin if (trail_len != 0 && hdr_len != 0 && 139c4ad247bSAndrew Gallatin trail_len + hdr_len <= MBUF_PEXT_TRAIL_LEN) { 140c4ad247bSAndrew Gallatin /* copy n's header to m's trailer */ 141*23feb563SAndrew Gallatin memcpy(&m->m_epg_trail[trail_len], 142*23feb563SAndrew Gallatin n->m_epg_hdr, hdr_len); 143c4ad247bSAndrew Gallatin mpgs->trail_len += hdr_len; 144c4ad247bSAndrew Gallatin m->m_len += hdr_len; 145c4ad247bSAndrew Gallatin npgs->hdr_len = 0; 146c4ad247bSAndrew Gallatin n->m_len -= hdr_len; 147c4ad247bSAndrew Gallatin } 148c4ad247bSAndrew Gallatin } 149c4ad247bSAndrew Gallatin #endif 1503807631bSJohn Baldwin 1513807631bSJohn Baldwin /* Compress small unmapped mbufs into plain mbufs. 
*/ 152b2e60773SJohn Baldwin if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN && 153b2e60773SJohn Baldwin !mbuf_has_tls_session(m)) { 1543807631bSJohn Baldwin MPASS(m->m_flags & M_EXT); 1553807631bSJohn Baldwin ext_size = m->m_ext.ext_size; 1563807631bSJohn Baldwin if (mb_unmapped_compress(m) == 0) { 1573807631bSJohn Baldwin sb->sb_mbcnt -= ext_size; 1583807631bSJohn Baldwin sb->sb_ccnt -= 1; 1593807631bSJohn Baldwin } 1603807631bSJohn Baldwin } 1613807631bSJohn Baldwin 1623807631bSJohn Baldwin while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && 1633807631bSJohn Baldwin M_WRITABLE(m) && 1643807631bSJohn Baldwin (m->m_flags & M_NOMAP) == 0 && 165b2e60773SJohn Baldwin !mbuf_has_tls_session(n) && 166b2e60773SJohn Baldwin !mbuf_has_tls_session(m) && 1673807631bSJohn Baldwin n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 1683807631bSJohn Baldwin n->m_len <= M_TRAILINGSPACE(m) && 1693807631bSJohn Baldwin m->m_type == n->m_type) { 1703807631bSJohn Baldwin KASSERT(sb->sb_lastrecord != n, 1713807631bSJohn Baldwin ("%s: merging start of record (%p) into previous mbuf (%p)", 1723807631bSJohn Baldwin __func__, n, m)); 1733807631bSJohn Baldwin m_copydata(n, 0, n->m_len, mtodo(m, m->m_len)); 1743807631bSJohn Baldwin m->m_len += n->m_len; 1753807631bSJohn Baldwin m->m_next = n->m_next; 1763807631bSJohn Baldwin m->m_flags |= n->m_flags & M_EOR; 1773807631bSJohn Baldwin if (sb->sb_mbtail == n) 1783807631bSJohn Baldwin sb->sb_mbtail = m; 1793807631bSJohn Baldwin 1803807631bSJohn Baldwin sb->sb_mbcnt -= MSIZE; 1813807631bSJohn Baldwin sb->sb_mcnt -= 1; 1823807631bSJohn Baldwin if (n->m_flags & M_EXT) { 1833807631bSJohn Baldwin sb->sb_mbcnt -= n->m_ext.ext_size; 1843807631bSJohn Baldwin sb->sb_ccnt -= 1; 1853807631bSJohn Baldwin } 1863807631bSJohn Baldwin m_free(n); 1873807631bSJohn Baldwin n = m->m_next; 1883807631bSJohn Baldwin } 1893807631bSJohn Baldwin } 1903807631bSJohn Baldwin SBLASTRECORDCHK(sb); 1913807631bSJohn Baldwin SBLASTMBUFCHK(sb); 1923807631bSJohn 
Baldwin } 1933807631bSJohn Baldwin 1943807631bSJohn Baldwin /* 19582334850SJohn Baldwin * Mark ready "count" units of I/O starting with "m". Most mbufs 19682334850SJohn Baldwin * count as a single unit of I/O except for EXT_PGS-backed mbufs which 19782334850SJohn Baldwin * can be backed by multiple pages. 1980f9d0a73SGleb Smirnoff */ 1990f9d0a73SGleb Smirnoff int 20082334850SJohn Baldwin sbready(struct sockbuf *sb, struct mbuf *m0, int count) 2010f9d0a73SGleb Smirnoff { 20282334850SJohn Baldwin struct mbuf *m; 2030f9d0a73SGleb Smirnoff u_int blocker; 2040f9d0a73SGleb Smirnoff 2050f9d0a73SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2060f9d0a73SGleb Smirnoff KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb)); 20782334850SJohn Baldwin KASSERT(count > 0, ("%s: invalid count %d", __func__, count)); 2080f9d0a73SGleb Smirnoff 20982334850SJohn Baldwin m = m0; 2100f9d0a73SGleb Smirnoff blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0; 2110f9d0a73SGleb Smirnoff 21282334850SJohn Baldwin while (count > 0) { 2130f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 2140f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 21582334850SJohn Baldwin if ((m->m_flags & M_EXT) != 0 && 21682334850SJohn Baldwin m->m_ext.ext_type == EXT_PGS) { 217*23feb563SAndrew Gallatin if (count < m->m_ext_pgs.nrdy) { 218*23feb563SAndrew Gallatin m->m_ext_pgs.nrdy -= count; 21982334850SJohn Baldwin count = 0; 22082334850SJohn Baldwin break; 22182334850SJohn Baldwin } 222*23feb563SAndrew Gallatin count -= m->m_ext_pgs.nrdy; 223*23feb563SAndrew Gallatin m->m_ext_pgs.nrdy = 0; 22482334850SJohn Baldwin } else 22582334850SJohn Baldwin count--; 22682334850SJohn Baldwin 2270f9d0a73SGleb Smirnoff m->m_flags &= ~(M_NOTREADY | blocker); 2280f9d0a73SGleb Smirnoff if (blocker) 2290f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 23082334850SJohn Baldwin m = m->m_next; 2310f9d0a73SGleb Smirnoff } 2320f9d0a73SGleb Smirnoff 23382334850SJohn Baldwin /* 23482334850SJohn Baldwin * If the first mbuf is 
still not fully ready because only 23582334850SJohn Baldwin * some of its backing pages were readied, no further progress 23682334850SJohn Baldwin * can be made. 23782334850SJohn Baldwin */ 23882334850SJohn Baldwin if (m0 == m) { 23982334850SJohn Baldwin MPASS(m->m_flags & M_NOTREADY); 2400f9d0a73SGleb Smirnoff return (EINPROGRESS); 24182334850SJohn Baldwin } 24282334850SJohn Baldwin 24382334850SJohn Baldwin if (!blocker) { 2443807631bSJohn Baldwin sbready_compress(sb, m0, m); 24582334850SJohn Baldwin return (EINPROGRESS); 24682334850SJohn Baldwin } 2470f9d0a73SGleb Smirnoff 2480f9d0a73SGleb Smirnoff /* This one was blocking all the queue. */ 2490f9d0a73SGleb Smirnoff for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) { 2500f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_BLOCKED, 2510f9d0a73SGleb Smirnoff ("%s: m %p !M_BLOCKED", __func__, m)); 2520f9d0a73SGleb Smirnoff m->m_flags &= ~M_BLOCKED; 2530f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 2540f9d0a73SGleb Smirnoff } 2550f9d0a73SGleb Smirnoff 2560f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 2573807631bSJohn Baldwin sbready_compress(sb, m0, m); 2580f9d0a73SGleb Smirnoff 2590f9d0a73SGleb Smirnoff return (0); 2600f9d0a73SGleb Smirnoff } 2610f9d0a73SGleb Smirnoff 2620f9d0a73SGleb Smirnoff /* 2638967b220SGleb Smirnoff * Adjust sockbuf state reflecting allocation of m. 
2648967b220SGleb Smirnoff */ 2658967b220SGleb Smirnoff void 2668967b220SGleb Smirnoff sballoc(struct sockbuf *sb, struct mbuf *m) 2678967b220SGleb Smirnoff { 2688967b220SGleb Smirnoff 2698967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 2708967b220SGleb Smirnoff 2710f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 2720f9d0a73SGleb Smirnoff 2730f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) { 2740f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY) 2750f9d0a73SGleb Smirnoff sb->sb_fnrdy = m; 2760f9d0a73SGleb Smirnoff else 2770f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 2780f9d0a73SGleb Smirnoff } else 2790f9d0a73SGleb Smirnoff m->m_flags |= M_BLOCKED; 2808967b220SGleb Smirnoff 2818967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 2828967b220SGleb Smirnoff sb->sb_ctl += m->m_len; 2838967b220SGleb Smirnoff 2848967b220SGleb Smirnoff sb->sb_mbcnt += MSIZE; 2858967b220SGleb Smirnoff sb->sb_mcnt += 1; 2868967b220SGleb Smirnoff 2878967b220SGleb Smirnoff if (m->m_flags & M_EXT) { 2888967b220SGleb Smirnoff sb->sb_mbcnt += m->m_ext.ext_size; 2898967b220SGleb Smirnoff sb->sb_ccnt += 1; 2908967b220SGleb Smirnoff } 2918967b220SGleb Smirnoff } 2928967b220SGleb Smirnoff 2938967b220SGleb Smirnoff /* 2948967b220SGleb Smirnoff * Adjust sockbuf state reflecting freeing of m. 2958967b220SGleb Smirnoff */ 2968967b220SGleb Smirnoff void 2978967b220SGleb Smirnoff sbfree(struct sockbuf *sb, struct mbuf *m) 2988967b220SGleb Smirnoff { 2998967b220SGleb Smirnoff 3008967b220SGleb Smirnoff #if 0 /* XXX: not yet: soclose() call path comes here w/o lock. 
*/ 3018967b220SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 3028967b220SGleb Smirnoff #endif 3038967b220SGleb Smirnoff 3040f9d0a73SGleb Smirnoff sb->sb_ccc -= m->m_len; 3050f9d0a73SGleb Smirnoff 3060f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) 3070f9d0a73SGleb Smirnoff sb->sb_acc -= m->m_len; 3080f9d0a73SGleb Smirnoff 3090f9d0a73SGleb Smirnoff if (m == sb->sb_fnrdy) { 3100f9d0a73SGleb Smirnoff struct mbuf *n; 3110f9d0a73SGleb Smirnoff 3120f9d0a73SGleb Smirnoff KASSERT(m->m_flags & M_NOTREADY, 3130f9d0a73SGleb Smirnoff ("%s: m %p !M_NOTREADY", __func__, m)); 3140f9d0a73SGleb Smirnoff 3150f9d0a73SGleb Smirnoff n = m->m_next; 3160f9d0a73SGleb Smirnoff while (n != NULL && !(n->m_flags & M_NOTREADY)) { 3170f9d0a73SGleb Smirnoff n->m_flags &= ~M_BLOCKED; 3180f9d0a73SGleb Smirnoff sb->sb_acc += n->m_len; 3190f9d0a73SGleb Smirnoff n = n->m_next; 3200f9d0a73SGleb Smirnoff } 3210f9d0a73SGleb Smirnoff sb->sb_fnrdy = n; 3220f9d0a73SGleb Smirnoff } 3238967b220SGleb Smirnoff 3248967b220SGleb Smirnoff if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 3258967b220SGleb Smirnoff sb->sb_ctl -= m->m_len; 3268967b220SGleb Smirnoff 3278967b220SGleb Smirnoff sb->sb_mbcnt -= MSIZE; 3288967b220SGleb Smirnoff sb->sb_mcnt -= 1; 3298967b220SGleb Smirnoff if (m->m_flags & M_EXT) { 3308967b220SGleb Smirnoff sb->sb_mbcnt -= m->m_ext.ext_size; 3318967b220SGleb Smirnoff sb->sb_ccnt -= 1; 3328967b220SGleb Smirnoff } 3338967b220SGleb Smirnoff 3348967b220SGleb Smirnoff if (sb->sb_sndptr == m) { 3358967b220SGleb Smirnoff sb->sb_sndptr = NULL; 3368967b220SGleb Smirnoff sb->sb_sndptroff = 0; 3378967b220SGleb Smirnoff } 3388967b220SGleb Smirnoff if (sb->sb_sndptroff != 0) 3398967b220SGleb Smirnoff sb->sb_sndptroff -= m->m_len; 3408967b220SGleb Smirnoff } 3418967b220SGleb Smirnoff 3428967b220SGleb Smirnoff /* 343050ac265SRobert Watson * Socantsendmore indicates that no more data will be sent on the socket; it 344050ac265SRobert Watson * would normally be applied to a socket when the user informs the 
system 345050ac265SRobert Watson * that no more data is to be sent, by the protocol code (in case 346050ac265SRobert Watson * PRU_SHUTDOWN). Socantrcvmore indicates that no more data will be 347050ac265SRobert Watson * received, and will normally be applied to the socket by a protocol when it 348050ac265SRobert Watson * detects that the peer will send no more data. Data queued for reading in 349050ac265SRobert Watson * the socket may yet be read. 350df8bae1dSRodney W. Grimes */ 351a34b7046SRobert Watson void 352050ac265SRobert Watson socantsendmore_locked(struct socket *so) 353a34b7046SRobert Watson { 354a34b7046SRobert Watson 355a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_snd); 356a34b7046SRobert Watson 357a34b7046SRobert Watson so->so_snd.sb_state |= SBS_CANTSENDMORE; 358a34b7046SRobert Watson sowwakeup_locked(so); 359a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 360a34b7046SRobert Watson } 361df8bae1dSRodney W. Grimes 36226f9a767SRodney W. Grimes void 363050ac265SRobert Watson socantsendmore(struct socket *so) 364df8bae1dSRodney W. Grimes { 365df8bae1dSRodney W. Grimes 366a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_snd); 367a34b7046SRobert Watson socantsendmore_locked(so); 368a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED); 369a34b7046SRobert Watson } 370a34b7046SRobert Watson 371a34b7046SRobert Watson void 372050ac265SRobert Watson socantrcvmore_locked(struct socket *so) 373a34b7046SRobert Watson { 374a34b7046SRobert Watson 375a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(&so->so_rcv); 376a34b7046SRobert Watson 377a34b7046SRobert Watson so->so_rcv.sb_state |= SBS_CANTRCVMORE; 378a34b7046SRobert Watson sorwakeup_locked(so); 379a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 380df8bae1dSRodney W. Grimes } 381df8bae1dSRodney W. Grimes 38226f9a767SRodney W. Grimes void 383050ac265SRobert Watson socantrcvmore(struct socket *so) 384df8bae1dSRodney W. Grimes { 385df8bae1dSRodney W. 
Grimes 386a34b7046SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 387a34b7046SRobert Watson socantrcvmore_locked(so); 388a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED); 389df8bae1dSRodney W. Grimes } 390df8bae1dSRodney W. Grimes 391df8bae1dSRodney W. Grimes /* 392df8bae1dSRodney W. Grimes * Wait for data to arrive at/drain from a socket buffer. 393df8bae1dSRodney W. Grimes */ 39426f9a767SRodney W. Grimes int 395050ac265SRobert Watson sbwait(struct sockbuf *sb) 396df8bae1dSRodney W. Grimes { 397df8bae1dSRodney W. Grimes 39831f555a1SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 39931f555a1SRobert Watson 400df8bae1dSRodney W. Grimes sb->sb_flags |= SB_WAIT; 4010f9d0a73SGleb Smirnoff return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx, 40247daf5d5SBruce Evans (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", 4037729cbf1SDavide Italiano sb->sb_timeo, 0, 0)); 404df8bae1dSRodney W. Grimes } 405df8bae1dSRodney W. Grimes 40626f9a767SRodney W. Grimes int 4077abab911SRobert Watson sblock(struct sockbuf *sb, int flags) 408df8bae1dSRodney W. Grimes { 409df8bae1dSRodney W. Grimes 410265de5bbSRobert Watson KASSERT((flags & SBL_VALID) == flags, 411265de5bbSRobert Watson ("sblock: flags invalid (0x%x)", flags)); 412265de5bbSRobert Watson 413265de5bbSRobert Watson if (flags & SBL_WAIT) { 414265de5bbSRobert Watson if ((sb->sb_flags & SB_NOINTR) || 415265de5bbSRobert Watson (flags & SBL_NOINTR)) { 4167abab911SRobert Watson sx_xlock(&sb->sb_sx); 417df8bae1dSRodney W. 
Grimes return (0); 418049c3b6cSRobert Watson } 419049c3b6cSRobert Watson return (sx_xlock_sig(&sb->sb_sx)); 4207abab911SRobert Watson } else { 4217abab911SRobert Watson if (sx_try_xlock(&sb->sb_sx) == 0) 4227abab911SRobert Watson return (EWOULDBLOCK); 4237abab911SRobert Watson return (0); 4247abab911SRobert Watson } 4257abab911SRobert Watson } 4267abab911SRobert Watson 4277abab911SRobert Watson void 4287abab911SRobert Watson sbunlock(struct sockbuf *sb) 4297abab911SRobert Watson { 4307abab911SRobert Watson 4317abab911SRobert Watson sx_xunlock(&sb->sb_sx); 432df8bae1dSRodney W. Grimes } 433df8bae1dSRodney W. Grimes 434df8bae1dSRodney W. Grimes /* 435050ac265SRobert Watson * Wakeup processes waiting on a socket buffer. Do asynchronous notification 436050ac265SRobert Watson * via SIGIO if the socket has the SS_ASYNC flag set. 437a34b7046SRobert Watson * 438a34b7046SRobert Watson * Called with the socket buffer lock held; will release the lock by the end 439a34b7046SRobert Watson * of the function. This allows the caller to acquire the socket buffer lock 440a34b7046SRobert Watson * while testing for the need for various sorts of wakeup and hold it through 441a34b7046SRobert Watson * to the point where it's no longer required. We currently hold the lock 442a34b7046SRobert Watson * through calls out to other subsystems (with the exception of kqueue), and 443a34b7046SRobert Watson * then release it to avoid lock order issues. It's not clear that's 444a34b7046SRobert Watson * correct. 445df8bae1dSRodney W. Grimes */ 44626f9a767SRodney W. Grimes void 447050ac265SRobert Watson sowakeup(struct socket *so, struct sockbuf *sb) 448df8bae1dSRodney W. Grimes { 44974fb0ba7SJohn Baldwin int ret; 450d48d4b25SSeigo Tanimura 451a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 452a34b7046SRobert Watson 453779f106aSGleb Smirnoff selwakeuppri(sb->sb_sel, PSOCK); 454779f106aSGleb Smirnoff if (!SEL_WAITING(sb->sb_sel)) 455df8bae1dSRodney W. 
Grimes sb->sb_flags &= ~SB_SEL; 456df8bae1dSRodney W. Grimes if (sb->sb_flags & SB_WAIT) { 457df8bae1dSRodney W. Grimes sb->sb_flags &= ~SB_WAIT; 4580f9d0a73SGleb Smirnoff wakeup(&sb->sb_acc); 459df8bae1dSRodney W. Grimes } 460779f106aSGleb Smirnoff KNOTE_LOCKED(&sb->sb_sel->si_note, 0); 46198c92369SNavdeep Parhar if (sb->sb_upcall != NULL) { 462eb1b1807SGleb Smirnoff ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT); 46374fb0ba7SJohn Baldwin if (ret == SU_ISCONNECTED) { 46474fb0ba7SJohn Baldwin KASSERT(sb == &so->so_rcv, 46574fb0ba7SJohn Baldwin ("SO_SND upcall returned SU_ISCONNECTED")); 46674fb0ba7SJohn Baldwin soupcall_clear(so, SO_RCV); 46774fb0ba7SJohn Baldwin } 46874fb0ba7SJohn Baldwin } else 46974fb0ba7SJohn Baldwin ret = SU_OK; 4704cc20ab1SSeigo Tanimura if (sb->sb_flags & SB_AIO) 471f3215338SJohn Baldwin sowakeup_aio(so, sb); 47274fb0ba7SJohn Baldwin SOCKBUF_UNLOCK(sb); 473555b3e2fSGleb Smirnoff if (ret == SU_ISCONNECTED) 47474fb0ba7SJohn Baldwin soisconnected(so); 47574fb0ba7SJohn Baldwin if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) 47674fb0ba7SJohn Baldwin pgsigio(&so->so_sigio, SIGIO, 0); 477a34b7046SRobert Watson mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED); 478df8bae1dSRodney W. Grimes } 479df8bae1dSRodney W. Grimes 480df8bae1dSRodney W. Grimes /* 481df8bae1dSRodney W. Grimes * Socket buffer (struct sockbuf) utility routines. 482df8bae1dSRodney W. Grimes * 483050ac265SRobert Watson * Each socket contains two socket buffers: one for sending data and one for 484050ac265SRobert Watson * receiving data. Each buffer contains a queue of mbufs, information about 485050ac265SRobert Watson * the number of mbufs and amount of data in the queue, and other fields 486050ac265SRobert Watson * allowing select() statements and notification on data availability to be 487050ac265SRobert Watson * implemented. 488df8bae1dSRodney W. Grimes * 489050ac265SRobert Watson * Data stored in a socket buffer is maintained as a list of records. 
Each 490050ac265SRobert Watson * record is a list of mbufs chained together with the m_next field. Records 491050ac265SRobert Watson * are chained together with the m_nextpkt field. The upper level routine 492050ac265SRobert Watson * soreceive() expects the following conventions to be observed when placing 493050ac265SRobert Watson * information in the receive buffer: 494df8bae1dSRodney W. Grimes * 495050ac265SRobert Watson * 1. If the protocol requires each message be preceded by the sender's name, 496050ac265SRobert Watson * then a record containing that name must be present before any 497050ac265SRobert Watson * associated data (mbuf's must be of type MT_SONAME). 498050ac265SRobert Watson * 2. If the protocol supports the exchange of ``access rights'' (really just 499050ac265SRobert Watson * additional data associated with the message), and there are ``rights'' 500050ac265SRobert Watson * to be received, then a record containing this data should be present 501050ac265SRobert Watson * (mbuf's must be of type MT_RIGHTS). 502050ac265SRobert Watson * 3. If a name or rights record exists, then it must be followed by a data 503050ac265SRobert Watson * record, perhaps of zero length. 504df8bae1dSRodney W. Grimes * 505df8bae1dSRodney W. Grimes * Before using a new socket structure it is first necessary to reserve 506df8bae1dSRodney W. Grimes * buffer space to the socket, by calling sbreserve(). This should commit 507df8bae1dSRodney W. Grimes * some of the available buffer space in the system buffer pool for the 508050ac265SRobert Watson * socket (currently, it does nothing but enforce limits). The space should 509050ac265SRobert Watson * be released by calling sbrelease() when the socket is destroyed. 510df8bae1dSRodney W. Grimes */ 51126f9a767SRodney W. Grimes int 512050ac265SRobert Watson soreserve(struct socket *so, u_long sndcc, u_long rcvcc) 513df8bae1dSRodney W. Grimes { 514b40ce416SJulian Elischer struct thread *td = curthread; 515df8bae1dSRodney W. 
Grimes 5163f11a2f3SRobert Watson SOCKBUF_LOCK(&so->so_snd); 5179535efc0SRobert Watson SOCKBUF_LOCK(&so->so_rcv); 5183f11a2f3SRobert Watson if (sbreserve_locked(&so->so_snd, sndcc, so, td) == 0) 5193f11a2f3SRobert Watson goto bad; 5203f11a2f3SRobert Watson if (sbreserve_locked(&so->so_rcv, rcvcc, so, td) == 0) 5213f11a2f3SRobert Watson goto bad2; 522df8bae1dSRodney W. Grimes if (so->so_rcv.sb_lowat == 0) 523df8bae1dSRodney W. Grimes so->so_rcv.sb_lowat = 1; 524df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat == 0) 525df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = MCLBYTES; 526df8bae1dSRodney W. Grimes if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) 527df8bae1dSRodney W. Grimes so->so_snd.sb_lowat = so->so_snd.sb_hiwat; 5283f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 5299535efc0SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 530df8bae1dSRodney W. Grimes return (0); 531df8bae1dSRodney W. Grimes bad2: 5323f11a2f3SRobert Watson sbrelease_locked(&so->so_snd, so); 533df8bae1dSRodney W. Grimes bad: 5343f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_rcv); 5353f11a2f3SRobert Watson SOCKBUF_UNLOCK(&so->so_snd); 536df8bae1dSRodney W. Grimes return (ENOBUFS); 537df8bae1dSRodney W. Grimes } 538df8bae1dSRodney W. 
Grimes 53979cb7eb4SDavid Greenman static int 54079cb7eb4SDavid Greenman sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS) 54179cb7eb4SDavid Greenman { 54279cb7eb4SDavid Greenman int error = 0; 54386a93d51SJohn Baldwin u_long tmp_sb_max = sb_max; 54479cb7eb4SDavid Greenman 54586a93d51SJohn Baldwin error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req); 54679cb7eb4SDavid Greenman if (error || !req->newptr) 54779cb7eb4SDavid Greenman return (error); 54886a93d51SJohn Baldwin if (tmp_sb_max < MSIZE + MCLBYTES) 54979cb7eb4SDavid Greenman return (EINVAL); 55086a93d51SJohn Baldwin sb_max = tmp_sb_max; 55179cb7eb4SDavid Greenman sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES); 55279cb7eb4SDavid Greenman return (0); 55379cb7eb4SDavid Greenman } 55479cb7eb4SDavid Greenman 555df8bae1dSRodney W. Grimes /* 556050ac265SRobert Watson * Allot mbufs to a sockbuf. Attempt to scale mbmax so that mbcnt doesn't 557050ac265SRobert Watson * become limiting if buffering efficiency is near the normal case. 558df8bae1dSRodney W. Grimes */ 55926f9a767SRodney W. Grimes int 560050ac265SRobert Watson sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so, 561050ac265SRobert Watson struct thread *td) 562df8bae1dSRodney W. Grimes { 56391d5354aSJohn Baldwin rlim_t sbsize_limit; 564ecf72308SBrian Feldman 5653f11a2f3SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 5663f11a2f3SRobert Watson 567ecf72308SBrian Feldman /* 5687978014dSRobert Watson * When a thread is passed, we take into account the thread's socket 5697978014dSRobert Watson * buffer size limit. The caller will generally pass curthread, but 5707978014dSRobert Watson * in the TCP input path, NULL will be passed to indicate that no 5717978014dSRobert Watson * appropriate thread resource limits are available. In that case, 5727978014dSRobert Watson * we don't apply a process limit. 573ecf72308SBrian Feldman */ 57479cb7eb4SDavid Greenman if (cc > sb_max_adj) 575df8bae1dSRodney W. 
Grimes return (0); 57691d5354aSJohn Baldwin if (td != NULL) { 577f6f6d240SMateusz Guzik sbsize_limit = lim_cur(td, RLIMIT_SBSIZE); 57891d5354aSJohn Baldwin } else 57991d5354aSJohn Baldwin sbsize_limit = RLIM_INFINITY; 580f535380cSDon Lewis if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, 58191d5354aSJohn Baldwin sbsize_limit)) 582ecf72308SBrian Feldman return (0); 5834b29bc4fSGarrett Wollman sb->sb_mbmax = min(cc * sb_efficiency, sb_max); 584df8bae1dSRodney W. Grimes if (sb->sb_lowat > sb->sb_hiwat) 585df8bae1dSRodney W. Grimes sb->sb_lowat = sb->sb_hiwat; 586df8bae1dSRodney W. Grimes return (1); 587df8bae1dSRodney W. Grimes } 588df8bae1dSRodney W. Grimes 5893f11a2f3SRobert Watson int 59064290befSGleb Smirnoff sbsetopt(struct socket *so, int cmd, u_long cc) 5913f11a2f3SRobert Watson { 59264290befSGleb Smirnoff struct sockbuf *sb; 59364290befSGleb Smirnoff short *flags; 59464290befSGleb Smirnoff u_int *hiwat, *lowat; 5953f11a2f3SRobert Watson int error; 5963f11a2f3SRobert Watson 597b2037136SMatt Macy sb = NULL; 59864290befSGleb Smirnoff SOCK_LOCK(so); 59964290befSGleb Smirnoff if (SOLISTENING(so)) { 60064290befSGleb Smirnoff switch (cmd) { 60164290befSGleb Smirnoff case SO_SNDLOWAT: 60264290befSGleb Smirnoff case SO_SNDBUF: 60364290befSGleb Smirnoff lowat = &so->sol_sbsnd_lowat; 60464290befSGleb Smirnoff hiwat = &so->sol_sbsnd_hiwat; 60564290befSGleb Smirnoff flags = &so->sol_sbsnd_flags; 60664290befSGleb Smirnoff break; 60764290befSGleb Smirnoff case SO_RCVLOWAT: 60864290befSGleb Smirnoff case SO_RCVBUF: 60964290befSGleb Smirnoff lowat = &so->sol_sbrcv_lowat; 61064290befSGleb Smirnoff hiwat = &so->sol_sbrcv_hiwat; 61164290befSGleb Smirnoff flags = &so->sol_sbrcv_flags; 61264290befSGleb Smirnoff break; 61364290befSGleb Smirnoff } 61464290befSGleb Smirnoff } else { 61564290befSGleb Smirnoff switch (cmd) { 61664290befSGleb Smirnoff case SO_SNDLOWAT: 61764290befSGleb Smirnoff case SO_SNDBUF: 61864290befSGleb Smirnoff sb = &so->so_snd; 61964290befSGleb 
Smirnoff break; 62064290befSGleb Smirnoff case SO_RCVLOWAT: 62164290befSGleb Smirnoff case SO_RCVBUF: 62264290befSGleb Smirnoff sb = &so->so_rcv; 62364290befSGleb Smirnoff break; 62464290befSGleb Smirnoff } 62564290befSGleb Smirnoff flags = &sb->sb_flags; 62664290befSGleb Smirnoff hiwat = &sb->sb_hiwat; 62764290befSGleb Smirnoff lowat = &sb->sb_lowat; 6283f11a2f3SRobert Watson SOCKBUF_LOCK(sb); 62964290befSGleb Smirnoff } 63064290befSGleb Smirnoff 63164290befSGleb Smirnoff error = 0; 63264290befSGleb Smirnoff switch (cmd) { 63364290befSGleb Smirnoff case SO_SNDBUF: 63464290befSGleb Smirnoff case SO_RCVBUF: 63564290befSGleb Smirnoff if (SOLISTENING(so)) { 63664290befSGleb Smirnoff if (cc > sb_max_adj) { 63764290befSGleb Smirnoff error = ENOBUFS; 63864290befSGleb Smirnoff break; 63964290befSGleb Smirnoff } 64064290befSGleb Smirnoff *hiwat = cc; 64164290befSGleb Smirnoff if (*lowat > *hiwat) 64264290befSGleb Smirnoff *lowat = *hiwat; 64364290befSGleb Smirnoff } else { 64464290befSGleb Smirnoff if (!sbreserve_locked(sb, cc, so, curthread)) 64564290befSGleb Smirnoff error = ENOBUFS; 64664290befSGleb Smirnoff } 64764290befSGleb Smirnoff if (error == 0) 64864290befSGleb Smirnoff *flags &= ~SB_AUTOSIZE; 64964290befSGleb Smirnoff break; 65064290befSGleb Smirnoff case SO_SNDLOWAT: 65164290befSGleb Smirnoff case SO_RCVLOWAT: 65264290befSGleb Smirnoff /* 65364290befSGleb Smirnoff * Make sure the low-water is never greater than the 65464290befSGleb Smirnoff * high-water. 65564290befSGleb Smirnoff */ 65664290befSGleb Smirnoff *lowat = (cc > *hiwat) ? *hiwat : cc; 65764290befSGleb Smirnoff break; 65864290befSGleb Smirnoff } 65964290befSGleb Smirnoff 66064290befSGleb Smirnoff if (!SOLISTENING(so)) 6613f11a2f3SRobert Watson SOCKBUF_UNLOCK(sb); 66264290befSGleb Smirnoff SOCK_UNLOCK(so); 6633f11a2f3SRobert Watson return (error); 6643f11a2f3SRobert Watson } 6653f11a2f3SRobert Watson 666df8bae1dSRodney W. Grimes /* 667df8bae1dSRodney W. 
Grimes * Free mbufs held by a socket, and reserved mbuf space. 668df8bae1dSRodney W. Grimes */ 6693f0bfcccSRobert Watson void 670050ac265SRobert Watson sbrelease_internal(struct sockbuf *sb, struct socket *so) 671eaa6dfbcSRobert Watson { 672eaa6dfbcSRobert Watson 673eaa6dfbcSRobert Watson sbflush_internal(sb); 674eaa6dfbcSRobert Watson (void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0, 675eaa6dfbcSRobert Watson RLIM_INFINITY); 676eaa6dfbcSRobert Watson sb->sb_mbmax = 0; 677eaa6dfbcSRobert Watson } 678eaa6dfbcSRobert Watson 67926f9a767SRodney W. Grimes void 680050ac265SRobert Watson sbrelease_locked(struct sockbuf *sb, struct socket *so) 681df8bae1dSRodney W. Grimes { 682df8bae1dSRodney W. Grimes 683a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 684a34b7046SRobert Watson 685eaa6dfbcSRobert Watson sbrelease_internal(sb, so); 686df8bae1dSRodney W. Grimes } 687df8bae1dSRodney W. Grimes 688a34b7046SRobert Watson void 689050ac265SRobert Watson sbrelease(struct sockbuf *sb, struct socket *so) 690a34b7046SRobert Watson { 691a34b7046SRobert Watson 692a34b7046SRobert Watson SOCKBUF_LOCK(sb); 693a34b7046SRobert Watson sbrelease_locked(sb, so); 694a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 695a34b7046SRobert Watson } 696eaa6dfbcSRobert Watson 697eaa6dfbcSRobert Watson void 698050ac265SRobert Watson sbdestroy(struct sockbuf *sb, struct socket *so) 699eaa6dfbcSRobert Watson { 700eaa6dfbcSRobert Watson 701eaa6dfbcSRobert Watson sbrelease_internal(sb, so); 702b2e60773SJohn Baldwin #ifdef KERN_TLS 703b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL) 704b2e60773SJohn Baldwin ktls_free(sb->sb_tls_info); 705b2e60773SJohn Baldwin sb->sb_tls_info = NULL; 706b2e60773SJohn Baldwin #endif 707eaa6dfbcSRobert Watson } 708eaa6dfbcSRobert Watson 709df8bae1dSRodney W. Grimes /* 710050ac265SRobert Watson * Routines to add and remove data from an mbuf queue. 711df8bae1dSRodney W. 
 *
 * The routines sbappend() or sbappendrecord() are normally called to append
 * new mbufs to a socket buffer, after checking that adequate space is
 * available, comparing the function sbspace() with the amount of data to be
 * added.  sbappendrecord() differs from sbappend() in that data supplied is
 * treated as the beginning of a new record.  To place a sender's address,
 * optional access rights, and data in a socket receive buffer,
 * sbappendaddr() should be used.  To place access rights and data in a
 * socket receive buffer, sbappendrights() should be used.  In either case,
 * the new data begins a new record.  Note that unlike sbappend() and
 * sbappendrecord(), these routines check for the caller that there will be
 * enough space to store the data.  Each fails if there is not enough space,
 * or if it cannot find mbufs to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data awaiting
 * acknowledgement.  Data is normally copied from a socket send buffer in a
 * protocol with m_copy for output to a peer, and then removed from
 * the socket buffer with sbdrop() or sbdroprecord() when the data is
 * acknowledged by the peer.
Grimes */ 731395bb186SSam Leffler #ifdef SOCKBUF_DEBUG 732395bb186SSam Leffler void 733395bb186SSam Leffler sblastrecordchk(struct sockbuf *sb, const char *file, int line) 734395bb186SSam Leffler { 735395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 736395bb186SSam Leffler 737a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 738a34b7046SRobert Watson 739395bb186SSam Leffler while (m && m->m_nextpkt) 740395bb186SSam Leffler m = m->m_nextpkt; 741395bb186SSam Leffler 742395bb186SSam Leffler if (m != sb->sb_lastrecord) { 743395bb186SSam Leffler printf("%s: sb_mb %p sb_lastrecord %p last %p\n", 744395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_lastrecord, m); 745395bb186SSam Leffler printf("packet chain:\n"); 746395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) 747395bb186SSam Leffler printf("\t%p\n", m); 748395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 749395bb186SSam Leffler } 750395bb186SSam Leffler } 751395bb186SSam Leffler 752395bb186SSam Leffler void 753395bb186SSam Leffler sblastmbufchk(struct sockbuf *sb, const char *file, int line) 754395bb186SSam Leffler { 755395bb186SSam Leffler struct mbuf *m = sb->sb_mb; 756395bb186SSam Leffler struct mbuf *n; 757395bb186SSam Leffler 758a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 759a34b7046SRobert Watson 760395bb186SSam Leffler while (m && m->m_nextpkt) 761395bb186SSam Leffler m = m->m_nextpkt; 762395bb186SSam Leffler 763395bb186SSam Leffler while (m && m->m_next) 764395bb186SSam Leffler m = m->m_next; 765395bb186SSam Leffler 766395bb186SSam Leffler if (m != sb->sb_mbtail) { 767395bb186SSam Leffler printf("%s: sb_mb %p sb_mbtail %p last %p\n", 768395bb186SSam Leffler __func__, sb->sb_mb, sb->sb_mbtail, m); 769395bb186SSam Leffler printf("packet tree:\n"); 770395bb186SSam Leffler for (m = sb->sb_mb; m != NULL; m = m->m_nextpkt) { 771395bb186SSam Leffler printf("\t"); 772395bb186SSam Leffler for (n = m; n != NULL; n = n->m_next) 773395bb186SSam Leffler printf("%p ", n); 
774395bb186SSam Leffler printf("\n"); 775395bb186SSam Leffler } 776395bb186SSam Leffler panic("%s from %s:%u", __func__, file, line); 777395bb186SSam Leffler } 778395bb186SSam Leffler } 779395bb186SSam Leffler #endif /* SOCKBUF_DEBUG */ 780395bb186SSam Leffler 781395bb186SSam Leffler #define SBLINKRECORD(sb, m0) do { \ 782a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); \ 783395bb186SSam Leffler if ((sb)->sb_lastrecord != NULL) \ 784395bb186SSam Leffler (sb)->sb_lastrecord->m_nextpkt = (m0); \ 785395bb186SSam Leffler else \ 786395bb186SSam Leffler (sb)->sb_mb = (m0); \ 787395bb186SSam Leffler (sb)->sb_lastrecord = (m0); \ 788395bb186SSam Leffler } while (/*CONSTCOND*/0) 789395bb186SSam Leffler 790df8bae1dSRodney W. Grimes /* 791050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 792050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 793050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 794df8bae1dSRodney W. Grimes */ 79526f9a767SRodney W. Grimes void 796829fae90SGleb Smirnoff sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags) 797df8bae1dSRodney W. Grimes { 798050ac265SRobert Watson struct mbuf *n; 799df8bae1dSRodney W. Grimes 800a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 801a34b7046SRobert Watson 802b85f65afSPedro F. Giffuni if (m == NULL) 803df8bae1dSRodney W. Grimes return; 804829fae90SGleb Smirnoff sbm_clrprotoflags(m, flags); 805395bb186SSam Leffler SBLASTRECORDCHK(sb); 806797f2d22SPoul-Henning Kamp n = sb->sb_mb; 807797f2d22SPoul-Henning Kamp if (n) { 808df8bae1dSRodney W. Grimes while (n->m_nextpkt) 809df8bae1dSRodney W. Grimes n = n->m_nextpkt; 810df8bae1dSRodney W. Grimes do { 811df8bae1dSRodney W. Grimes if (n->m_flags & M_EOR) { 812a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 813df8bae1dSRodney W. Grimes return; 814df8bae1dSRodney W. Grimes } 815df8bae1dSRodney W. 
Grimes } while (n->m_next && (n = n->m_next)); 816395bb186SSam Leffler } else { 817395bb186SSam Leffler /* 818395bb186SSam Leffler * XXX Would like to simply use sb_mbtail here, but 819395bb186SSam Leffler * XXX I need to verify that I won't miss an EOR that 820395bb186SSam Leffler * XXX way. 821395bb186SSam Leffler */ 822395bb186SSam Leffler if ((n = sb->sb_lastrecord) != NULL) { 823395bb186SSam Leffler do { 824395bb186SSam Leffler if (n->m_flags & M_EOR) { 825a34b7046SRobert Watson sbappendrecord_locked(sb, m); /* XXXXXX!!!! */ 826395bb186SSam Leffler return; 827395bb186SSam Leffler } 828395bb186SSam Leffler } while (n->m_next && (n = n->m_next)); 829395bb186SSam Leffler } else { 830395bb186SSam Leffler /* 831395bb186SSam Leffler * If this is the first record in the socket buffer, 832395bb186SSam Leffler * it's also the last record. 833395bb186SSam Leffler */ 834395bb186SSam Leffler sb->sb_lastrecord = m; 835395bb186SSam Leffler } 836df8bae1dSRodney W. Grimes } 837df8bae1dSRodney W. Grimes sbcompress(sb, m, n); 838395bb186SSam Leffler SBLASTRECORDCHK(sb); 839395bb186SSam Leffler } 840395bb186SSam Leffler 841395bb186SSam Leffler /* 842050ac265SRobert Watson * Append mbuf chain m to the last record in the socket buffer sb. The 843050ac265SRobert Watson * additional space associated the mbuf chain is recorded in sb. Empty mbufs 844050ac265SRobert Watson * are discarded and mbufs are compacted where possible. 
845a34b7046SRobert Watson */ 846a34b7046SRobert Watson void 847829fae90SGleb Smirnoff sbappend(struct sockbuf *sb, struct mbuf *m, int flags) 848a34b7046SRobert Watson { 849a34b7046SRobert Watson 850a34b7046SRobert Watson SOCKBUF_LOCK(sb); 851829fae90SGleb Smirnoff sbappend_locked(sb, m, flags); 852a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 853a34b7046SRobert Watson } 854a34b7046SRobert Watson 855a34b7046SRobert Watson /* 856050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 857050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 858050ac265SRobert Watson * that is, a stream protocol (such as TCP). 859395bb186SSam Leffler */ 860395bb186SSam Leffler void 861651e4e6aSGleb Smirnoff sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags) 862395bb186SSam Leffler { 863a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 864395bb186SSam Leffler 865395bb186SSam Leffler KASSERT(m->m_nextpkt == NULL,("sbappendstream 0")); 866395bb186SSam Leffler KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1")); 867395bb186SSam Leffler 868395bb186SSam Leffler SBLASTMBUFCHK(sb); 869395bb186SSam Leffler 870b2e60773SJohn Baldwin #ifdef KERN_TLS 871b2e60773SJohn Baldwin if (sb->sb_tls_info != NULL) 872b2e60773SJohn Baldwin ktls_seq(sb, m); 873b2e60773SJohn Baldwin #endif 874b2e60773SJohn Baldwin 875844cacd1SGleb Smirnoff /* Remove all packet headers and mbuf tags to get a pure data chain. */ 876651e4e6aSGleb Smirnoff m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); 877844cacd1SGleb Smirnoff 878395bb186SSam Leffler sbcompress(sb, m, sb->sb_mbtail); 879395bb186SSam Leffler 880395bb186SSam Leffler sb->sb_lastrecord = sb->sb_mb; 881395bb186SSam Leffler SBLASTRECORDCHK(sb); 882df8bae1dSRodney W. Grimes } 883df8bae1dSRodney W. 
Grimes 884a34b7046SRobert Watson /* 885050ac265SRobert Watson * This version of sbappend() should only be used when the caller absolutely 886050ac265SRobert Watson * knows that there will never be more than one record in the socket buffer, 887050ac265SRobert Watson * that is, a stream protocol (such as TCP). 888a34b7046SRobert Watson */ 889a34b7046SRobert Watson void 890651e4e6aSGleb Smirnoff sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags) 891a34b7046SRobert Watson { 892a34b7046SRobert Watson 893a34b7046SRobert Watson SOCKBUF_LOCK(sb); 894651e4e6aSGleb Smirnoff sbappendstream_locked(sb, m, flags); 895a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 896a34b7046SRobert Watson } 897a34b7046SRobert Watson 898df8bae1dSRodney W. Grimes #ifdef SOCKBUF_DEBUG 89926f9a767SRodney W. Grimes void 90057f43a45SGleb Smirnoff sbcheck(struct sockbuf *sb, const char *file, int line) 901df8bae1dSRodney W. Grimes { 9020f9d0a73SGleb Smirnoff struct mbuf *m, *n, *fnrdy; 9030f9d0a73SGleb Smirnoff u_long acc, ccc, mbcnt; 904df8bae1dSRodney W. 
Grimes 905a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 906a34b7046SRobert Watson 9070f9d0a73SGleb Smirnoff acc = ccc = mbcnt = 0; 9080f9d0a73SGleb Smirnoff fnrdy = NULL; 90957f43a45SGleb Smirnoff 9100931333fSBill Fenner for (m = sb->sb_mb; m; m = n) { 9110931333fSBill Fenner n = m->m_nextpkt; 9120931333fSBill Fenner for (; m; m = m->m_next) { 91357f43a45SGleb Smirnoff if (m->m_len == 0) { 91457f43a45SGleb Smirnoff printf("sb %p empty mbuf %p\n", sb, m); 91557f43a45SGleb Smirnoff goto fail; 91657f43a45SGleb Smirnoff } 9170f9d0a73SGleb Smirnoff if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) { 9180f9d0a73SGleb Smirnoff if (m != sb->sb_fnrdy) { 9190f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p != m %p\n", 9200f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 9210f9d0a73SGleb Smirnoff goto fail; 9220f9d0a73SGleb Smirnoff } 9230f9d0a73SGleb Smirnoff fnrdy = m; 9240f9d0a73SGleb Smirnoff } 9250f9d0a73SGleb Smirnoff if (fnrdy) { 9260f9d0a73SGleb Smirnoff if (!(m->m_flags & M_NOTAVAIL)) { 9270f9d0a73SGleb Smirnoff printf("sb %p: fnrdy %p, m %p is avail\n", 9280f9d0a73SGleb Smirnoff sb, sb->sb_fnrdy, m); 9290f9d0a73SGleb Smirnoff goto fail; 9300f9d0a73SGleb Smirnoff } 9310f9d0a73SGleb Smirnoff } else 9320f9d0a73SGleb Smirnoff acc += m->m_len; 9330f9d0a73SGleb Smirnoff ccc += m->m_len; 934df8bae1dSRodney W. Grimes mbcnt += MSIZE; 935313861b8SJulian Elischer if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ 936df8bae1dSRodney W. Grimes mbcnt += m->m_ext.ext_size; 9370931333fSBill Fenner } 938df8bae1dSRodney W. Grimes } 9390f9d0a73SGleb Smirnoff if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) { 9400f9d0a73SGleb Smirnoff printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n", 9410f9d0a73SGleb Smirnoff acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt); 94257f43a45SGleb Smirnoff goto fail; 943df8bae1dSRodney W. 
Grimes } 94457f43a45SGleb Smirnoff return; 94557f43a45SGleb Smirnoff fail: 94657f43a45SGleb Smirnoff panic("%s from %s:%u", __func__, file, line); 947df8bae1dSRodney W. Grimes } 948df8bae1dSRodney W. Grimes #endif 949df8bae1dSRodney W. Grimes 950df8bae1dSRodney W. Grimes /* 951050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 952df8bae1dSRodney W. Grimes */ 95326f9a767SRodney W. Grimes void 954050ac265SRobert Watson sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0) 955df8bae1dSRodney W. Grimes { 956050ac265SRobert Watson struct mbuf *m; 957df8bae1dSRodney W. Grimes 958a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 959a34b7046SRobert Watson 960b85f65afSPedro F. Giffuni if (m0 == NULL) 961df8bae1dSRodney W. Grimes return; 96253b680caSGleb Smirnoff m_clrprotoflags(m0); 963df8bae1dSRodney W. Grimes /* 964050ac265SRobert Watson * Put the first mbuf on the queue. Note this permits zero length 965050ac265SRobert Watson * records. 966df8bae1dSRodney W. Grimes */ 967df8bae1dSRodney W. Grimes sballoc(sb, m0); 968395bb186SSam Leffler SBLASTRECORDCHK(sb); 969395bb186SSam Leffler SBLINKRECORD(sb, m0); 970e72a94adSMaksim Yevmenkin sb->sb_mbtail = m0; 971df8bae1dSRodney W. Grimes m = m0->m_next; 972df8bae1dSRodney W. Grimes m0->m_next = 0; 973df8bae1dSRodney W. Grimes if (m && (m0->m_flags & M_EOR)) { 974df8bae1dSRodney W. Grimes m0->m_flags &= ~M_EOR; 975df8bae1dSRodney W. Grimes m->m_flags |= M_EOR; 976df8bae1dSRodney W. Grimes } 977e72a94adSMaksim Yevmenkin /* always call sbcompress() so it can do SBLASTMBUFCHK() */ 978df8bae1dSRodney W. Grimes sbcompress(sb, m, m0); 979df8bae1dSRodney W. Grimes } 980df8bae1dSRodney W. Grimes 981df8bae1dSRodney W. Grimes /* 982050ac265SRobert Watson * As above, except the mbuf chain begins a new record. 
983a34b7046SRobert Watson */ 984a34b7046SRobert Watson void 985050ac265SRobert Watson sbappendrecord(struct sockbuf *sb, struct mbuf *m0) 986a34b7046SRobert Watson { 987a34b7046SRobert Watson 988a34b7046SRobert Watson SOCKBUF_LOCK(sb); 989a34b7046SRobert Watson sbappendrecord_locked(sb, m0); 990a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 991a34b7046SRobert Watson } 992a34b7046SRobert Watson 9938de34a88SAlan Somers /* Helper routine that appends data, control, and address to a sockbuf. */ 9948de34a88SAlan Somers static int 9958de34a88SAlan Somers sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, 9968de34a88SAlan Somers struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) 997df8bae1dSRodney W. Grimes { 998395bb186SSam Leffler struct mbuf *m, *n, *nlast; 999c43cad1aSScott Long #if MSIZE <= 256 1000df8bae1dSRodney W. Grimes if (asa->sa_len > MLEN) 1001df8bae1dSRodney W. Grimes return (0); 1002c43cad1aSScott Long #endif 1003c8b59ea7SGleb Smirnoff m = m_get(M_NOWAIT, MT_SONAME); 1004c8b59ea7SGleb Smirnoff if (m == NULL) 1005df8bae1dSRodney W. Grimes return (0); 1006df8bae1dSRodney W. Grimes m->m_len = asa->sa_len; 100780208239SAlfred Perlstein bcopy(asa, mtod(m, caddr_t), asa->sa_len); 1008c33a2313SAndrey V. Elsukov if (m0) { 100953b680caSGleb Smirnoff m_clrprotoflags(m0); 101057386f5dSAndrey V. Elsukov m_tag_delete_chain(m0, NULL); 1011c33a2313SAndrey V. Elsukov /* 1012c33a2313SAndrey V. Elsukov * Clear some persistent info from pkthdr. 1013c33a2313SAndrey V. Elsukov * We don't use m_demote(), because some netgraph consumers 1014c33a2313SAndrey V. Elsukov * expect M_PKTHDR presence. 1015c33a2313SAndrey V. Elsukov */ 1016c33a2313SAndrey V. Elsukov m0->m_pkthdr.rcvif = NULL; 1017c33a2313SAndrey V. Elsukov m0->m_pkthdr.flowid = 0; 1018c33a2313SAndrey V. Elsukov m0->m_pkthdr.csum_flags = 0; 1019c33a2313SAndrey V. Elsukov m0->m_pkthdr.fibnum = 0; 1020c33a2313SAndrey V. Elsukov m0->m_pkthdr.rsstype = 0; 1021c33a2313SAndrey V. 
Elsukov } 10228de34a88SAlan Somers if (ctrl_last) 10238de34a88SAlan Somers ctrl_last->m_next = m0; /* concatenate data to control */ 1024df8bae1dSRodney W. Grimes else 1025df8bae1dSRodney W. Grimes control = m0; 1026df8bae1dSRodney W. Grimes m->m_next = control; 1027395bb186SSam Leffler for (n = m; n->m_next != NULL; n = n->m_next) 1028df8bae1dSRodney W. Grimes sballoc(sb, n); 1029395bb186SSam Leffler sballoc(sb, n); 1030395bb186SSam Leffler nlast = n; 1031395bb186SSam Leffler SBLINKRECORD(sb, m); 1032395bb186SSam Leffler 1033395bb186SSam Leffler sb->sb_mbtail = nlast; 1034395bb186SSam Leffler SBLASTMBUFCHK(sb); 1035395bb186SSam Leffler 1036395bb186SSam Leffler SBLASTRECORDCHK(sb); 1037df8bae1dSRodney W. Grimes return (1); 1038df8bae1dSRodney W. Grimes } 1039df8bae1dSRodney W. Grimes 1040a34b7046SRobert Watson /* 1041050ac265SRobert Watson * Append address and data, and optionally, control (ancillary) data to the 1042050ac265SRobert Watson * receive queue of a socket. If present, m0 must include a packet header 1043050ac265SRobert Watson * with total length. Returns 0 if no space in sockbuf or insufficient 1044050ac265SRobert Watson * mbufs. 1045a34b7046SRobert Watson */ 104626f9a767SRodney W. 
Grimes int 10478de34a88SAlan Somers sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, 10488de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 10498de34a88SAlan Somers { 10508de34a88SAlan Somers struct mbuf *ctrl_last; 10518de34a88SAlan Somers int space = asa->sa_len; 10528de34a88SAlan Somers 10538de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 10548de34a88SAlan Somers 10558de34a88SAlan Somers if (m0 && (m0->m_flags & M_PKTHDR) == 0) 10568de34a88SAlan Somers panic("sbappendaddr_locked"); 10578de34a88SAlan Somers if (m0) 10588de34a88SAlan Somers space += m0->m_pkthdr.len; 10598de34a88SAlan Somers space += m_length(control, &ctrl_last); 10608de34a88SAlan Somers 10618de34a88SAlan Somers if (space > sbspace(sb)) 10628de34a88SAlan Somers return (0); 10638de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 10648de34a88SAlan Somers } 10658de34a88SAlan Somers 10668de34a88SAlan Somers /* 10678de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 10688de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 10698de34a88SAlan Somers * with total length. Returns 0 if insufficient mbufs. Does not validate space 10708de34a88SAlan Somers * on the receiving sockbuf. 10718de34a88SAlan Somers */ 10728de34a88SAlan Somers int 10738de34a88SAlan Somers sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, 10748de34a88SAlan Somers struct mbuf *m0, struct mbuf *control) 10758de34a88SAlan Somers { 10768de34a88SAlan Somers struct mbuf *ctrl_last; 10778de34a88SAlan Somers 10788de34a88SAlan Somers SOCKBUF_LOCK_ASSERT(sb); 10798de34a88SAlan Somers 10808de34a88SAlan Somers ctrl_last = (control == NULL) ? 
NULL : m_last(control); 10818de34a88SAlan Somers return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); 10828de34a88SAlan Somers } 10838de34a88SAlan Somers 10848de34a88SAlan Somers /* 10858de34a88SAlan Somers * Append address and data, and optionally, control (ancillary) data to the 10868de34a88SAlan Somers * receive queue of a socket. If present, m0 must include a packet header 10878de34a88SAlan Somers * with total length. Returns 0 if no space in sockbuf or insufficient 10888de34a88SAlan Somers * mbufs. 10898de34a88SAlan Somers */ 10908de34a88SAlan Somers int 1091050ac265SRobert Watson sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, 1092050ac265SRobert Watson struct mbuf *m0, struct mbuf *control) 1093a34b7046SRobert Watson { 1094a34b7046SRobert Watson int retval; 1095a34b7046SRobert Watson 1096a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1097a34b7046SRobert Watson retval = sbappendaddr_locked(sb, asa, m0, control); 1098a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1099a34b7046SRobert Watson return (retval); 1100a34b7046SRobert Watson } 1101a34b7046SRobert Watson 11025b0480f2SMark Johnston void 1103050ac265SRobert Watson sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, 110425f4ddfbSMark Johnston struct mbuf *control, int flags) 1105df8bae1dSRodney W. Grimes { 11065b0480f2SMark Johnston struct mbuf *m, *mlast; 1107df8bae1dSRodney W. Grimes 110825f4ddfbSMark Johnston sbm_clrprotoflags(m0, flags); 11095b0480f2SMark Johnston m_last(control)->m_next = m0; 1110395bb186SSam Leffler 1111395bb186SSam Leffler SBLASTRECORDCHK(sb); 1112395bb186SSam Leffler 1113395bb186SSam Leffler for (m = control; m->m_next; m = m->m_next) 1114df8bae1dSRodney W. 
Grimes sballoc(sb, m); 1115395bb186SSam Leffler sballoc(sb, m); 1116395bb186SSam Leffler mlast = m; 1117395bb186SSam Leffler SBLINKRECORD(sb, control); 1118395bb186SSam Leffler 1119395bb186SSam Leffler sb->sb_mbtail = mlast; 1120395bb186SSam Leffler SBLASTMBUFCHK(sb); 1121395bb186SSam Leffler 1122395bb186SSam Leffler SBLASTRECORDCHK(sb); 1123df8bae1dSRodney W. Grimes } 1124df8bae1dSRodney W. Grimes 11255b0480f2SMark Johnston void 112625f4ddfbSMark Johnston sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, 112725f4ddfbSMark Johnston int flags) 1128a34b7046SRobert Watson { 1129a34b7046SRobert Watson 1130a34b7046SRobert Watson SOCKBUF_LOCK(sb); 113125f4ddfbSMark Johnston sbappendcontrol_locked(sb, m0, control, flags); 1132a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1133a34b7046SRobert Watson } 1134a34b7046SRobert Watson 1135df8bae1dSRodney W. Grimes /* 11367da7362bSRobert Watson * Append the data in mbuf chain (m) into the socket buffer sb following mbuf 11377da7362bSRobert Watson * (n). If (n) is NULL, the buffer is presumed empty. 11387da7362bSRobert Watson * 11397da7362bSRobert Watson * When the data is compressed, mbufs in the chain may be handled in one of 11407da7362bSRobert Watson * three ways: 11417da7362bSRobert Watson * 11427da7362bSRobert Watson * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no 11437da7362bSRobert Watson * record boundary, and no change in data type). 11447da7362bSRobert Watson * 11457da7362bSRobert Watson * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into 11467da7362bSRobert Watson * an mbuf already in the socket buffer. This can occur if an 11470f9d0a73SGleb Smirnoff * appropriate mbuf exists, there is room, both mbufs are not marked as 11480f9d0a73SGleb Smirnoff * not ready, and no merging of data types will occur. 11497da7362bSRobert Watson * 11507da7362bSRobert Watson * (3) The mbuf may be appended to the end of the existing mbuf chain. 
11517da7362bSRobert Watson * 11527da7362bSRobert Watson * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as 11537da7362bSRobert Watson * end-of-record. 1154df8bae1dSRodney W. Grimes */ 115526f9a767SRodney W. Grimes void 1156050ac265SRobert Watson sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n) 1157df8bae1dSRodney W. Grimes { 1158050ac265SRobert Watson int eor = 0; 1159050ac265SRobert Watson struct mbuf *o; 1160df8bae1dSRodney W. Grimes 1161a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1162a34b7046SRobert Watson 1163df8bae1dSRodney W. Grimes while (m) { 1164df8bae1dSRodney W. Grimes eor |= m->m_flags & M_EOR; 1165df8bae1dSRodney W. Grimes if (m->m_len == 0 && 1166df8bae1dSRodney W. Grimes (eor == 0 || 1167df8bae1dSRodney W. Grimes (((o = m->m_next) || (o = n)) && 1168df8bae1dSRodney W. Grimes o->m_type == m->m_type))) { 1169395bb186SSam Leffler if (sb->sb_lastrecord == m) 1170395bb186SSam Leffler sb->sb_lastrecord = m->m_next; 1171df8bae1dSRodney W. Grimes m = m_free(m); 1172df8bae1dSRodney W. Grimes continue; 1173df8bae1dSRodney W. Grimes } 117432af0d74SDavid Malone if (n && (n->m_flags & M_EOR) == 0 && 117532af0d74SDavid Malone M_WRITABLE(n) && 11765e0f5cfaSKip Macy ((sb->sb_flags & SB_NOCOALESCE) == 0) && 11770f9d0a73SGleb Smirnoff !(m->m_flags & M_NOTREADY) && 117882334850SJohn Baldwin !(n->m_flags & (M_NOTREADY | M_NOMAP)) && 1179b2e60773SJohn Baldwin !mbuf_has_tls_session(m) && 1180b2e60773SJohn Baldwin !mbuf_has_tls_session(n) && 118132af0d74SDavid Malone m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ 118232af0d74SDavid Malone m->m_len <= M_TRAILINGSPACE(n) && 1183df8bae1dSRodney W. Grimes n->m_type == m->m_type) { 118482334850SJohn Baldwin m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); 1185df8bae1dSRodney W. 
Grimes n->m_len += m->m_len; 11860f9d0a73SGleb Smirnoff sb->sb_ccc += m->m_len; 11870f9d0a73SGleb Smirnoff if (sb->sb_fnrdy == NULL) 11880f9d0a73SGleb Smirnoff sb->sb_acc += m->m_len; 118934333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 1190b3f1af6bSTim J. Robbins /* XXX: Probably don't need.*/ 119104ac9b97SKelly Yancey sb->sb_ctl += m->m_len; 1192df8bae1dSRodney W. Grimes m = m_free(m); 1193df8bae1dSRodney W. Grimes continue; 1194df8bae1dSRodney W. Grimes } 119582334850SJohn Baldwin if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) && 1196b2e60773SJohn Baldwin (m->m_flags & M_NOTREADY) == 0 && 1197b2e60773SJohn Baldwin !mbuf_has_tls_session(m)) 119882334850SJohn Baldwin (void)mb_unmapped_compress(m); 1199df8bae1dSRodney W. Grimes if (n) 1200df8bae1dSRodney W. Grimes n->m_next = m; 1201df8bae1dSRodney W. Grimes else 1202df8bae1dSRodney W. Grimes sb->sb_mb = m; 1203395bb186SSam Leffler sb->sb_mbtail = m; 1204df8bae1dSRodney W. Grimes sballoc(sb, m); 1205df8bae1dSRodney W. Grimes n = m; 1206df8bae1dSRodney W. Grimes m->m_flags &= ~M_EOR; 1207df8bae1dSRodney W. Grimes m = m->m_next; 1208df8bae1dSRodney W. Grimes n->m_next = 0; 1209df8bae1dSRodney W. Grimes } 1210df8bae1dSRodney W. Grimes if (eor) { 12117da7362bSRobert Watson KASSERT(n != NULL, ("sbcompress: eor && n == NULL")); 1212df8bae1dSRodney W. Grimes n->m_flags |= eor; 1213df8bae1dSRodney W. Grimes } 1214395bb186SSam Leffler SBLASTMBUFCHK(sb); 1215df8bae1dSRodney W. Grimes } 1216df8bae1dSRodney W. Grimes 1217df8bae1dSRodney W. Grimes /* 1218050ac265SRobert Watson * Free all mbufs in a sockbuf. Check that all resources are reclaimed. 1219df8bae1dSRodney W. Grimes */ 1220eaa6dfbcSRobert Watson static void 1221050ac265SRobert Watson sbflush_internal(struct sockbuf *sb) 1222df8bae1dSRodney W. Grimes { 1223df8bae1dSRodney W. 
Grimes 122423f84772SPierre Beyssac while (sb->sb_mbcnt) { 122523f84772SPierre Beyssac /* 1226761a9a1fSGleb Smirnoff * Don't call sbcut(sb, 0) if the leading mbuf is non-empty: 122723f84772SPierre Beyssac * we would loop forever. Panic instead. 122823f84772SPierre Beyssac */ 12290f9d0a73SGleb Smirnoff if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len)) 123023f84772SPierre Beyssac break; 12310f9d0a73SGleb Smirnoff m_freem(sbcut_internal(sb, (int)sb->sb_ccc)); 123223f84772SPierre Beyssac } 12330f9d0a73SGleb Smirnoff KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0, 12340f9d0a73SGleb Smirnoff ("%s: ccc %u mb %p mbcnt %u", __func__, 12350f9d0a73SGleb Smirnoff sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt)); 1236a34b7046SRobert Watson } 1237a34b7046SRobert Watson 1238a34b7046SRobert Watson void 1239050ac265SRobert Watson sbflush_locked(struct sockbuf *sb) 1240eaa6dfbcSRobert Watson { 1241eaa6dfbcSRobert Watson 1242eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1243eaa6dfbcSRobert Watson sbflush_internal(sb); 1244eaa6dfbcSRobert Watson } 1245eaa6dfbcSRobert Watson 1246eaa6dfbcSRobert Watson void 1247050ac265SRobert Watson sbflush(struct sockbuf *sb) 1248a34b7046SRobert Watson { 1249a34b7046SRobert Watson 1250a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1251a34b7046SRobert Watson sbflush_locked(sb); 1252a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1253df8bae1dSRodney W. Grimes } 1254df8bae1dSRodney W. Grimes 1255df8bae1dSRodney W. Grimes /* 12561d2df300SGleb Smirnoff * Cut data from (the front of) a sockbuf. 1257df8bae1dSRodney W. Grimes */ 12581d2df300SGleb Smirnoff static struct mbuf * 12591d2df300SGleb Smirnoff sbcut_internal(struct sockbuf *sb, int len) 1260df8bae1dSRodney W. Grimes { 12610f9d0a73SGleb Smirnoff struct mbuf *m, *next, *mfree; 1262df8bae1dSRodney W. 
Grimes 1263f41b2de7SHiren Panchasara KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", 1264b5b023b9SHiren Panchasara __func__, len)); 1265b5b023b9SHiren Panchasara KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", 1266b5b023b9SHiren Panchasara __func__, len, sb->sb_ccc)); 1267b5b023b9SHiren Panchasara 1268df8bae1dSRodney W. Grimes next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 12691d2df300SGleb Smirnoff mfree = NULL; 12701d2df300SGleb Smirnoff 1271df8bae1dSRodney W. Grimes while (len > 0) { 12728146bcfeSGleb Smirnoff if (m == NULL) { 12738146bcfeSGleb Smirnoff KASSERT(next, ("%s: no next, len %d", __func__, len)); 1274df8bae1dSRodney W. Grimes m = next; 1275df8bae1dSRodney W. Grimes next = m->m_nextpkt; 1276df8bae1dSRodney W. Grimes } 1277df8bae1dSRodney W. Grimes if (m->m_len > len) { 12780f9d0a73SGleb Smirnoff KASSERT(!(m->m_flags & M_NOTAVAIL), 12790f9d0a73SGleb Smirnoff ("%s: m %p M_NOTAVAIL", __func__, m)); 1280df8bae1dSRodney W. Grimes m->m_len -= len; 1281df8bae1dSRodney W. Grimes m->m_data += len; 12820f9d0a73SGleb Smirnoff sb->sb_ccc -= len; 12830f9d0a73SGleb Smirnoff sb->sb_acc -= len; 12844e023759SAndre Oppermann if (sb->sb_sndptroff != 0) 12854e023759SAndre Oppermann sb->sb_sndptroff -= len; 128634333b16SAndre Oppermann if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA) 128704ac9b97SKelly Yancey sb->sb_ctl -= len; 1288df8bae1dSRodney W. Grimes break; 1289df8bae1dSRodney W. Grimes } 1290df8bae1dSRodney W. Grimes len -= m->m_len; 1291df8bae1dSRodney W. Grimes sbfree(sb, m); 12920f9d0a73SGleb Smirnoff /* 12930f9d0a73SGleb Smirnoff * Do not put M_NOTREADY buffers to the free list, they 12940f9d0a73SGleb Smirnoff * are referenced from outside. 
12950f9d0a73SGleb Smirnoff */ 12960f9d0a73SGleb Smirnoff if (m->m_flags & M_NOTREADY) 12970f9d0a73SGleb Smirnoff m = m->m_next; 12980f9d0a73SGleb Smirnoff else { 12990f9d0a73SGleb Smirnoff struct mbuf *n; 13000f9d0a73SGleb Smirnoff 13011d2df300SGleb Smirnoff n = m->m_next; 13021d2df300SGleb Smirnoff m->m_next = mfree; 13031d2df300SGleb Smirnoff mfree = m; 13041d2df300SGleb Smirnoff m = n; 1305df8bae1dSRodney W. Grimes } 13060f9d0a73SGleb Smirnoff } 1307e834a840SGleb Smirnoff /* 1308e834a840SGleb Smirnoff * Free any zero-length mbufs from the buffer. 1309e834a840SGleb Smirnoff * For SOCK_DGRAM sockets such mbufs represent empty records. 1310e834a840SGleb Smirnoff * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer, 1311e834a840SGleb Smirnoff * when sosend_generic() needs to send only control data. 1312e834a840SGleb Smirnoff */ 1313e834a840SGleb Smirnoff while (m && m->m_len == 0) { 1314e834a840SGleb Smirnoff struct mbuf *n; 1315e834a840SGleb Smirnoff 1316e834a840SGleb Smirnoff sbfree(sb, m); 1317e834a840SGleb Smirnoff n = m->m_next; 1318e834a840SGleb Smirnoff m->m_next = mfree; 1319e834a840SGleb Smirnoff mfree = m; 1320e834a840SGleb Smirnoff m = n; 1321e834a840SGleb Smirnoff } 1322df8bae1dSRodney W. Grimes if (m) { 1323df8bae1dSRodney W. Grimes sb->sb_mb = m; 1324df8bae1dSRodney W. Grimes m->m_nextpkt = next; 1325df8bae1dSRodney W. Grimes } else 1326df8bae1dSRodney W. Grimes sb->sb_mb = next; 1327395bb186SSam Leffler /* 1328050ac265SRobert Watson * First part is an inline SB_EMPTY_FIXUP(). Second part makes sure 1329050ac265SRobert Watson * sb_lastrecord is up-to-date if we dropped part of the last record. 
1330395bb186SSam Leffler */ 1331395bb186SSam Leffler m = sb->sb_mb; 1332395bb186SSam Leffler if (m == NULL) { 1333395bb186SSam Leffler sb->sb_mbtail = NULL; 1334395bb186SSam Leffler sb->sb_lastrecord = NULL; 1335395bb186SSam Leffler } else if (m->m_nextpkt == NULL) { 1336395bb186SSam Leffler sb->sb_lastrecord = m; 1337395bb186SSam Leffler } 13381d2df300SGleb Smirnoff 13391d2df300SGleb Smirnoff return (mfree); 1340df8bae1dSRodney W. Grimes } 1341df8bae1dSRodney W. Grimes 1342df8bae1dSRodney W. Grimes /* 1343a34b7046SRobert Watson * Drop data from (the front of) a sockbuf. 1344a34b7046SRobert Watson */ 1345a34b7046SRobert Watson void 1346050ac265SRobert Watson sbdrop_locked(struct sockbuf *sb, int len) 1347eaa6dfbcSRobert Watson { 1348eaa6dfbcSRobert Watson 1349eaa6dfbcSRobert Watson SOCKBUF_LOCK_ASSERT(sb); 13501d2df300SGleb Smirnoff m_freem(sbcut_internal(sb, len)); 13511d2df300SGleb Smirnoff } 1352eaa6dfbcSRobert Watson 13531d2df300SGleb Smirnoff /* 13541d2df300SGleb Smirnoff * Drop data from (the front of) a sockbuf, 13551d2df300SGleb Smirnoff * and return it to caller. 
13561d2df300SGleb Smirnoff */ 13571d2df300SGleb Smirnoff struct mbuf * 13581d2df300SGleb Smirnoff sbcut_locked(struct sockbuf *sb, int len) 13591d2df300SGleb Smirnoff { 13601d2df300SGleb Smirnoff 13611d2df300SGleb Smirnoff SOCKBUF_LOCK_ASSERT(sb); 13621d2df300SGleb Smirnoff return (sbcut_internal(sb, len)); 1363eaa6dfbcSRobert Watson } 1364eaa6dfbcSRobert Watson 1365eaa6dfbcSRobert Watson void 1366050ac265SRobert Watson sbdrop(struct sockbuf *sb, int len) 1367a34b7046SRobert Watson { 13681d2df300SGleb Smirnoff struct mbuf *mfree; 1369a34b7046SRobert Watson 1370a34b7046SRobert Watson SOCKBUF_LOCK(sb); 13711d2df300SGleb Smirnoff mfree = sbcut_internal(sb, len); 1372a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 13731d2df300SGleb Smirnoff 13741d2df300SGleb Smirnoff m_freem(mfree); 1375a34b7046SRobert Watson } 1376a34b7046SRobert Watson 137789e560f4SRandall Stewart struct mbuf * 137889e560f4SRandall Stewart sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) 137989e560f4SRandall Stewart { 138089e560f4SRandall Stewart struct mbuf *m; 138189e560f4SRandall Stewart 138289e560f4SRandall Stewart KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 138389e560f4SRandall Stewart if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 138489e560f4SRandall Stewart *moff = off; 138589e560f4SRandall Stewart if (sb->sb_sndptr == NULL) { 138689e560f4SRandall Stewart sb->sb_sndptr = sb->sb_mb; 138789e560f4SRandall Stewart sb->sb_sndptroff = 0; 138889e560f4SRandall Stewart } 138989e560f4SRandall Stewart return (sb->sb_mb); 139089e560f4SRandall Stewart } else { 139189e560f4SRandall Stewart m = sb->sb_sndptr; 139289e560f4SRandall Stewart off -= sb->sb_sndptroff; 139389e560f4SRandall Stewart } 139489e560f4SRandall Stewart *moff = off; 139589e560f4SRandall Stewart return (m); 139689e560f4SRandall Stewart } 139789e560f4SRandall Stewart 139889e560f4SRandall Stewart void 139989e560f4SRandall Stewart sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) 
140089e560f4SRandall Stewart { 140189e560f4SRandall Stewart /* 140289e560f4SRandall Stewart * A small copy was done, advance forward the sb_sbsndptr to cover 140389e560f4SRandall Stewart * it. 140489e560f4SRandall Stewart */ 140589e560f4SRandall Stewart struct mbuf *m; 140689e560f4SRandall Stewart 140789e560f4SRandall Stewart if (mb != sb->sb_sndptr) { 140889e560f4SRandall Stewart /* Did not copyout at the same mbuf */ 140989e560f4SRandall Stewart return; 141089e560f4SRandall Stewart } 141189e560f4SRandall Stewart m = mb; 141289e560f4SRandall Stewart while (m && (len > 0)) { 141389e560f4SRandall Stewart if (len >= m->m_len) { 141489e560f4SRandall Stewart len -= m->m_len; 141589e560f4SRandall Stewart if (m->m_next) { 141689e560f4SRandall Stewart sb->sb_sndptroff += m->m_len; 141789e560f4SRandall Stewart sb->sb_sndptr = m->m_next; 141889e560f4SRandall Stewart } 141989e560f4SRandall Stewart m = m->m_next; 142089e560f4SRandall Stewart } else { 142189e560f4SRandall Stewart len = 0; 142289e560f4SRandall Stewart } 142389e560f4SRandall Stewart } 142489e560f4SRandall Stewart } 142589e560f4SRandall Stewart 1426a34b7046SRobert Watson /* 14279fd573c3SHans Petter Selasky * Return the first mbuf and the mbuf data offset for the provided 14289fd573c3SHans Petter Selasky * send offset without changing the "sb_sndptroff" field. 
14299fd573c3SHans Petter Selasky */ 14309fd573c3SHans Petter Selasky struct mbuf * 14319fd573c3SHans Petter Selasky sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff) 14329fd573c3SHans Petter Selasky { 14339fd573c3SHans Petter Selasky struct mbuf *m; 14349fd573c3SHans Petter Selasky 14359fd573c3SHans Petter Selasky KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); 14369fd573c3SHans Petter Selasky 14379fd573c3SHans Petter Selasky /* 14389fd573c3SHans Petter Selasky * If the "off" is below the stored offset, which happens on 14399fd573c3SHans Petter Selasky * retransmits, just use "sb_mb": 14409fd573c3SHans Petter Selasky */ 14419fd573c3SHans Petter Selasky if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { 14429fd573c3SHans Petter Selasky m = sb->sb_mb; 14439fd573c3SHans Petter Selasky } else { 14449fd573c3SHans Petter Selasky m = sb->sb_sndptr; 14459fd573c3SHans Petter Selasky off -= sb->sb_sndptroff; 14469fd573c3SHans Petter Selasky } 14479fd573c3SHans Petter Selasky while (off > 0 && m != NULL) { 14489fd573c3SHans Petter Selasky if (off < m->m_len) 14499fd573c3SHans Petter Selasky break; 14509fd573c3SHans Petter Selasky off -= m->m_len; 14519fd573c3SHans Petter Selasky m = m->m_next; 14529fd573c3SHans Petter Selasky } 14539fd573c3SHans Petter Selasky *moff = off; 14549fd573c3SHans Petter Selasky return (m); 14559fd573c3SHans Petter Selasky } 14569fd573c3SHans Petter Selasky 14579fd573c3SHans Petter Selasky /* 1458050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1459050ac265SRobert Watson * front. 1460df8bae1dSRodney W. Grimes */ 146126f9a767SRodney W. Grimes void 1462050ac265SRobert Watson sbdroprecord_locked(struct sockbuf *sb) 1463df8bae1dSRodney W. Grimes { 1464050ac265SRobert Watson struct mbuf *m; 1465df8bae1dSRodney W. Grimes 1466a34b7046SRobert Watson SOCKBUF_LOCK_ASSERT(sb); 1467a34b7046SRobert Watson 1468df8bae1dSRodney W. Grimes m = sb->sb_mb; 1469df8bae1dSRodney W. 
Grimes if (m) { 1470df8bae1dSRodney W. Grimes sb->sb_mb = m->m_nextpkt; 1471df8bae1dSRodney W. Grimes do { 1472df8bae1dSRodney W. Grimes sbfree(sb, m); 1473ecde8f7cSMatthew Dillon m = m_free(m); 1474797f2d22SPoul-Henning Kamp } while (m); 1475df8bae1dSRodney W. Grimes } 1476395bb186SSam Leffler SB_EMPTY_FIXUP(sb); 1477df8bae1dSRodney W. Grimes } 14781e4ad9ceSGarrett Wollman 147982c23ebaSBill Fenner /* 1480050ac265SRobert Watson * Drop a record off the front of a sockbuf and move the next record to the 1481050ac265SRobert Watson * front. 1482a34b7046SRobert Watson */ 1483a34b7046SRobert Watson void 1484050ac265SRobert Watson sbdroprecord(struct sockbuf *sb) 1485a34b7046SRobert Watson { 1486a34b7046SRobert Watson 1487a34b7046SRobert Watson SOCKBUF_LOCK(sb); 1488a34b7046SRobert Watson sbdroprecord_locked(sb); 1489a34b7046SRobert Watson SOCKBUF_UNLOCK(sb); 1490a34b7046SRobert Watson } 1491a34b7046SRobert Watson 149220d9e5e8SRobert Watson /* 14938c799760SRobert Watson * Create a "control" mbuf containing the specified data with the specified 14948c799760SRobert Watson * type for presentation on a socket buffer. 
149520d9e5e8SRobert Watson */ 149620d9e5e8SRobert Watson struct mbuf * 1497d19e16a7SRobert Watson sbcreatecontrol(caddr_t p, int size, int type, int level) 149820d9e5e8SRobert Watson { 1499d19e16a7SRobert Watson struct cmsghdr *cp; 150020d9e5e8SRobert Watson struct mbuf *m; 150120d9e5e8SRobert Watson 150220d9e5e8SRobert Watson if (CMSG_SPACE((u_int)size) > MCLBYTES) 150320d9e5e8SRobert Watson return ((struct mbuf *) NULL); 150420d9e5e8SRobert Watson if (CMSG_SPACE((u_int)size) > MLEN) 1505eb1b1807SGleb Smirnoff m = m_getcl(M_NOWAIT, MT_CONTROL, 0); 150620d9e5e8SRobert Watson else 1507eb1b1807SGleb Smirnoff m = m_get(M_NOWAIT, MT_CONTROL); 150820d9e5e8SRobert Watson if (m == NULL) 150920d9e5e8SRobert Watson return ((struct mbuf *) NULL); 151020d9e5e8SRobert Watson cp = mtod(m, struct cmsghdr *); 151120d9e5e8SRobert Watson m->m_len = 0; 151220d9e5e8SRobert Watson KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), 151320d9e5e8SRobert Watson ("sbcreatecontrol: short mbuf")); 15142827952eSXin LI /* 15152827952eSXin LI * Don't leave the padding between the msg header and the 15162827952eSXin LI * cmsg data and the padding after the cmsg data un-initialized. 15172827952eSXin LI */ 15182827952eSXin LI bzero(cp, CMSG_SPACE((u_int)size)); 151920d9e5e8SRobert Watson if (p != NULL) 152020d9e5e8SRobert Watson (void)memcpy(CMSG_DATA(cp), p, size); 152120d9e5e8SRobert Watson m->m_len = CMSG_SPACE(size); 152220d9e5e8SRobert Watson cp->cmsg_len = CMSG_LEN(size); 152320d9e5e8SRobert Watson cp->cmsg_level = level; 152420d9e5e8SRobert Watson cp->cmsg_type = type; 152520d9e5e8SRobert Watson return (m); 152620d9e5e8SRobert Watson } 152720d9e5e8SRobert Watson 152820d9e5e8SRobert Watson /* 15298c799760SRobert Watson * This does the same for socket buffers that sotoxsocket does for sockets: 15308c799760SRobert Watson * generate an user-format data structure describing the socket buffer. 
Note 15318c799760SRobert Watson * that the xsockbuf structure, since it is always embedded in a socket, does 15328c799760SRobert Watson * not include a self pointer nor a length. We make this entry point public 15338c799760SRobert Watson * in case some other mechanism needs it. 153420d9e5e8SRobert Watson */ 153520d9e5e8SRobert Watson void 153620d9e5e8SRobert Watson sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) 153720d9e5e8SRobert Watson { 1538d19e16a7SRobert Watson 15390f9d0a73SGleb Smirnoff xsb->sb_cc = sb->sb_ccc; 154020d9e5e8SRobert Watson xsb->sb_hiwat = sb->sb_hiwat; 154120d9e5e8SRobert Watson xsb->sb_mbcnt = sb->sb_mbcnt; 154249f287f8SGeorge V. Neville-Neil xsb->sb_mcnt = sb->sb_mcnt; 154349f287f8SGeorge V. Neville-Neil xsb->sb_ccnt = sb->sb_ccnt; 154420d9e5e8SRobert Watson xsb->sb_mbmax = sb->sb_mbmax; 154520d9e5e8SRobert Watson xsb->sb_lowat = sb->sb_lowat; 154620d9e5e8SRobert Watson xsb->sb_flags = sb->sb_flags; 154720d9e5e8SRobert Watson xsb->sb_timeo = sb->sb_timeo; 154820d9e5e8SRobert Watson } 154920d9e5e8SRobert Watson 1550639acc13SGarrett Wollman /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ 1551639acc13SGarrett Wollman static int dummy; 1552e8cdbb48SPawel Biernacki SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW | CTLFLAG_SKIP, &dummy, 0, ""); 15537029da5cSPawel Biernacki SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, 15547029da5cSPawel Biernacki CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &sb_max, 0, 15557029da5cSPawel Biernacki sysctl_handle_sb_max, "LU", 15567029da5cSPawel Biernacki "Maximum socket buffer size"); 15571b978d45SHartmut Brandt SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, 15583eb9ab52SEitan Adler &sb_efficiency, 0, "Socket buffer size waste factor"); 1559