1099a0e58SBosko Milekic /*- 28a36da99SPedro F. Giffuni * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 38a36da99SPedro F. Giffuni * 48076cb52SBosko Milekic * Copyright (c) 2004, 2005, 58076cb52SBosko Milekic * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 6099a0e58SBosko Milekic * 7099a0e58SBosko Milekic * Redistribution and use in source and binary forms, with or without 8099a0e58SBosko Milekic * modification, are permitted provided that the following conditions 9099a0e58SBosko Milekic * are met: 10099a0e58SBosko Milekic * 1. Redistributions of source code must retain the above copyright 11099a0e58SBosko Milekic * notice unmodified, this list of conditions and the following 12099a0e58SBosko Milekic * disclaimer. 13099a0e58SBosko Milekic * 2. Redistributions in binary form must reproduce the above copyright 14099a0e58SBosko Milekic * notice, this list of conditions and the following disclaimer in the 15099a0e58SBosko Milekic * documentation and/or other materials provided with the distribution. 16099a0e58SBosko Milekic * 17099a0e58SBosko Milekic * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18099a0e58SBosko Milekic * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19099a0e58SBosko Milekic * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20099a0e58SBosko Milekic * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21099a0e58SBosko Milekic * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22099a0e58SBosko Milekic * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23099a0e58SBosko Milekic * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24099a0e58SBosko Milekic * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25099a0e58SBosko Milekic * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26099a0e58SBosko Milekic * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27099a0e58SBosko Milekic * SUCH DAMAGE. 28099a0e58SBosko Milekic */ 29099a0e58SBosko Milekic 30099a0e58SBosko Milekic #include <sys/cdefs.h> 31099a0e58SBosko Milekic __FBSDID("$FreeBSD$"); 32099a0e58SBosko Milekic 33099a0e58SBosko Milekic #include "opt_param.h" 34b2e60773SJohn Baldwin #include "opt_kern_tls.h" 35099a0e58SBosko Milekic 36099a0e58SBosko Milekic #include <sys/param.h> 373937ee75SConrad Meyer #include <sys/conf.h> 389978bd99SMark Johnston #include <sys/domainset.h> 39099a0e58SBosko Milekic #include <sys/malloc.h> 40099a0e58SBosko Milekic #include <sys/systm.h> 41099a0e58SBosko Milekic #include <sys/mbuf.h> 42099a0e58SBosko Milekic #include <sys/domain.h> 43099a0e58SBosko Milekic #include <sys/eventhandler.h> 44099a0e58SBosko Milekic #include <sys/kernel.h> 45b2e60773SJohn Baldwin #include <sys/ktls.h> 465475ca5aSMark Johnston #include <sys/limits.h> 4754503a13SJonathan T. Looney #include <sys/lock.h> 4854503a13SJonathan T. 
Looney #include <sys/mutex.h> 49099a0e58SBosko Milekic #include <sys/protosw.h> 50b2e60773SJohn Baldwin #include <sys/refcount.h> 5182334850SJohn Baldwin #include <sys/sf_buf.h> 52099a0e58SBosko Milekic #include <sys/smp.h> 53fb3bc596SJohn Baldwin #include <sys/socket.h> 54099a0e58SBosko Milekic #include <sys/sysctl.h> 55099a0e58SBosko Milekic 56fb3bc596SJohn Baldwin #include <net/if.h> 57fb3bc596SJohn Baldwin #include <net/if_var.h> 58fb3bc596SJohn Baldwin 59099a0e58SBosko Milekic #include <vm/vm.h> 60c45c0034SAlan Cox #include <vm/vm_extern.h> 61c45c0034SAlan Cox #include <vm/vm_kern.h> 62099a0e58SBosko Milekic #include <vm/vm_page.h> 6384d746deSRick Macklem #include <vm/vm_pageout.h> 6437140716SAndre Oppermann #include <vm/vm_map.h> 65099a0e58SBosko Milekic #include <vm/uma.h> 66121f0509SMike Silbersack #include <vm/uma_dbg.h> 67099a0e58SBosko Milekic 68099a0e58SBosko Milekic /* 69099a0e58SBosko Milekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 70099a0e58SBosko Milekic * Zones. 71099a0e58SBosko Milekic * 72099a0e58SBosko Milekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 73099a0e58SBosko Milekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 74099a0e58SBosko Milekic * administrator so desires. 75099a0e58SBosko Milekic * 7603270b59SJeff Roberson * Mbufs are allocated from a UMA Primary Zone called the Mbuf 77099a0e58SBosko Milekic * Zone. 78099a0e58SBosko Milekic * 79099a0e58SBosko Milekic * Additionally, FreeBSD provides a Packet Zone, which it 8003270b59SJeff Roberson * configures as a Secondary Zone to the Mbuf Primary Zone, 8103270b59SJeff Roberson * thus sharing backend Slab kegs with the Mbuf Primary Zone. 
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *    |                         |
 *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *    |   |             [     Packet   ]            |
 *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Primary Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones, its _ctor_ function is executed.  Likewise,
 * whenever an object is freed through uma_zfree(), its _dtor_
 * function is executed.
 *
 * Caches are per-CPU and are filled from the Primary Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool, it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with the
 * _zfini_ functions and freed back to the global memory pool.
11056a4e45aSAndre Oppermann * 111099a0e58SBosko Milekic */ 112099a0e58SBosko Milekic 113ead46972SAndre Oppermann int nmbufs; /* limits number of mbufs */ 11456a4e45aSAndre Oppermann int nmbclusters; /* limits number of mbuf clusters */ 115ec63cb90SAndre Oppermann int nmbjumbop; /* limits number of page size jumbo clusters */ 11656a4e45aSAndre Oppermann int nmbjumbo9; /* limits number of 9k jumbo clusters */ 11756a4e45aSAndre Oppermann int nmbjumbo16; /* limits number of 16k jumbo clusters */ 118099a0e58SBosko Milekic 11961664ee7SGleb Smirnoff bool mb_use_ext_pgs; /* use M_EXTPG mbufs for sendfile & TLS */ 120cec06a3eSJohn Baldwin SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, 121cec06a3eSJohn Baldwin &mb_use_ext_pgs, 0, 122b2e60773SJohn Baldwin "Use unmapped mbufs for sendfile(2) and TLS offload"); 123cec06a3eSJohn Baldwin 124e0c00addSAndre Oppermann static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ 125e0c00addSAndre Oppermann 126af3b2549SHans Petter Selasky SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, 127b6f49c23SHiren Panchasara "Maximum real memory allocatable to various mbuf types"); 128e0c00addSAndre Oppermann 129fb3bc596SJohn Baldwin static counter_u64_t snd_tag_count; 130fb3bc596SJohn Baldwin SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW, 131fb3bc596SJohn Baldwin &snd_tag_count, "# of active mbuf send tags"); 132fb3bc596SJohn Baldwin 13362938659SBjoern A. Zeeb /* 13437140716SAndre Oppermann * tunable_mbinit() has to be run before any mbuf allocations are done. 13562938659SBjoern A. 
Zeeb */ 136099a0e58SBosko Milekic static void 137099a0e58SBosko Milekic tunable_mbinit(void *dummy) 138099a0e58SBosko Milekic { 139e0c00addSAndre Oppermann quad_t realmem; 14037140716SAndre Oppermann 14137140716SAndre Oppermann /* 14237140716SAndre Oppermann * The default limit for all mbuf related memory is 1/2 of all 14337140716SAndre Oppermann * available kernel memory (physical or kmem). 14437140716SAndre Oppermann * At most it can be 3/4 of available kernel memory. 14537140716SAndre Oppermann */ 1465df87b21SJeff Roberson realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size); 14737140716SAndre Oppermann maxmbufmem = realmem / 2; 148e0c00addSAndre Oppermann TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); 14937140716SAndre Oppermann if (maxmbufmem > realmem / 4 * 3) 15037140716SAndre Oppermann maxmbufmem = realmem / 4 * 3; 151099a0e58SBosko Milekic 152812302c3SNavdeep Parhar TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 153416a434cSAndre Oppermann if (nmbclusters == 0) 154416a434cSAndre Oppermann nmbclusters = maxmbufmem / MCLBYTES / 4; 155812302c3SNavdeep Parhar 156812302c3SNavdeep Parhar TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); 157812302c3SNavdeep Parhar if (nmbjumbop == 0) 158416a434cSAndre Oppermann nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; 159812302c3SNavdeep Parhar 160812302c3SNavdeep Parhar TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); 161812302c3SNavdeep Parhar if (nmbjumbo9 == 0) 162416a434cSAndre Oppermann nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; 163812302c3SNavdeep Parhar 164812302c3SNavdeep Parhar TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); 165812302c3SNavdeep Parhar if (nmbjumbo16 == 0) 166416a434cSAndre Oppermann nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; 167416a434cSAndre Oppermann 168416a434cSAndre Oppermann /* 169416a434cSAndre Oppermann * We need at least as many mbufs as we have clusters of 170416a434cSAndre Oppermann * the various types added together. 
171416a434cSAndre Oppermann */ 172416a434cSAndre Oppermann TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 173416a434cSAndre Oppermann if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 174416a434cSAndre Oppermann nmbufs = lmax(maxmbufmem / MSIZE / 5, 175416a434cSAndre Oppermann nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); 176099a0e58SBosko Milekic } 17737140716SAndre Oppermann SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 178099a0e58SBosko Milekic 1794f590175SPaul Saab static int 1804f590175SPaul Saab sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 1814f590175SPaul Saab { 1824f590175SPaul Saab int error, newnmbclusters; 1834f590175SPaul Saab 1844f590175SPaul Saab newnmbclusters = nmbclusters; 185041b706bSDavid Malone error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 186d251e700SJohn Baldwin if (error == 0 && req->newptr && newnmbclusters != nmbclusters) { 187ead46972SAndre Oppermann if (newnmbclusters > nmbclusters && 188ead46972SAndre Oppermann nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 1894f590175SPaul Saab nmbclusters = newnmbclusters; 190bc4a1b8cSAndre Oppermann nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 1914f590175SPaul Saab EVENTHANDLER_INVOKE(nmbclusters_change); 1924f590175SPaul Saab } else 1934f590175SPaul Saab error = EINVAL; 1944f590175SPaul Saab } 1954f590175SPaul Saab return (error); 1964f590175SPaul Saab } 1977029da5cSPawel Biernacki SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, 1987029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbclusters, 0, 1997029da5cSPawel Biernacki sysctl_nmbclusters, "IU", 200099a0e58SBosko Milekic "Maximum number of mbuf clusters allowed"); 201cf70a46bSRandall Stewart 202cf70a46bSRandall Stewart static int 203cf70a46bSRandall Stewart sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 204cf70a46bSRandall Stewart { 205cf70a46bSRandall Stewart int error, newnmbjumbop; 206cf70a46bSRandall Stewart 207cf70a46bSRandall Stewart newnmbjumbop = 
nmbjumbop; 208cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 209d251e700SJohn Baldwin if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) { 210ead46972SAndre Oppermann if (newnmbjumbop > nmbjumbop && 211ead46972SAndre Oppermann nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 212cf70a46bSRandall Stewart nmbjumbop = newnmbjumbop; 213bc4a1b8cSAndre Oppermann nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 214cf70a46bSRandall Stewart } else 215cf70a46bSRandall Stewart error = EINVAL; 216cf70a46bSRandall Stewart } 217cf70a46bSRandall Stewart return (error); 218cf70a46bSRandall Stewart } 2197029da5cSPawel Biernacki SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, 2207029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbjumbop, 0, 2217029da5cSPawel Biernacki sysctl_nmbjumbop, "IU", 222ec63cb90SAndre Oppermann "Maximum number of mbuf page size jumbo clusters allowed"); 223cf70a46bSRandall Stewart 224cf70a46bSRandall Stewart static int 225cf70a46bSRandall Stewart sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 226cf70a46bSRandall Stewart { 227cf70a46bSRandall Stewart int error, newnmbjumbo9; 228cf70a46bSRandall Stewart 229cf70a46bSRandall Stewart newnmbjumbo9 = nmbjumbo9; 230cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 231d251e700SJohn Baldwin if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) { 232ead46972SAndre Oppermann if (newnmbjumbo9 > nmbjumbo9 && 233ead46972SAndre Oppermann nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 234cf70a46bSRandall Stewart nmbjumbo9 = newnmbjumbo9; 235bc4a1b8cSAndre Oppermann nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 236cf70a46bSRandall Stewart } else 237cf70a46bSRandall Stewart error = EINVAL; 238cf70a46bSRandall Stewart } 239cf70a46bSRandall Stewart return (error); 240cf70a46bSRandall Stewart } 2417029da5cSPawel Biernacki SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, 2427029da5cSPawel Biernacki 
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbjumbo9, 0, 2437029da5cSPawel Biernacki sysctl_nmbjumbo9, "IU", 24456a4e45aSAndre Oppermann "Maximum number of mbuf 9k jumbo clusters allowed"); 245cf70a46bSRandall Stewart 246cf70a46bSRandall Stewart static int 247cf70a46bSRandall Stewart sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 248cf70a46bSRandall Stewart { 249cf70a46bSRandall Stewart int error, newnmbjumbo16; 250cf70a46bSRandall Stewart 251cf70a46bSRandall Stewart newnmbjumbo16 = nmbjumbo16; 252cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 253d251e700SJohn Baldwin if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) { 254ead46972SAndre Oppermann if (newnmbjumbo16 > nmbjumbo16 && 255ead46972SAndre Oppermann nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 256cf70a46bSRandall Stewart nmbjumbo16 = newnmbjumbo16; 257bc4a1b8cSAndre Oppermann nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 258cf70a46bSRandall Stewart } else 259cf70a46bSRandall Stewart error = EINVAL; 260cf70a46bSRandall Stewart } 261cf70a46bSRandall Stewart return (error); 262cf70a46bSRandall Stewart } 2637029da5cSPawel Biernacki SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, 2647029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &nmbjumbo16, 0, 2657029da5cSPawel Biernacki sysctl_nmbjumbo16, "IU", 26656a4e45aSAndre Oppermann "Maximum number of mbuf 16k jumbo clusters allowed"); 267cf70a46bSRandall Stewart 268ead46972SAndre Oppermann static int 269ead46972SAndre Oppermann sysctl_nmbufs(SYSCTL_HANDLER_ARGS) 270ead46972SAndre Oppermann { 271ead46972SAndre Oppermann int error, newnmbufs; 272ead46972SAndre Oppermann 273ead46972SAndre Oppermann newnmbufs = nmbufs; 274ead46972SAndre Oppermann error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 275d251e700SJohn Baldwin if (error == 0 && req->newptr && newnmbufs != nmbufs) { 276ead46972SAndre Oppermann if (newnmbufs > nmbufs) { 277ead46972SAndre Oppermann nmbufs = newnmbufs; 
278bc4a1b8cSAndre Oppermann nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 279ead46972SAndre Oppermann EVENTHANDLER_INVOKE(nmbufs_change); 280ead46972SAndre Oppermann } else 281ead46972SAndre Oppermann error = EINVAL; 282ead46972SAndre Oppermann } 283ead46972SAndre Oppermann return (error); 284ead46972SAndre Oppermann } 2857029da5cSPawel Biernacki SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, 2867029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 287ead46972SAndre Oppermann &nmbufs, 0, sysctl_nmbufs, "IU", 288ead46972SAndre Oppermann "Maximum number of mbufs allowed"); 289cf70a46bSRandall Stewart 290099a0e58SBosko Milekic /* 291099a0e58SBosko Milekic * Zones from which we allocate. 292099a0e58SBosko Milekic */ 293099a0e58SBosko Milekic uma_zone_t zone_mbuf; 294099a0e58SBosko Milekic uma_zone_t zone_clust; 295099a0e58SBosko Milekic uma_zone_t zone_pack; 296ec63cb90SAndre Oppermann uma_zone_t zone_jumbop; 29756a4e45aSAndre Oppermann uma_zone_t zone_jumbo9; 29856a4e45aSAndre Oppermann uma_zone_t zone_jumbo16; 299099a0e58SBosko Milekic 300099a0e58SBosko Milekic /* 301099a0e58SBosko Milekic * Local prototypes. 302099a0e58SBosko Milekic */ 303b23f72e9SBrian Feldman static int mb_ctor_mbuf(void *, int, void *, int); 304b23f72e9SBrian Feldman static int mb_ctor_clust(void *, int, void *, int); 305b23f72e9SBrian Feldman static int mb_ctor_pack(void *, int, void *, int); 306099a0e58SBosko Milekic static void mb_dtor_mbuf(void *, int, void *); 30756a4e45aSAndre Oppermann static void mb_dtor_pack(void *, int, void *); 30856a4e45aSAndre Oppermann static int mb_zinit_pack(void *, int, int); 30956a4e45aSAndre Oppermann static void mb_zfini_pack(void *, int); 310e60b2fcbSGleb Smirnoff static void mb_reclaim(uma_zone_t, int); 311099a0e58SBosko Milekic 31237140716SAndre Oppermann /* Ensure that MSIZE is a power of 2. 
*/ 313a04946cfSBrian Somers CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 314a04946cfSBrian Somers 31523feb563SAndrew Gallatin _Static_assert(sizeof(struct mbuf) <= MSIZE, 31623feb563SAndrew Gallatin "size of mbuf exceeds MSIZE"); 317099a0e58SBosko Milekic /* 318099a0e58SBosko Milekic * Initialize FreeBSD Network buffer allocation. 319099a0e58SBosko Milekic */ 320099a0e58SBosko Milekic static void 321099a0e58SBosko Milekic mbuf_init(void *dummy) 322099a0e58SBosko Milekic { 323099a0e58SBosko Milekic 324099a0e58SBosko Milekic /* 325099a0e58SBosko Milekic * Configure UMA zones for Mbufs, Clusters, and Packets. 326099a0e58SBosko Milekic */ 32756a4e45aSAndre Oppermann zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 32830be9685SRyan Libby mb_ctor_mbuf, mb_dtor_mbuf, NULL, NULL, 32910c8fb47SRyan Libby MSIZE - 1, UMA_ZONE_CONTIG | UMA_ZONE_MAXBUCKET); 33045fe0bf7SPawel Jakub Dawidek if (nmbufs > 0) 33145fe0bf7SPawel Jakub Dawidek nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 3326e0b6746SPawel Jakub Dawidek uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); 333e60b2fcbSGleb Smirnoff uma_zone_set_maxaction(zone_mbuf, mb_reclaim); 33456a4e45aSAndre Oppermann 33568352adfSRobert Watson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 33630be9685SRyan Libby mb_ctor_clust, NULL, NULL, NULL, 33710c8fb47SRyan Libby UMA_ALIGN_PTR, UMA_ZONE_CONTIG); 33845fe0bf7SPawel Jakub Dawidek if (nmbclusters > 0) 33945fe0bf7SPawel Jakub Dawidek nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 3406e0b6746SPawel Jakub Dawidek uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); 341e60b2fcbSGleb Smirnoff uma_zone_set_maxaction(zone_clust, mb_reclaim); 342099a0e58SBosko Milekic 34356a4e45aSAndre Oppermann zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 34456a4e45aSAndre Oppermann mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 34556a4e45aSAndre Oppermann 346fcf90618SGleb Smirnoff /* Make jumbo frame zone too. 
Page size, 9k and 16k. */ 347ec63cb90SAndre Oppermann zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 34830be9685SRyan Libby mb_ctor_clust, NULL, NULL, NULL, 34910c8fb47SRyan Libby UMA_ALIGN_PTR, UMA_ZONE_CONTIG); 35045fe0bf7SPawel Jakub Dawidek if (nmbjumbop > 0) 35145fe0bf7SPawel Jakub Dawidek nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 3526e0b6746SPawel Jakub Dawidek uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); 353e60b2fcbSGleb Smirnoff uma_zone_set_maxaction(zone_jumbop, mb_reclaim); 354d5269a63SAndre Oppermann 35556a4e45aSAndre Oppermann zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 35630be9685SRyan Libby mb_ctor_clust, NULL, NULL, NULL, 35710c8fb47SRyan Libby UMA_ALIGN_PTR, UMA_ZONE_CONTIG); 35845fe0bf7SPawel Jakub Dawidek if (nmbjumbo9 > 0) 35945fe0bf7SPawel Jakub Dawidek nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 3606e0b6746SPawel Jakub Dawidek uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); 361e60b2fcbSGleb Smirnoff uma_zone_set_maxaction(zone_jumbo9, mb_reclaim); 36256a4e45aSAndre Oppermann 36356a4e45aSAndre Oppermann zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 36430be9685SRyan Libby mb_ctor_clust, NULL, NULL, NULL, 36510c8fb47SRyan Libby UMA_ALIGN_PTR, UMA_ZONE_CONTIG); 36645fe0bf7SPawel Jakub Dawidek if (nmbjumbo16 > 0) 36745fe0bf7SPawel Jakub Dawidek nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 3686e0b6746SPawel Jakub Dawidek uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); 369e60b2fcbSGleb Smirnoff uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); 37056a4e45aSAndre Oppermann 371099a0e58SBosko Milekic /* 372099a0e58SBosko Milekic * Hook event handler for low-memory situation, used to 373099a0e58SBosko Milekic * drain protocols and push data back to the caches (UMA 374099a0e58SBosko Milekic * later pushes it back to VM). 
375099a0e58SBosko Milekic */ 376099a0e58SBosko Milekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 377099a0e58SBosko Milekic EVENTHANDLER_PRI_FIRST); 378fb3bc596SJohn Baldwin 379fb3bc596SJohn Baldwin snd_tag_count = counter_u64_alloc(M_WAITOK); 380099a0e58SBosko Milekic } 38137140716SAndre Oppermann SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 382099a0e58SBosko Milekic 3837790c8c1SConrad Meyer #ifdef DEBUGNET 3845475ca5aSMark Johnston /* 3857790c8c1SConrad Meyer * debugnet makes use of a pre-allocated pool of mbufs and clusters. When 3867790c8c1SConrad Meyer * debugnet is configured, we initialize a set of UMA cache zones which return 3875475ca5aSMark Johnston * items from this pool. At panic-time, the regular UMA zone pointers are 3885475ca5aSMark Johnston * overwritten with those of the cache zones so that drivers may allocate and 3895475ca5aSMark Johnston * free mbufs and clusters without attempting to allocate physical memory. 3905475ca5aSMark Johnston * 3915475ca5aSMark Johnston * We keep mbufs and clusters in a pair of mbuf queues. In particular, for 3925475ca5aSMark Johnston * the purpose of caching clusters, we treat them as mbufs. 
3935475ca5aSMark Johnston */ 3947790c8c1SConrad Meyer static struct mbufq dn_mbufq = 3957790c8c1SConrad Meyer { STAILQ_HEAD_INITIALIZER(dn_mbufq.mq_head), 0, INT_MAX }; 3967790c8c1SConrad Meyer static struct mbufq dn_clustq = 3977790c8c1SConrad Meyer { STAILQ_HEAD_INITIALIZER(dn_clustq.mq_head), 0, INT_MAX }; 3985475ca5aSMark Johnston 3997790c8c1SConrad Meyer static int dn_clsize; 4007790c8c1SConrad Meyer static uma_zone_t dn_zone_mbuf; 4017790c8c1SConrad Meyer static uma_zone_t dn_zone_clust; 4027790c8c1SConrad Meyer static uma_zone_t dn_zone_pack; 4037790c8c1SConrad Meyer 4047790c8c1SConrad Meyer static struct debugnet_saved_zones { 4057790c8c1SConrad Meyer uma_zone_t dsz_mbuf; 4067790c8c1SConrad Meyer uma_zone_t dsz_clust; 4077790c8c1SConrad Meyer uma_zone_t dsz_pack; 4087790c8c1SConrad Meyer uma_zone_t dsz_jumbop; 4097790c8c1SConrad Meyer uma_zone_t dsz_jumbo9; 4107790c8c1SConrad Meyer uma_zone_t dsz_jumbo16; 4117790c8c1SConrad Meyer bool dsz_debugnet_zones_enabled; 4127790c8c1SConrad Meyer } dn_saved_zones; 4135475ca5aSMark Johnston 4145475ca5aSMark Johnston static int 4157790c8c1SConrad Meyer dn_buf_import(void *arg, void **store, int count, int domain __unused, 4165475ca5aSMark Johnston int flags) 4175475ca5aSMark Johnston { 4185475ca5aSMark Johnston struct mbufq *q; 4195475ca5aSMark Johnston struct mbuf *m; 4205475ca5aSMark Johnston int i; 4215475ca5aSMark Johnston 4225475ca5aSMark Johnston q = arg; 4235475ca5aSMark Johnston 4245475ca5aSMark Johnston for (i = 0; i < count; i++) { 4255475ca5aSMark Johnston m = mbufq_dequeue(q); 4265475ca5aSMark Johnston if (m == NULL) 4275475ca5aSMark Johnston break; 4287790c8c1SConrad Meyer trash_init(m, q == &dn_mbufq ? 
MSIZE : dn_clsize, flags); 4295475ca5aSMark Johnston store[i] = m; 4305475ca5aSMark Johnston } 4310d1467b1SConrad Meyer KASSERT((flags & M_WAITOK) == 0 || i == count, 4320d1467b1SConrad Meyer ("%s: ran out of pre-allocated mbufs", __func__)); 4335475ca5aSMark Johnston return (i); 4345475ca5aSMark Johnston } 4355475ca5aSMark Johnston 4365475ca5aSMark Johnston static void 4377790c8c1SConrad Meyer dn_buf_release(void *arg, void **store, int count) 4385475ca5aSMark Johnston { 4395475ca5aSMark Johnston struct mbufq *q; 4405475ca5aSMark Johnston struct mbuf *m; 4415475ca5aSMark Johnston int i; 4425475ca5aSMark Johnston 4435475ca5aSMark Johnston q = arg; 4445475ca5aSMark Johnston 4455475ca5aSMark Johnston for (i = 0; i < count; i++) { 4465475ca5aSMark Johnston m = store[i]; 4475475ca5aSMark Johnston (void)mbufq_enqueue(q, m); 4485475ca5aSMark Johnston } 4495475ca5aSMark Johnston } 4505475ca5aSMark Johnston 4515475ca5aSMark Johnston static int 4527790c8c1SConrad Meyer dn_pack_import(void *arg __unused, void **store, int count, int domain __unused, 4535475ca5aSMark Johnston int flags __unused) 4545475ca5aSMark Johnston { 4555475ca5aSMark Johnston struct mbuf *m; 4565475ca5aSMark Johnston void *clust; 4575475ca5aSMark Johnston int i; 4585475ca5aSMark Johnston 4595475ca5aSMark Johnston for (i = 0; i < count; i++) { 4605475ca5aSMark Johnston m = m_get(MT_DATA, M_NOWAIT); 4615475ca5aSMark Johnston if (m == NULL) 4625475ca5aSMark Johnston break; 4637790c8c1SConrad Meyer clust = uma_zalloc(dn_zone_clust, M_NOWAIT); 4645475ca5aSMark Johnston if (clust == NULL) { 4655475ca5aSMark Johnston m_free(m); 4665475ca5aSMark Johnston break; 4675475ca5aSMark Johnston } 4687790c8c1SConrad Meyer mb_ctor_clust(clust, dn_clsize, m, 0); 4695475ca5aSMark Johnston store[i] = m; 4705475ca5aSMark Johnston } 4710d1467b1SConrad Meyer KASSERT((flags & M_WAITOK) == 0 || i == count, 4720d1467b1SConrad Meyer ("%s: ran out of pre-allocated mbufs", __func__)); 4735475ca5aSMark Johnston return (i); 
4745475ca5aSMark Johnston } 4755475ca5aSMark Johnston 4765475ca5aSMark Johnston static void 4777790c8c1SConrad Meyer dn_pack_release(void *arg __unused, void **store, int count) 4785475ca5aSMark Johnston { 4795475ca5aSMark Johnston struct mbuf *m; 4805475ca5aSMark Johnston void *clust; 4815475ca5aSMark Johnston int i; 4825475ca5aSMark Johnston 4835475ca5aSMark Johnston for (i = 0; i < count; i++) { 4845475ca5aSMark Johnston m = store[i]; 4855475ca5aSMark Johnston clust = m->m_ext.ext_buf; 4867790c8c1SConrad Meyer uma_zfree(dn_zone_clust, clust); 4877790c8c1SConrad Meyer uma_zfree(dn_zone_mbuf, m); 4885475ca5aSMark Johnston } 4895475ca5aSMark Johnston } 4905475ca5aSMark Johnston 4915475ca5aSMark Johnston /* 4927790c8c1SConrad Meyer * Free the pre-allocated mbufs and clusters reserved for debugnet, and destroy 4935475ca5aSMark Johnston * the corresponding UMA cache zones. 4945475ca5aSMark Johnston */ 4955475ca5aSMark Johnston void 4967790c8c1SConrad Meyer debugnet_mbuf_drain(void) 4975475ca5aSMark Johnston { 4985475ca5aSMark Johnston struct mbuf *m; 4995475ca5aSMark Johnston void *item; 5005475ca5aSMark Johnston 5017790c8c1SConrad Meyer if (dn_zone_mbuf != NULL) { 5027790c8c1SConrad Meyer uma_zdestroy(dn_zone_mbuf); 5037790c8c1SConrad Meyer dn_zone_mbuf = NULL; 5045475ca5aSMark Johnston } 5057790c8c1SConrad Meyer if (dn_zone_clust != NULL) { 5067790c8c1SConrad Meyer uma_zdestroy(dn_zone_clust); 5077790c8c1SConrad Meyer dn_zone_clust = NULL; 5085475ca5aSMark Johnston } 5097790c8c1SConrad Meyer if (dn_zone_pack != NULL) { 5107790c8c1SConrad Meyer uma_zdestroy(dn_zone_pack); 5117790c8c1SConrad Meyer dn_zone_pack = NULL; 5125475ca5aSMark Johnston } 5135475ca5aSMark Johnston 5147790c8c1SConrad Meyer while ((m = mbufq_dequeue(&dn_mbufq)) != NULL) 5155475ca5aSMark Johnston m_free(m); 5167790c8c1SConrad Meyer while ((item = mbufq_dequeue(&dn_clustq)) != NULL) 5177790c8c1SConrad Meyer uma_zfree(m_getzone(dn_clsize), item); 5185475ca5aSMark Johnston } 5195475ca5aSMark Johnston 
5205475ca5aSMark Johnston /* 5217790c8c1SConrad Meyer * Callback invoked immediately prior to starting a debugnet connection. 5225475ca5aSMark Johnston */ 5235475ca5aSMark Johnston void 5247790c8c1SConrad Meyer debugnet_mbuf_start(void) 5255475ca5aSMark Johnston { 5265475ca5aSMark Johnston 5277790c8c1SConrad Meyer MPASS(!dn_saved_zones.dsz_debugnet_zones_enabled); 5287790c8c1SConrad Meyer 5297790c8c1SConrad Meyer /* Save the old zone pointers to restore when debugnet is closed. */ 5307790c8c1SConrad Meyer dn_saved_zones = (struct debugnet_saved_zones) { 5317790c8c1SConrad Meyer .dsz_debugnet_zones_enabled = true, 5327790c8c1SConrad Meyer .dsz_mbuf = zone_mbuf, 5337790c8c1SConrad Meyer .dsz_clust = zone_clust, 5347790c8c1SConrad Meyer .dsz_pack = zone_pack, 5357790c8c1SConrad Meyer .dsz_jumbop = zone_jumbop, 5367790c8c1SConrad Meyer .dsz_jumbo9 = zone_jumbo9, 5377790c8c1SConrad Meyer .dsz_jumbo16 = zone_jumbo16, 5387790c8c1SConrad Meyer }; 5397790c8c1SConrad Meyer 5405475ca5aSMark Johnston /* 5415475ca5aSMark Johnston * All cluster zones return buffers of the size requested by the 5425475ca5aSMark Johnston * drivers. It's up to the driver to reinitialize the zones if the 5437790c8c1SConrad Meyer * MTU of a debugnet-enabled interface changes. 5445475ca5aSMark Johnston */ 5457790c8c1SConrad Meyer printf("debugnet: overwriting mbuf zone pointers\n"); 5467790c8c1SConrad Meyer zone_mbuf = dn_zone_mbuf; 5477790c8c1SConrad Meyer zone_clust = dn_zone_clust; 5487790c8c1SConrad Meyer zone_pack = dn_zone_pack; 5497790c8c1SConrad Meyer zone_jumbop = dn_zone_clust; 5507790c8c1SConrad Meyer zone_jumbo9 = dn_zone_clust; 5517790c8c1SConrad Meyer zone_jumbo16 = dn_zone_clust; 5525475ca5aSMark Johnston } 5535475ca5aSMark Johnston 5545475ca5aSMark Johnston /* 5557790c8c1SConrad Meyer * Callback invoked when a debugnet connection is closed/finished. 
5565475ca5aSMark Johnston */ 5575475ca5aSMark Johnston void 5587790c8c1SConrad Meyer debugnet_mbuf_finish(void) 5597790c8c1SConrad Meyer { 5607790c8c1SConrad Meyer 5617790c8c1SConrad Meyer MPASS(dn_saved_zones.dsz_debugnet_zones_enabled); 5627790c8c1SConrad Meyer 5637790c8c1SConrad Meyer printf("debugnet: restoring mbuf zone pointers\n"); 5647790c8c1SConrad Meyer zone_mbuf = dn_saved_zones.dsz_mbuf; 5657790c8c1SConrad Meyer zone_clust = dn_saved_zones.dsz_clust; 5667790c8c1SConrad Meyer zone_pack = dn_saved_zones.dsz_pack; 5677790c8c1SConrad Meyer zone_jumbop = dn_saved_zones.dsz_jumbop; 5687790c8c1SConrad Meyer zone_jumbo9 = dn_saved_zones.dsz_jumbo9; 5697790c8c1SConrad Meyer zone_jumbo16 = dn_saved_zones.dsz_jumbo16; 5707790c8c1SConrad Meyer 5717790c8c1SConrad Meyer memset(&dn_saved_zones, 0, sizeof(dn_saved_zones)); 5727790c8c1SConrad Meyer } 5737790c8c1SConrad Meyer 5747790c8c1SConrad Meyer /* 5757790c8c1SConrad Meyer * Reinitialize the debugnet mbuf+cluster pool and cache zones. 5767790c8c1SConrad Meyer */ 5777790c8c1SConrad Meyer void 5787790c8c1SConrad Meyer debugnet_mbuf_reinit(int nmbuf, int nclust, int clsize) 5795475ca5aSMark Johnston { 5805475ca5aSMark Johnston struct mbuf *m; 5815475ca5aSMark Johnston void *item; 5825475ca5aSMark Johnston 5837790c8c1SConrad Meyer debugnet_mbuf_drain(); 5845475ca5aSMark Johnston 5857790c8c1SConrad Meyer dn_clsize = clsize; 5865475ca5aSMark Johnston 5877790c8c1SConrad Meyer dn_zone_mbuf = uma_zcache_create("debugnet_" MBUF_MEM_NAME, 58830be9685SRyan Libby MSIZE, mb_ctor_mbuf, mb_dtor_mbuf, NULL, NULL, 5897790c8c1SConrad Meyer dn_buf_import, dn_buf_release, 5907790c8c1SConrad Meyer &dn_mbufq, UMA_ZONE_NOBUCKET); 5915475ca5aSMark Johnston 5927790c8c1SConrad Meyer dn_zone_clust = uma_zcache_create("debugnet_" MBUF_CLUSTER_MEM_NAME, 59330be9685SRyan Libby clsize, mb_ctor_clust, NULL, NULL, NULL, 5947790c8c1SConrad Meyer dn_buf_import, dn_buf_release, 5957790c8c1SConrad Meyer &dn_clustq, UMA_ZONE_NOBUCKET); 5965475ca5aSMark 
Johnston 5977790c8c1SConrad Meyer dn_zone_pack = uma_zcache_create("debugnet_" MBUF_PACKET_MEM_NAME, 5985475ca5aSMark Johnston MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL, 5997790c8c1SConrad Meyer dn_pack_import, dn_pack_release, 6005475ca5aSMark Johnston NULL, UMA_ZONE_NOBUCKET); 6015475ca5aSMark Johnston 6025475ca5aSMark Johnston while (nmbuf-- > 0) { 6035475ca5aSMark Johnston m = m_get(MT_DATA, M_WAITOK); 6047790c8c1SConrad Meyer uma_zfree(dn_zone_mbuf, m); 6055475ca5aSMark Johnston } 6065475ca5aSMark Johnston while (nclust-- > 0) { 6077790c8c1SConrad Meyer item = uma_zalloc(m_getzone(dn_clsize), M_WAITOK); 6087790c8c1SConrad Meyer uma_zfree(dn_zone_clust, item); 6095475ca5aSMark Johnston } 6105475ca5aSMark Johnston } 6117790c8c1SConrad Meyer #endif /* DEBUGNET */ 6125475ca5aSMark Johnston 613099a0e58SBosko Milekic /* 61403270b59SJeff Roberson * Constructor for Mbuf primary zone. 615099a0e58SBosko Milekic * 616099a0e58SBosko Milekic * The 'arg' pointer points to a mb_args structure which 617099a0e58SBosko Milekic * contains call-specific information required to support the 61856a4e45aSAndre Oppermann * mbuf allocation API. See mbuf.h. 619099a0e58SBosko Milekic */ 620b23f72e9SBrian Feldman static int 621b23f72e9SBrian Feldman mb_ctor_mbuf(void *mem, int size, void *arg, int how) 622099a0e58SBosko Milekic { 623099a0e58SBosko Milekic struct mbuf *m; 624099a0e58SBosko Milekic struct mb_args *args; 625b23f72e9SBrian Feldman int error; 626099a0e58SBosko Milekic int flags; 627099a0e58SBosko Milekic short type; 628099a0e58SBosko Milekic 629099a0e58SBosko Milekic args = (struct mb_args *)arg; 630099a0e58SBosko Milekic type = args->type; 631099a0e58SBosko Milekic 63256a4e45aSAndre Oppermann /* 63356a4e45aSAndre Oppermann * The mbuf is initialized later. The caller has the 634fcf90618SGleb Smirnoff * responsibility to set up any MAC labels too. 
63556a4e45aSAndre Oppermann */ 63656a4e45aSAndre Oppermann if (type == MT_NOINIT) 63756a4e45aSAndre Oppermann return (0); 63856a4e45aSAndre Oppermann 639afb295ccSAndre Oppermann m = (struct mbuf *)mem; 640afb295ccSAndre Oppermann flags = args->flags; 641fddd4f62SNavdeep Parhar MPASS((flags & M_NOFREE) == 0); 642afb295ccSAndre Oppermann 643b4b12e52SGleb Smirnoff error = m_init(m, how, type, flags); 644afb295ccSAndre Oppermann 645b23f72e9SBrian Feldman return (error); 646099a0e58SBosko Milekic } 647099a0e58SBosko Milekic 648099a0e58SBosko Milekic /* 64903270b59SJeff Roberson * The Mbuf primary zone destructor. 650099a0e58SBosko Milekic */ 651099a0e58SBosko Milekic static void 652099a0e58SBosko Milekic mb_dtor_mbuf(void *mem, int size, void *arg) 653099a0e58SBosko Milekic { 654099a0e58SBosko Milekic struct mbuf *m; 655629b9e08SKip Macy unsigned long flags; 656099a0e58SBosko Milekic 657099a0e58SBosko Milekic m = (struct mbuf *)mem; 658629b9e08SKip Macy flags = (unsigned long)arg; 659629b9e08SKip Macy 660a9fa76f2SNavdeep Parhar KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 6614c7070dbSScott Long if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) 662099a0e58SBosko Milekic m_tag_delete_chain(m, NULL); 663099a0e58SBosko Milekic } 664099a0e58SBosko Milekic 66556a4e45aSAndre Oppermann /* 66656a4e45aSAndre Oppermann * The Mbuf Packet zone destructor. 66756a4e45aSAndre Oppermann */ 668099a0e58SBosko Milekic static void 669099a0e58SBosko Milekic mb_dtor_pack(void *mem, int size, void *arg) 670099a0e58SBosko Milekic { 671099a0e58SBosko Milekic struct mbuf *m; 672099a0e58SBosko Milekic 673099a0e58SBosko Milekic m = (struct mbuf *)mem; 674099a0e58SBosko Milekic if ((m->m_flags & M_PKTHDR) != 0) 675099a0e58SBosko Milekic m_tag_delete_chain(m, NULL); 67656a4e45aSAndre Oppermann 67756a4e45aSAndre Oppermann /* Make sure we've got a clean cluster back. 
*/ 67856a4e45aSAndre Oppermann KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 67956a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 68056a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 681cf827063SPoul-Henning Kamp KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 682cf827063SPoul-Henning Kamp KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 68356a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 68449d46b61SGleb Smirnoff KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 685121f0509SMike Silbersack #ifdef INVARIANTS 686121f0509SMike Silbersack trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 687121f0509SMike Silbersack #endif 6886c125b8dSMohan Srinivasan /* 689ef44c8d2SDavid E. O'Brien * If there are processes blocked on zone_clust, waiting for pages 69008cfa56eSMark Johnston * to be freed up, cause them to be woken up by draining the 69108cfa56eSMark Johnston * packet zone. We are exposed to a race here (in the check for 692ef44c8d2SDavid E. O'Brien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 693ef44c8d2SDavid E. O'Brien * is deliberate. We don't want to acquire the zone lock for every 694ef44c8d2SDavid E. O'Brien * mbuf free. 6956c125b8dSMohan Srinivasan */ 696727c6918SJeff Roberson if (uma_zone_exhausted(zone_clust)) 69708cfa56eSMark Johnston uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN); 698099a0e58SBosko Milekic } 699099a0e58SBosko Milekic 700099a0e58SBosko Milekic /* 701ec63cb90SAndre Oppermann * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 702099a0e58SBosko Milekic * 703099a0e58SBosko Milekic * Here the 'arg' pointer points to the Mbuf which we 70456a4e45aSAndre Oppermann * are configuring cluster storage for. 
If 'arg' is 70556a4e45aSAndre Oppermann * empty we allocate just the cluster without setting 70656a4e45aSAndre Oppermann * the mbuf to it. See mbuf.h. 707099a0e58SBosko Milekic */ 708b23f72e9SBrian Feldman static int 709b23f72e9SBrian Feldman mb_ctor_clust(void *mem, int size, void *arg, int how) 710099a0e58SBosko Milekic { 711099a0e58SBosko Milekic struct mbuf *m; 712099a0e58SBosko Milekic 7130f4d9d04SKip Macy m = (struct mbuf *)arg; 7140f4d9d04SKip Macy if (m != NULL) { 715e8fd18f3SGleb Smirnoff m->m_ext.ext_buf = (char *)mem; 716099a0e58SBosko Milekic m->m_data = m->m_ext.ext_buf; 717099a0e58SBosko Milekic m->m_flags |= M_EXT; 718099a0e58SBosko Milekic m->m_ext.ext_free = NULL; 719cf827063SPoul-Henning Kamp m->m_ext.ext_arg1 = NULL; 720cf827063SPoul-Henning Kamp m->m_ext.ext_arg2 = NULL; 72156a4e45aSAndre Oppermann m->m_ext.ext_size = size; 72256a5f52eSGleb Smirnoff m->m_ext.ext_type = m_gettype(size); 72356a5f52eSGleb Smirnoff m->m_ext.ext_flags = EXT_FLAG_EMBREF; 72456a5f52eSGleb Smirnoff m->m_ext.ext_count = 1; 72556a4e45aSAndre Oppermann } 7260f4d9d04SKip Macy 727b23f72e9SBrian Feldman return (0); 728099a0e58SBosko Milekic } 729099a0e58SBosko Milekic 73056a4e45aSAndre Oppermann /* 731099a0e58SBosko Milekic * The Packet secondary zone's init routine, executed on the 73256a4e45aSAndre Oppermann * object's transition from mbuf keg slab to zone cache. 733099a0e58SBosko Milekic */ 734b23f72e9SBrian Feldman static int 73556a4e45aSAndre Oppermann mb_zinit_pack(void *mem, int size, int how) 736099a0e58SBosko Milekic { 737099a0e58SBosko Milekic struct mbuf *m; 738099a0e58SBosko Milekic 73956a4e45aSAndre Oppermann m = (struct mbuf *)mem; /* m is virgin. */ 740a7bd90efSAndre Oppermann if (uma_zalloc_arg(zone_clust, m, how) == NULL || 741a7bd90efSAndre Oppermann m->m_ext.ext_buf == NULL) 742b23f72e9SBrian Feldman return (ENOMEM); 743cd5bb63bSAndre Oppermann m->m_ext.ext_type = EXT_PACKET; /* Override. 
*/ 744121f0509SMike Silbersack #ifdef INVARIANTS 745121f0509SMike Silbersack trash_init(m->m_ext.ext_buf, MCLBYTES, how); 746121f0509SMike Silbersack #endif 747b23f72e9SBrian Feldman return (0); 748099a0e58SBosko Milekic } 749099a0e58SBosko Milekic 750099a0e58SBosko Milekic /* 751099a0e58SBosko Milekic * The Packet secondary zone's fini routine, executed on the 752099a0e58SBosko Milekic * object's transition from zone cache to keg slab. 753099a0e58SBosko Milekic */ 754099a0e58SBosko Milekic static void 75556a4e45aSAndre Oppermann mb_zfini_pack(void *mem, int size) 756099a0e58SBosko Milekic { 757099a0e58SBosko Milekic struct mbuf *m; 758099a0e58SBosko Milekic 759099a0e58SBosko Milekic m = (struct mbuf *)mem; 760121f0509SMike Silbersack #ifdef INVARIANTS 761121f0509SMike Silbersack trash_fini(m->m_ext.ext_buf, MCLBYTES); 762121f0509SMike Silbersack #endif 763099a0e58SBosko Milekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 764a7b844d2SMike Silbersack #ifdef INVARIANTS 765a7b844d2SMike Silbersack trash_dtor(mem, size, NULL); 766a7b844d2SMike Silbersack #endif 767099a0e58SBosko Milekic } 768099a0e58SBosko Milekic 769099a0e58SBosko Milekic /* 770099a0e58SBosko Milekic * The "packet" keg constructor. 
771099a0e58SBosko Milekic */ 772b23f72e9SBrian Feldman static int 773b23f72e9SBrian Feldman mb_ctor_pack(void *mem, int size, void *arg, int how) 774099a0e58SBosko Milekic { 775099a0e58SBosko Milekic struct mbuf *m; 776099a0e58SBosko Milekic struct mb_args *args; 777ce28636bSAndre Oppermann int error, flags; 778099a0e58SBosko Milekic short type; 779099a0e58SBosko Milekic 780099a0e58SBosko Milekic m = (struct mbuf *)mem; 781099a0e58SBosko Milekic args = (struct mb_args *)arg; 782099a0e58SBosko Milekic flags = args->flags; 783099a0e58SBosko Milekic type = args->type; 784fddd4f62SNavdeep Parhar MPASS((flags & M_NOFREE) == 0); 785099a0e58SBosko Milekic 786121f0509SMike Silbersack #ifdef INVARIANTS 787121f0509SMike Silbersack trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 788121f0509SMike Silbersack #endif 789099a0e58SBosko Milekic 790b4b12e52SGleb Smirnoff error = m_init(m, how, type, flags); 791afb295ccSAndre Oppermann 79256a4e45aSAndre Oppermann /* m_ext is already initialized. */ 793afb295ccSAndre Oppermann m->m_data = m->m_ext.ext_buf; 794afb295ccSAndre Oppermann m->m_flags = (flags | M_EXT); 79556a4e45aSAndre Oppermann 796afb295ccSAndre Oppermann return (error); 797099a0e58SBosko Milekic } 798099a0e58SBosko Milekic 799099a0e58SBosko Milekic /* 800e60b2fcbSGleb Smirnoff * This is the protocol drain routine. Called by UMA whenever any of the 801e60b2fcbSGleb Smirnoff * mbuf zones is closed to its limit. 802099a0e58SBosko Milekic * 803099a0e58SBosko Milekic * No locks should be held when this is called. The drain routines have to 804099a0e58SBosko Milekic * presently acquire some locks which raises the possibility of lock order 805099a0e58SBosko Milekic * reversal. 
806099a0e58SBosko Milekic */ 807099a0e58SBosko Milekic static void 808e60b2fcbSGleb Smirnoff mb_reclaim(uma_zone_t zone __unused, int pending __unused) 809099a0e58SBosko Milekic { 8100017b2adSGleb Smirnoff struct epoch_tracker et; 811099a0e58SBosko Milekic struct domain *dp; 812099a0e58SBosko Milekic struct protosw *pr; 813099a0e58SBosko Milekic 814e60b2fcbSGleb Smirnoff WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); 815099a0e58SBosko Milekic 8160017b2adSGleb Smirnoff NET_EPOCH_ENTER(et); 817099a0e58SBosko Milekic for (dp = domains; dp != NULL; dp = dp->dom_next) 818099a0e58SBosko Milekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 819099a0e58SBosko Milekic if (pr->pr_drain != NULL) 820099a0e58SBosko Milekic (*pr->pr_drain)(); 8210017b2adSGleb Smirnoff NET_EPOCH_EXIT(et); 822099a0e58SBosko Milekic } 8235e4bc63bSGleb Smirnoff 8245e4bc63bSGleb Smirnoff /* 82582334850SJohn Baldwin * Free "count" units of I/O from an mbuf chain. They could be held 82661664ee7SGleb Smirnoff * in M_EXTPG or just as a normal mbuf. This code is intended to be 82782334850SJohn Baldwin * called in an error path (I/O error, closed connection, etc). 
82882334850SJohn Baldwin */ 82982334850SJohn Baldwin void 83082334850SJohn Baldwin mb_free_notready(struct mbuf *m, int count) 83182334850SJohn Baldwin { 83282334850SJohn Baldwin int i; 83382334850SJohn Baldwin 83482334850SJohn Baldwin for (i = 0; i < count && m != NULL; i++) { 83561664ee7SGleb Smirnoff if ((m->m_flags & M_EXTPG) != 0) { 8367b6c99d0SGleb Smirnoff m->m_epg_nrdy--; 8377b6c99d0SGleb Smirnoff if (m->m_epg_nrdy != 0) 83882334850SJohn Baldwin continue; 83982334850SJohn Baldwin } 84082334850SJohn Baldwin m = m_free(m); 84182334850SJohn Baldwin } 84282334850SJohn Baldwin KASSERT(i == count, ("Removed only %d items from %p", i, m)); 84382334850SJohn Baldwin } 84482334850SJohn Baldwin 84582334850SJohn Baldwin /* 84682334850SJohn Baldwin * Compress an unmapped mbuf into a simple mbuf when it holds a small 84782334850SJohn Baldwin * amount of data. This is used as a DOS defense to avoid having 84882334850SJohn Baldwin * small packets tie up wired pages, an ext_pgs structure, and an 84982334850SJohn Baldwin * mbuf. Since this converts the existing mbuf in place, it can only 85082334850SJohn Baldwin * be used if there are no other references to 'm'. 85182334850SJohn Baldwin */ 85282334850SJohn Baldwin int 85382334850SJohn Baldwin mb_unmapped_compress(struct mbuf *m) 85482334850SJohn Baldwin { 85582334850SJohn Baldwin volatile u_int *refcnt; 8564c9f0f98SGleb Smirnoff char buf[MLEN]; 85782334850SJohn Baldwin 85882334850SJohn Baldwin /* 85982334850SJohn Baldwin * Assert that 'm' does not have a packet header. If 'm' had 86082334850SJohn Baldwin * a packet header, it would only be able to hold MHLEN bytes 86182334850SJohn Baldwin * and m_data would have to be initialized differently. 
86282334850SJohn Baldwin */ 86361664ee7SGleb Smirnoff KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXTPG), 86461664ee7SGleb Smirnoff ("%s: m %p !M_EXTPG or M_PKTHDR", __func__, m)); 86582334850SJohn Baldwin KASSERT(m->m_len <= MLEN, ("m_len too large %p", m)); 86682334850SJohn Baldwin 86782334850SJohn Baldwin if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 86882334850SJohn Baldwin refcnt = &m->m_ext.ext_count; 86982334850SJohn Baldwin } else { 87082334850SJohn Baldwin KASSERT(m->m_ext.ext_cnt != NULL, 87182334850SJohn Baldwin ("%s: no refcounting pointer on %p", __func__, m)); 87282334850SJohn Baldwin refcnt = m->m_ext.ext_cnt; 87382334850SJohn Baldwin } 87482334850SJohn Baldwin 87582334850SJohn Baldwin if (*refcnt != 1) 87682334850SJohn Baldwin return (EBUSY); 87782334850SJohn Baldwin 8784c9f0f98SGleb Smirnoff m_copydata(m, 0, m->m_len, buf); 87923feb563SAndrew Gallatin 88023feb563SAndrew Gallatin /* Free the backing pages. */ 88123feb563SAndrew Gallatin m->m_ext.ext_free(m); 88282334850SJohn Baldwin 88382334850SJohn Baldwin /* Turn 'm' into a "normal" mbuf. */ 8846edfd179SGleb Smirnoff m->m_flags &= ~(M_EXT | M_RDONLY | M_EXTPG); 88582334850SJohn Baldwin m->m_data = m->m_dat; 88682334850SJohn Baldwin 8874c9f0f98SGleb Smirnoff /* Copy data back into m. */ 8884c9f0f98SGleb Smirnoff bcopy(buf, mtod(m, char *), m->m_len); 88982334850SJohn Baldwin 89082334850SJohn Baldwin return (0); 89182334850SJohn Baldwin } 89282334850SJohn Baldwin 89382334850SJohn Baldwin /* 89482334850SJohn Baldwin * These next few routines are used to permit downgrading an unmapped 89582334850SJohn Baldwin * mbuf to a chain of mapped mbufs. This is used when an interface 89682334850SJohn Baldwin * doesn't supported unmapped mbufs or if checksums need to be 89782334850SJohn Baldwin * computed in software. 89882334850SJohn Baldwin * 89982334850SJohn Baldwin * Each unmapped mbuf is converted to a chain of mbufs. 
First, any 90082334850SJohn Baldwin * TLS header data is stored in a regular mbuf. Second, each page of 90182334850SJohn Baldwin * unmapped data is stored in an mbuf with an EXT_SFBUF external 90282334850SJohn Baldwin * cluster. These mbufs use an sf_buf to provide a valid KVA for the 90382334850SJohn Baldwin * associated physical page. They also hold a reference on the 90461664ee7SGleb Smirnoff * original M_EXTPG mbuf to ensure the physical page doesn't go away. 90582334850SJohn Baldwin * Finally, any TLS trailer data is stored in a regular mbuf. 90682334850SJohn Baldwin * 90782334850SJohn Baldwin * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF 90882334850SJohn Baldwin * mbufs. It frees the associated sf_buf and releases its reference 90961664ee7SGleb Smirnoff * on the original M_EXTPG mbuf. 91082334850SJohn Baldwin * 91182334850SJohn Baldwin * _mb_unmapped_to_ext() is a helper function that converts a single 91282334850SJohn Baldwin * unmapped mbuf into a chain of mbufs. 91382334850SJohn Baldwin * 91482334850SJohn Baldwin * mb_unmapped_to_ext() is the public function that walks an mbuf 91582334850SJohn Baldwin * chain converting any unmapped mbufs to mapped mbufs. It returns 91682334850SJohn Baldwin * the new chain of unmapped mbufs on success. On failure it frees 91782334850SJohn Baldwin * the original mbuf chain and returns NULL. 91882334850SJohn Baldwin */ 91982334850SJohn Baldwin static void 92082334850SJohn Baldwin mb_unmapped_free_mext(struct mbuf *m) 92182334850SJohn Baldwin { 92282334850SJohn Baldwin struct sf_buf *sf; 92382334850SJohn Baldwin struct mbuf *old_m; 92482334850SJohn Baldwin 92582334850SJohn Baldwin sf = m->m_ext.ext_arg1; 92682334850SJohn Baldwin sf_buf_free(sf); 92782334850SJohn Baldwin 92861664ee7SGleb Smirnoff /* Drop the reference on the backing M_EXTPG mbuf. 
*/ 92982334850SJohn Baldwin old_m = m->m_ext.ext_arg2; 93061664ee7SGleb Smirnoff mb_free_extpg(old_m); 93182334850SJohn Baldwin } 93282334850SJohn Baldwin 93382334850SJohn Baldwin static struct mbuf * 93482334850SJohn Baldwin _mb_unmapped_to_ext(struct mbuf *m) 93582334850SJohn Baldwin { 93682334850SJohn Baldwin struct mbuf *m_new, *top, *prev, *mref; 93782334850SJohn Baldwin struct sf_buf *sf; 93882334850SJohn Baldwin vm_page_t pg; 93982334850SJohn Baldwin int i, len, off, pglen, pgoff, seglen, segoff; 94082334850SJohn Baldwin volatile u_int *refcnt; 94182334850SJohn Baldwin u_int ref_inc = 0; 94282334850SJohn Baldwin 943365e8da4SGleb Smirnoff M_ASSERTEXTPG(m); 94482334850SJohn Baldwin len = m->m_len; 9457b6c99d0SGleb Smirnoff KASSERT(m->m_epg_tls == NULL, ("%s: can't convert TLS mbuf %p", 94682334850SJohn Baldwin __func__, m)); 94782334850SJohn Baldwin 94882334850SJohn Baldwin /* See if this is the mbuf that holds the embedded refcount. */ 94982334850SJohn Baldwin if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 95082334850SJohn Baldwin refcnt = &m->m_ext.ext_count; 95182334850SJohn Baldwin mref = m; 95282334850SJohn Baldwin } else { 95382334850SJohn Baldwin KASSERT(m->m_ext.ext_cnt != NULL, 95482334850SJohn Baldwin ("%s: no refcounting pointer on %p", __func__, m)); 95582334850SJohn Baldwin refcnt = m->m_ext.ext_cnt; 95682334850SJohn Baldwin mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); 95782334850SJohn Baldwin } 95882334850SJohn Baldwin 95982334850SJohn Baldwin /* Skip over any data removed from the front. 
*/ 96082334850SJohn Baldwin off = mtod(m, vm_offset_t); 96182334850SJohn Baldwin 96282334850SJohn Baldwin top = NULL; 9637b6c99d0SGleb Smirnoff if (m->m_epg_hdrlen != 0) { 9647b6c99d0SGleb Smirnoff if (off >= m->m_epg_hdrlen) { 9657b6c99d0SGleb Smirnoff off -= m->m_epg_hdrlen; 96682334850SJohn Baldwin } else { 9677b6c99d0SGleb Smirnoff seglen = m->m_epg_hdrlen - off; 96882334850SJohn Baldwin segoff = off; 96982334850SJohn Baldwin seglen = min(seglen, len); 97082334850SJohn Baldwin off = 0; 97182334850SJohn Baldwin len -= seglen; 97282334850SJohn Baldwin m_new = m_get(M_NOWAIT, MT_DATA); 97382334850SJohn Baldwin if (m_new == NULL) 97482334850SJohn Baldwin goto fail; 97582334850SJohn Baldwin m_new->m_len = seglen; 97682334850SJohn Baldwin prev = top = m_new; 9770c103266SGleb Smirnoff memcpy(mtod(m_new, void *), &m->m_epg_hdr[segoff], 97882334850SJohn Baldwin seglen); 97982334850SJohn Baldwin } 98082334850SJohn Baldwin } 9817b6c99d0SGleb Smirnoff pgoff = m->m_epg_1st_off; 9827b6c99d0SGleb Smirnoff for (i = 0; i < m->m_epg_npgs && len > 0; i++) { 983c4ee38f8SGleb Smirnoff pglen = m_epg_pagelen(m, i, pgoff); 98482334850SJohn Baldwin if (off >= pglen) { 98582334850SJohn Baldwin off -= pglen; 98682334850SJohn Baldwin pgoff = 0; 98782334850SJohn Baldwin continue; 98882334850SJohn Baldwin } 98982334850SJohn Baldwin seglen = pglen - off; 99082334850SJohn Baldwin segoff = pgoff + off; 99182334850SJohn Baldwin off = 0; 99282334850SJohn Baldwin seglen = min(seglen, len); 99382334850SJohn Baldwin len -= seglen; 99482334850SJohn Baldwin 9950c103266SGleb Smirnoff pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); 99682334850SJohn Baldwin m_new = m_get(M_NOWAIT, MT_DATA); 99782334850SJohn Baldwin if (m_new == NULL) 99882334850SJohn Baldwin goto fail; 99982334850SJohn Baldwin if (top == NULL) { 100082334850SJohn Baldwin top = prev = m_new; 100182334850SJohn Baldwin } else { 100282334850SJohn Baldwin prev->m_next = m_new; 100382334850SJohn Baldwin prev = m_new; 100482334850SJohn Baldwin } 
100582334850SJohn Baldwin sf = sf_buf_alloc(pg, SFB_NOWAIT); 100682334850SJohn Baldwin if (sf == NULL) 100782334850SJohn Baldwin goto fail; 100882334850SJohn Baldwin 100982334850SJohn Baldwin ref_inc++; 101082334850SJohn Baldwin m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE, 101182334850SJohn Baldwin mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF); 101282334850SJohn Baldwin m_new->m_data += segoff; 101382334850SJohn Baldwin m_new->m_len = seglen; 101482334850SJohn Baldwin 101582334850SJohn Baldwin pgoff = 0; 101682334850SJohn Baldwin }; 101782334850SJohn Baldwin if (len != 0) { 10187b6c99d0SGleb Smirnoff KASSERT((off + len) <= m->m_epg_trllen, 101982334850SJohn Baldwin ("off + len > trail (%d + %d > %d)", off, len, 10207b6c99d0SGleb Smirnoff m->m_epg_trllen)); 102182334850SJohn Baldwin m_new = m_get(M_NOWAIT, MT_DATA); 102282334850SJohn Baldwin if (m_new == NULL) 102382334850SJohn Baldwin goto fail; 102482334850SJohn Baldwin if (top == NULL) 102582334850SJohn Baldwin top = m_new; 102682334850SJohn Baldwin else 102782334850SJohn Baldwin prev->m_next = m_new; 102882334850SJohn Baldwin m_new->m_len = len; 10290c103266SGleb Smirnoff memcpy(mtod(m_new, void *), &m->m_epg_trail[off], len); 103082334850SJohn Baldwin } 103182334850SJohn Baldwin 103282334850SJohn Baldwin if (ref_inc != 0) { 103382334850SJohn Baldwin /* 103482334850SJohn Baldwin * Obtain an additional reference on the old mbuf for 103582334850SJohn Baldwin * each created EXT_SFBUF mbuf. They will be dropped 103682334850SJohn Baldwin * in mb_unmapped_free_mext(). 
103782334850SJohn Baldwin */ 103882334850SJohn Baldwin if (*refcnt == 1) 103982334850SJohn Baldwin *refcnt += ref_inc; 104082334850SJohn Baldwin else 104182334850SJohn Baldwin atomic_add_int(refcnt, ref_inc); 104282334850SJohn Baldwin } 104382334850SJohn Baldwin m_free(m); 104482334850SJohn Baldwin return (top); 104582334850SJohn Baldwin 104682334850SJohn Baldwin fail: 104782334850SJohn Baldwin if (ref_inc != 0) { 104882334850SJohn Baldwin /* 104982334850SJohn Baldwin * Obtain an additional reference on the old mbuf for 105082334850SJohn Baldwin * each created EXT_SFBUF mbuf. They will be 105182334850SJohn Baldwin * immediately dropped when these mbufs are freed 105282334850SJohn Baldwin * below. 105382334850SJohn Baldwin */ 105482334850SJohn Baldwin if (*refcnt == 1) 105582334850SJohn Baldwin *refcnt += ref_inc; 105682334850SJohn Baldwin else 105782334850SJohn Baldwin atomic_add_int(refcnt, ref_inc); 105882334850SJohn Baldwin } 105982334850SJohn Baldwin m_free(m); 106082334850SJohn Baldwin m_freem(top); 106182334850SJohn Baldwin return (NULL); 106282334850SJohn Baldwin } 106382334850SJohn Baldwin 106482334850SJohn Baldwin struct mbuf * 106582334850SJohn Baldwin mb_unmapped_to_ext(struct mbuf *top) 106682334850SJohn Baldwin { 106782334850SJohn Baldwin struct mbuf *m, *next, *prev = NULL; 106882334850SJohn Baldwin 106982334850SJohn Baldwin prev = NULL; 107082334850SJohn Baldwin for (m = top; m != NULL; m = next) { 107182334850SJohn Baldwin /* m might be freed, so cache the next pointer. */ 107282334850SJohn Baldwin next = m->m_next; 10736edfd179SGleb Smirnoff if (m->m_flags & M_EXTPG) { 107482334850SJohn Baldwin if (prev != NULL) { 107582334850SJohn Baldwin /* 107682334850SJohn Baldwin * Remove 'm' from the new chain so 107782334850SJohn Baldwin * that the 'top' chain terminates 107882334850SJohn Baldwin * before 'm' in case 'top' is freed 107982334850SJohn Baldwin * due to an error. 
108082334850SJohn Baldwin */ 108182334850SJohn Baldwin prev->m_next = NULL; 108282334850SJohn Baldwin } 108382334850SJohn Baldwin m = _mb_unmapped_to_ext(m); 108482334850SJohn Baldwin if (m == NULL) { 108582334850SJohn Baldwin m_freem(top); 108682334850SJohn Baldwin m_freem(next); 108782334850SJohn Baldwin return (NULL); 108882334850SJohn Baldwin } 108982334850SJohn Baldwin if (prev == NULL) { 109082334850SJohn Baldwin top = m; 109182334850SJohn Baldwin } else { 109282334850SJohn Baldwin prev->m_next = m; 109382334850SJohn Baldwin } 109482334850SJohn Baldwin 109582334850SJohn Baldwin /* 109682334850SJohn Baldwin * Replaced one mbuf with a chain, so we must 109782334850SJohn Baldwin * find the end of chain. 109882334850SJohn Baldwin */ 109982334850SJohn Baldwin prev = m_last(m); 110082334850SJohn Baldwin } else { 110182334850SJohn Baldwin if (prev != NULL) { 110282334850SJohn Baldwin prev->m_next = m; 110382334850SJohn Baldwin } 110482334850SJohn Baldwin prev = m; 110582334850SJohn Baldwin } 110682334850SJohn Baldwin } 110782334850SJohn Baldwin return (top); 110882334850SJohn Baldwin } 110982334850SJohn Baldwin 111082334850SJohn Baldwin /* 111161664ee7SGleb Smirnoff * Allocate an empty M_EXTPG mbuf. The ext_free routine is 111282334850SJohn Baldwin * responsible for freeing any pages backing this mbuf when it is 111382334850SJohn Baldwin * freed. 
111482334850SJohn Baldwin */ 111582334850SJohn Baldwin struct mbuf * 111623feb563SAndrew Gallatin mb_alloc_ext_pgs(int how, m_ext_free_t ext_free) 111782334850SJohn Baldwin { 111882334850SJohn Baldwin struct mbuf *m; 111982334850SJohn Baldwin 112082334850SJohn Baldwin m = m_get(how, MT_DATA); 112182334850SJohn Baldwin if (m == NULL) 112282334850SJohn Baldwin return (NULL); 112382334850SJohn Baldwin 11247b6c99d0SGleb Smirnoff m->m_epg_npgs = 0; 11257b6c99d0SGleb Smirnoff m->m_epg_nrdy = 0; 11267b6c99d0SGleb Smirnoff m->m_epg_1st_off = 0; 11277b6c99d0SGleb Smirnoff m->m_epg_last_len = 0; 11287b6c99d0SGleb Smirnoff m->m_epg_flags = 0; 11297b6c99d0SGleb Smirnoff m->m_epg_hdrlen = 0; 11307b6c99d0SGleb Smirnoff m->m_epg_trllen = 0; 11317b6c99d0SGleb Smirnoff m->m_epg_tls = NULL; 11327b6c99d0SGleb Smirnoff m->m_epg_so = NULL; 113382334850SJohn Baldwin m->m_data = NULL; 11346edfd179SGleb Smirnoff m->m_flags |= (M_EXT | M_RDONLY | M_EXTPG); 113582334850SJohn Baldwin m->m_ext.ext_flags = EXT_FLAG_EMBREF; 113682334850SJohn Baldwin m->m_ext.ext_count = 1; 113782334850SJohn Baldwin m->m_ext.ext_size = 0; 113882334850SJohn Baldwin m->m_ext.ext_free = ext_free; 113982334850SJohn Baldwin return (m); 114082334850SJohn Baldwin } 114182334850SJohn Baldwin 114282334850SJohn Baldwin /* 11435e4bc63bSGleb Smirnoff * Clean up after mbufs with M_EXT storage attached to them if the 11445e4bc63bSGleb Smirnoff * reference count hits 1. 11455e4bc63bSGleb Smirnoff */ 11465e4bc63bSGleb Smirnoff void 11475e4bc63bSGleb Smirnoff mb_free_ext(struct mbuf *m) 11485e4bc63bSGleb Smirnoff { 114956a5f52eSGleb Smirnoff volatile u_int *refcnt; 115056a5f52eSGleb Smirnoff struct mbuf *mref; 11515e4bc63bSGleb Smirnoff int freembuf; 11525e4bc63bSGleb Smirnoff 11535e4bc63bSGleb Smirnoff KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); 11545e4bc63bSGleb Smirnoff 115556a5f52eSGleb Smirnoff /* See if this is the mbuf that holds the embedded refcount. 
*/ 115656a5f52eSGleb Smirnoff if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 115756a5f52eSGleb Smirnoff refcnt = &m->m_ext.ext_count; 115856a5f52eSGleb Smirnoff mref = m; 115956a5f52eSGleb Smirnoff } else { 116056a5f52eSGleb Smirnoff KASSERT(m->m_ext.ext_cnt != NULL, 116156a5f52eSGleb Smirnoff ("%s: no refcounting pointer on %p", __func__, m)); 116256a5f52eSGleb Smirnoff refcnt = m->m_ext.ext_cnt; 116356a5f52eSGleb Smirnoff mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); 116456a5f52eSGleb Smirnoff } 116556a5f52eSGleb Smirnoff 11665e4bc63bSGleb Smirnoff /* 116756a5f52eSGleb Smirnoff * Check if the header is embedded in the cluster. It is 116856a5f52eSGleb Smirnoff * important that we can't touch any of the mbuf fields 116956a5f52eSGleb Smirnoff * after we have freed the external storage, since mbuf 117017cd649fSGleb Smirnoff * could have been embedded in it. For now, the mbufs 117117cd649fSGleb Smirnoff * embedded into the cluster are always of type EXT_EXTREF, 117217cd649fSGleb Smirnoff * and for this type we won't free the mref. 11735e4bc63bSGleb Smirnoff */ 117417cd649fSGleb Smirnoff if (m->m_flags & M_NOFREE) { 117517cd649fSGleb Smirnoff freembuf = 0; 1176eec189c7SGleb Smirnoff KASSERT(m->m_ext.ext_type == EXT_EXTREF || 1177eec189c7SGleb Smirnoff m->m_ext.ext_type == EXT_RXRING, 117817cd649fSGleb Smirnoff ("%s: no-free mbuf %p has wrong type", __func__, m)); 117917cd649fSGleb Smirnoff } else 118017cd649fSGleb Smirnoff freembuf = 1; 11815e4bc63bSGleb Smirnoff 118256a5f52eSGleb Smirnoff /* Free attached storage if this mbuf is the only reference to it. */ 118356a5f52eSGleb Smirnoff if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { 11845e4bc63bSGleb Smirnoff switch (m->m_ext.ext_type) { 118556a5f52eSGleb Smirnoff case EXT_PACKET: 118656a5f52eSGleb Smirnoff /* The packet zone is special. 
*/ 118756a5f52eSGleb Smirnoff if (*refcnt == 0) 118856a5f52eSGleb Smirnoff *refcnt = 1; 118956a5f52eSGleb Smirnoff uma_zfree(zone_pack, mref); 11905e4bc63bSGleb Smirnoff break; 11915e4bc63bSGleb Smirnoff case EXT_CLUSTER: 11925e4bc63bSGleb Smirnoff uma_zfree(zone_clust, m->m_ext.ext_buf); 119356a5f52eSGleb Smirnoff uma_zfree(zone_mbuf, mref); 11945e4bc63bSGleb Smirnoff break; 11955e4bc63bSGleb Smirnoff case EXT_JUMBOP: 11965e4bc63bSGleb Smirnoff uma_zfree(zone_jumbop, m->m_ext.ext_buf); 119756a5f52eSGleb Smirnoff uma_zfree(zone_mbuf, mref); 11985e4bc63bSGleb Smirnoff break; 11995e4bc63bSGleb Smirnoff case EXT_JUMBO9: 12005e4bc63bSGleb Smirnoff uma_zfree(zone_jumbo9, m->m_ext.ext_buf); 120156a5f52eSGleb Smirnoff uma_zfree(zone_mbuf, mref); 12025e4bc63bSGleb Smirnoff break; 12035e4bc63bSGleb Smirnoff case EXT_JUMBO16: 12045e4bc63bSGleb Smirnoff uma_zfree(zone_jumbo16, m->m_ext.ext_buf); 120556a5f52eSGleb Smirnoff uma_zfree(zone_mbuf, mref); 120656a5f52eSGleb Smirnoff break; 120756a5f52eSGleb Smirnoff case EXT_SFBUF: 12085e4bc63bSGleb Smirnoff case EXT_NET_DRV: 12095e4bc63bSGleb Smirnoff case EXT_MOD_TYPE: 12105e4bc63bSGleb Smirnoff case EXT_DISPOSABLE: 121107e87a1dSGleb Smirnoff KASSERT(mref->m_ext.ext_free != NULL, 12120ea37a86SGleb Smirnoff ("%s: ext_free not set", __func__)); 121307e87a1dSGleb Smirnoff mref->m_ext.ext_free(mref); 121456a5f52eSGleb Smirnoff uma_zfree(zone_mbuf, mref); 12150ea37a86SGleb Smirnoff break; 12165e4bc63bSGleb Smirnoff case EXT_EXTREF: 12175e4bc63bSGleb Smirnoff KASSERT(m->m_ext.ext_free != NULL, 12185e4bc63bSGleb Smirnoff ("%s: ext_free not set", __func__)); 1219e8fd18f3SGleb Smirnoff m->m_ext.ext_free(m); 12205e4bc63bSGleb Smirnoff break; 1221eec189c7SGleb Smirnoff case EXT_RXRING: 1222eec189c7SGleb Smirnoff KASSERT(m->m_ext.ext_free == NULL, 1223eec189c7SGleb Smirnoff ("%s: ext_free is set", __func__)); 1224eec189c7SGleb Smirnoff break; 12255e4bc63bSGleb Smirnoff default: 12265e4bc63bSGleb Smirnoff KASSERT(m->m_ext.ext_type == 0, 
12275e4bc63bSGleb Smirnoff ("%s: unknown ext_type", __func__)); 12285e4bc63bSGleb Smirnoff } 12295e4bc63bSGleb Smirnoff } 12305e4bc63bSGleb Smirnoff 123156a5f52eSGleb Smirnoff if (freembuf && m != mref) 12325e4bc63bSGleb Smirnoff uma_zfree(zone_mbuf, m); 12335e4bc63bSGleb Smirnoff } 12345e4bc63bSGleb Smirnoff 12355e4bc63bSGleb Smirnoff /* 123661664ee7SGleb Smirnoff * Clean up after mbufs with M_EXTPG storage attached to them if the 123761664ee7SGleb Smirnoff * reference count hits 1. 123861664ee7SGleb Smirnoff */ 123961664ee7SGleb Smirnoff void 124061664ee7SGleb Smirnoff mb_free_extpg(struct mbuf *m) 124161664ee7SGleb Smirnoff { 124261664ee7SGleb Smirnoff volatile u_int *refcnt; 124361664ee7SGleb Smirnoff struct mbuf *mref; 124461664ee7SGleb Smirnoff 124561664ee7SGleb Smirnoff M_ASSERTEXTPG(m); 124661664ee7SGleb Smirnoff 124761664ee7SGleb Smirnoff /* See if this is the mbuf that holds the embedded refcount. */ 124861664ee7SGleb Smirnoff if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 124961664ee7SGleb Smirnoff refcnt = &m->m_ext.ext_count; 125061664ee7SGleb Smirnoff mref = m; 125161664ee7SGleb Smirnoff } else { 125261664ee7SGleb Smirnoff KASSERT(m->m_ext.ext_cnt != NULL, 125361664ee7SGleb Smirnoff ("%s: no refcounting pointer on %p", __func__, m)); 125461664ee7SGleb Smirnoff refcnt = m->m_ext.ext_cnt; 125561664ee7SGleb Smirnoff mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); 125661664ee7SGleb Smirnoff } 125761664ee7SGleb Smirnoff 125861664ee7SGleb Smirnoff /* Free attached storage if this mbuf is the only reference to it. 
*/ 125961664ee7SGleb Smirnoff if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { 126061664ee7SGleb Smirnoff KASSERT(mref->m_ext.ext_free != NULL, 126161664ee7SGleb Smirnoff ("%s: ext_free not set", __func__)); 126261664ee7SGleb Smirnoff 126361664ee7SGleb Smirnoff mref->m_ext.ext_free(mref); 126461664ee7SGleb Smirnoff #ifdef KERN_TLS 126561664ee7SGleb Smirnoff if (mref->m_epg_tls != NULL && 126661664ee7SGleb Smirnoff !refcount_release_if_not_last(&mref->m_epg_tls->refcount)) 126761664ee7SGleb Smirnoff ktls_enqueue_to_free(mref); 126861664ee7SGleb Smirnoff else 126961664ee7SGleb Smirnoff #endif 127061664ee7SGleb Smirnoff uma_zfree(zone_mbuf, mref); 127161664ee7SGleb Smirnoff } 127261664ee7SGleb Smirnoff 127361664ee7SGleb Smirnoff if (m != mref) 127461664ee7SGleb Smirnoff uma_zfree(zone_mbuf, m); 127561664ee7SGleb Smirnoff } 127661664ee7SGleb Smirnoff 127761664ee7SGleb Smirnoff /* 12785e4bc63bSGleb Smirnoff * Official mbuf(9) allocation KPI for stack and drivers: 12795e4bc63bSGleb Smirnoff * 12805e4bc63bSGleb Smirnoff * m_get() - a single mbuf without any attachments, sys/mbuf.h. 12815e4bc63bSGleb Smirnoff * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h. 12825e4bc63bSGleb Smirnoff * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h. 12835e4bc63bSGleb Smirnoff * m_clget() - attach cluster to already allocated mbuf. 12845e4bc63bSGleb Smirnoff * m_cljget() - attach jumbo cluster to already allocated mbuf. 12855e4bc63bSGleb Smirnoff * m_get2() - allocate minimum mbuf that would fit size argument. 12865e4bc63bSGleb Smirnoff * m_getm2() - allocate a chain of mbufs/clusters. 12875e4bc63bSGleb Smirnoff * m_extadd() - attach external cluster to mbuf. 12885e4bc63bSGleb Smirnoff * 12895e4bc63bSGleb Smirnoff * m_free() - free single mbuf with its tags and ext, sys/mbuf.h. 12905e4bc63bSGleb Smirnoff * m_freem() - free chain of mbufs. 
12915e4bc63bSGleb Smirnoff */ 12925e4bc63bSGleb Smirnoff 12935e4bc63bSGleb Smirnoff int 12945e4bc63bSGleb Smirnoff m_clget(struct mbuf *m, int how) 12955e4bc63bSGleb Smirnoff { 12965e4bc63bSGleb Smirnoff 12975e4bc63bSGleb Smirnoff KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 12985e4bc63bSGleb Smirnoff __func__, m)); 12995e4bc63bSGleb Smirnoff m->m_ext.ext_buf = (char *)NULL; 13005e4bc63bSGleb Smirnoff uma_zalloc_arg(zone_clust, m, how); 13015e4bc63bSGleb Smirnoff /* 13025e4bc63bSGleb Smirnoff * On a cluster allocation failure, drain the packet zone and retry, 13035e4bc63bSGleb Smirnoff * we might be able to loosen a few clusters up on the drain. 13045e4bc63bSGleb Smirnoff */ 13055e4bc63bSGleb Smirnoff if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { 130608cfa56eSMark Johnston uma_zone_reclaim(zone_pack, UMA_RECLAIM_DRAIN); 13075e4bc63bSGleb Smirnoff uma_zalloc_arg(zone_clust, m, how); 13085e4bc63bSGleb Smirnoff } 1309480f4e94SGeorge V. Neville-Neil MBUF_PROBE2(m__clget, m, how); 13105e4bc63bSGleb Smirnoff return (m->m_flags & M_EXT); 13115e4bc63bSGleb Smirnoff } 13125e4bc63bSGleb Smirnoff 13135e4bc63bSGleb Smirnoff /* 13145e4bc63bSGleb Smirnoff * m_cljget() is different from m_clget() as it can allocate clusters without 13155e4bc63bSGleb Smirnoff * attaching them to an mbuf. In that case the return value is the pointer 13165e4bc63bSGleb Smirnoff * to the cluster of the requested size. If an mbuf was specified, it gets 13175e4bc63bSGleb Smirnoff * the cluster attached to it and the return value can be safely ignored. 13185e4bc63bSGleb Smirnoff * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 13195e4bc63bSGleb Smirnoff */ 13205e4bc63bSGleb Smirnoff void * 13215e4bc63bSGleb Smirnoff m_cljget(struct mbuf *m, int how, int size) 13225e4bc63bSGleb Smirnoff { 13235e4bc63bSGleb Smirnoff uma_zone_t zone; 1324480f4e94SGeorge V. 
Neville-Neil void *retval; 13255e4bc63bSGleb Smirnoff 13265e4bc63bSGleb Smirnoff if (m != NULL) { 13275e4bc63bSGleb Smirnoff KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 13285e4bc63bSGleb Smirnoff __func__, m)); 13295e4bc63bSGleb Smirnoff m->m_ext.ext_buf = NULL; 13305e4bc63bSGleb Smirnoff } 13315e4bc63bSGleb Smirnoff 13325e4bc63bSGleb Smirnoff zone = m_getzone(size); 1333480f4e94SGeorge V. Neville-Neil retval = uma_zalloc_arg(zone, m, how); 1334480f4e94SGeorge V. Neville-Neil 1335480f4e94SGeorge V. Neville-Neil MBUF_PROBE4(m__cljget, m, how, size, retval); 1336480f4e94SGeorge V. Neville-Neil 1337480f4e94SGeorge V. Neville-Neil return (retval); 13385e4bc63bSGleb Smirnoff } 13395e4bc63bSGleb Smirnoff 13405e4bc63bSGleb Smirnoff /* 13415e4bc63bSGleb Smirnoff * m_get2() allocates minimum mbuf that would fit "size" argument. 13425e4bc63bSGleb Smirnoff */ 13435e4bc63bSGleb Smirnoff struct mbuf * 13445e4bc63bSGleb Smirnoff m_get2(int size, int how, short type, int flags) 13455e4bc63bSGleb Smirnoff { 13465e4bc63bSGleb Smirnoff struct mb_args args; 13475e4bc63bSGleb Smirnoff struct mbuf *m, *n; 13485e4bc63bSGleb Smirnoff 13495e4bc63bSGleb Smirnoff args.flags = flags; 13505e4bc63bSGleb Smirnoff args.type = type; 13515e4bc63bSGleb Smirnoff 13525e4bc63bSGleb Smirnoff if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) 13535e4bc63bSGleb Smirnoff return (uma_zalloc_arg(zone_mbuf, &args, how)); 13545e4bc63bSGleb Smirnoff if (size <= MCLBYTES) 13555e4bc63bSGleb Smirnoff return (uma_zalloc_arg(zone_pack, &args, how)); 13565e4bc63bSGleb Smirnoff 13575e4bc63bSGleb Smirnoff if (size > MJUMPAGESIZE) 13585e4bc63bSGleb Smirnoff return (NULL); 13595e4bc63bSGleb Smirnoff 13605e4bc63bSGleb Smirnoff m = uma_zalloc_arg(zone_mbuf, &args, how); 13615e4bc63bSGleb Smirnoff if (m == NULL) 13625e4bc63bSGleb Smirnoff return (NULL); 13635e4bc63bSGleb Smirnoff 13645e4bc63bSGleb Smirnoff n = uma_zalloc_arg(zone_jumbop, m, how); 13655e4bc63bSGleb Smirnoff if (n == NULL) { 
13665e4bc63bSGleb Smirnoff uma_zfree(zone_mbuf, m); 13675e4bc63bSGleb Smirnoff return (NULL); 13685e4bc63bSGleb Smirnoff } 13695e4bc63bSGleb Smirnoff 13705e4bc63bSGleb Smirnoff return (m); 13715e4bc63bSGleb Smirnoff } 13725e4bc63bSGleb Smirnoff 13735e4bc63bSGleb Smirnoff /* 13745e4bc63bSGleb Smirnoff * m_getjcl() returns an mbuf with a cluster of the specified size attached. 13755e4bc63bSGleb Smirnoff * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 13765e4bc63bSGleb Smirnoff */ 13775e4bc63bSGleb Smirnoff struct mbuf * 13785e4bc63bSGleb Smirnoff m_getjcl(int how, short type, int flags, int size) 13795e4bc63bSGleb Smirnoff { 13805e4bc63bSGleb Smirnoff struct mb_args args; 13815e4bc63bSGleb Smirnoff struct mbuf *m, *n; 13825e4bc63bSGleb Smirnoff uma_zone_t zone; 13835e4bc63bSGleb Smirnoff 13845e4bc63bSGleb Smirnoff if (size == MCLBYTES) 13855e4bc63bSGleb Smirnoff return m_getcl(how, type, flags); 13865e4bc63bSGleb Smirnoff 13875e4bc63bSGleb Smirnoff args.flags = flags; 13885e4bc63bSGleb Smirnoff args.type = type; 13895e4bc63bSGleb Smirnoff 13905e4bc63bSGleb Smirnoff m = uma_zalloc_arg(zone_mbuf, &args, how); 13915e4bc63bSGleb Smirnoff if (m == NULL) 13925e4bc63bSGleb Smirnoff return (NULL); 13935e4bc63bSGleb Smirnoff 13945e4bc63bSGleb Smirnoff zone = m_getzone(size); 13955e4bc63bSGleb Smirnoff n = uma_zalloc_arg(zone, m, how); 13965e4bc63bSGleb Smirnoff if (n == NULL) { 13975e4bc63bSGleb Smirnoff uma_zfree(zone_mbuf, m); 13985e4bc63bSGleb Smirnoff return (NULL); 13995e4bc63bSGleb Smirnoff } 1400*edde7a53SAndrey V. Elsukov MBUF_PROBE5(m__getjcl, how, type, flags, size, m); 14015e4bc63bSGleb Smirnoff return (m); 14025e4bc63bSGleb Smirnoff } 14035e4bc63bSGleb Smirnoff 14045e4bc63bSGleb Smirnoff /* 14055e4bc63bSGleb Smirnoff * Allocate a given length worth of mbufs and/or clusters (whatever fits 14065e4bc63bSGleb Smirnoff * best) and return a pointer to the top of the allocated chain. 
If an 14075e4bc63bSGleb Smirnoff * existing mbuf chain is provided, then we will append the new chain 140858c43838SAndriy Voskoboinyk * to the existing one and return a pointer to the provided mbuf. 14095e4bc63bSGleb Smirnoff */ 14105e4bc63bSGleb Smirnoff struct mbuf * 14115e4bc63bSGleb Smirnoff m_getm2(struct mbuf *m, int len, int how, short type, int flags) 14125e4bc63bSGleb Smirnoff { 14135e4bc63bSGleb Smirnoff struct mbuf *mb, *nm = NULL, *mtail = NULL; 14145e4bc63bSGleb Smirnoff 14155e4bc63bSGleb Smirnoff KASSERT(len >= 0, ("%s: len is < 0", __func__)); 14165e4bc63bSGleb Smirnoff 14175e4bc63bSGleb Smirnoff /* Validate flags. */ 14185e4bc63bSGleb Smirnoff flags &= (M_PKTHDR | M_EOR); 14195e4bc63bSGleb Smirnoff 14205e4bc63bSGleb Smirnoff /* Packet header mbuf must be first in chain. */ 14215e4bc63bSGleb Smirnoff if ((flags & M_PKTHDR) && m != NULL) 14225e4bc63bSGleb Smirnoff flags &= ~M_PKTHDR; 14235e4bc63bSGleb Smirnoff 14245e4bc63bSGleb Smirnoff /* Loop and append maximum sized mbufs to the chain tail. */ 14255e4bc63bSGleb Smirnoff while (len > 0) { 14265e4bc63bSGleb Smirnoff if (len > MCLBYTES) 14275e4bc63bSGleb Smirnoff mb = m_getjcl(how, type, (flags & M_PKTHDR), 14285e4bc63bSGleb Smirnoff MJUMPAGESIZE); 14295e4bc63bSGleb Smirnoff else if (len >= MINCLSIZE) 14305e4bc63bSGleb Smirnoff mb = m_getcl(how, type, (flags & M_PKTHDR)); 14315e4bc63bSGleb Smirnoff else if (flags & M_PKTHDR) 14325e4bc63bSGleb Smirnoff mb = m_gethdr(how, type); 14335e4bc63bSGleb Smirnoff else 14345e4bc63bSGleb Smirnoff mb = m_get(how, type); 14355e4bc63bSGleb Smirnoff 14365e4bc63bSGleb Smirnoff /* Fail the whole operation if one mbuf can't be allocated. */ 14375e4bc63bSGleb Smirnoff if (mb == NULL) { 14385e4bc63bSGleb Smirnoff if (nm != NULL) 14395e4bc63bSGleb Smirnoff m_freem(nm); 14405e4bc63bSGleb Smirnoff return (NULL); 14415e4bc63bSGleb Smirnoff } 14425e4bc63bSGleb Smirnoff 14435e4bc63bSGleb Smirnoff /* Book keeping. 
*/ 14445e4bc63bSGleb Smirnoff len -= M_SIZE(mb); 14455e4bc63bSGleb Smirnoff if (mtail != NULL) 14465e4bc63bSGleb Smirnoff mtail->m_next = mb; 14475e4bc63bSGleb Smirnoff else 14485e4bc63bSGleb Smirnoff nm = mb; 14495e4bc63bSGleb Smirnoff mtail = mb; 14505e4bc63bSGleb Smirnoff flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 14515e4bc63bSGleb Smirnoff } 14525e4bc63bSGleb Smirnoff if (flags & M_EOR) 14535e4bc63bSGleb Smirnoff mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ 14545e4bc63bSGleb Smirnoff 14555e4bc63bSGleb Smirnoff /* If mbuf was supplied, append new chain to the end of it. */ 14565e4bc63bSGleb Smirnoff if (m != NULL) { 14575e4bc63bSGleb Smirnoff for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) 14585e4bc63bSGleb Smirnoff ; 14595e4bc63bSGleb Smirnoff mtail->m_next = nm; 14605e4bc63bSGleb Smirnoff mtail->m_flags &= ~M_EOR; 14615e4bc63bSGleb Smirnoff } else 14625e4bc63bSGleb Smirnoff m = nm; 14635e4bc63bSGleb Smirnoff 14645e4bc63bSGleb Smirnoff return (m); 14655e4bc63bSGleb Smirnoff } 14665e4bc63bSGleb Smirnoff 14675e4bc63bSGleb Smirnoff /*- 14685e4bc63bSGleb Smirnoff * Configure a provided mbuf to refer to the provided external storage 146956a5f52eSGleb Smirnoff * buffer and setup a reference count for said buffer. 14705e4bc63bSGleb Smirnoff * 14715e4bc63bSGleb Smirnoff * Arguments: 14725e4bc63bSGleb Smirnoff * mb The existing mbuf to which to attach the provided buffer. 14735e4bc63bSGleb Smirnoff * buf The address of the provided external storage buffer. 14745e4bc63bSGleb Smirnoff * size The size of the provided buffer. 14755e4bc63bSGleb Smirnoff * freef A pointer to a routine that is responsible for freeing the 14765e4bc63bSGleb Smirnoff * provided external storage buffer. 14775e4bc63bSGleb Smirnoff * args A pointer to an argument structure (of any type) to be passed 14785e4bc63bSGleb Smirnoff * to the provided freef routine (may be NULL). 14795e4bc63bSGleb Smirnoff * flags Any other flags to be passed to the provided mbuf. 
14805e4bc63bSGleb Smirnoff * type The type that the external storage buffer should be 14815e4bc63bSGleb Smirnoff * labeled with. 14825e4bc63bSGleb Smirnoff * 14835e4bc63bSGleb Smirnoff * Returns: 14845e4bc63bSGleb Smirnoff * Nothing. 14855e4bc63bSGleb Smirnoff */ 148656a5f52eSGleb Smirnoff void 1487e8fd18f3SGleb Smirnoff m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef, 1488e8fd18f3SGleb Smirnoff void *arg1, void *arg2, int flags, int type) 14895e4bc63bSGleb Smirnoff { 149056a5f52eSGleb Smirnoff 14915e4bc63bSGleb Smirnoff KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); 14925e4bc63bSGleb Smirnoff 14935e4bc63bSGleb Smirnoff mb->m_flags |= (M_EXT | flags); 14945e4bc63bSGleb Smirnoff mb->m_ext.ext_buf = buf; 14955e4bc63bSGleb Smirnoff mb->m_data = mb->m_ext.ext_buf; 14965e4bc63bSGleb Smirnoff mb->m_ext.ext_size = size; 14975e4bc63bSGleb Smirnoff mb->m_ext.ext_free = freef; 14985e4bc63bSGleb Smirnoff mb->m_ext.ext_arg1 = arg1; 14995e4bc63bSGleb Smirnoff mb->m_ext.ext_arg2 = arg2; 15005e4bc63bSGleb Smirnoff mb->m_ext.ext_type = type; 15015e4bc63bSGleb Smirnoff 150256a5f52eSGleb Smirnoff if (type != EXT_EXTREF) { 150356a5f52eSGleb Smirnoff mb->m_ext.ext_count = 1; 150456a5f52eSGleb Smirnoff mb->m_ext.ext_flags = EXT_FLAG_EMBREF; 150556a5f52eSGleb Smirnoff } else 150656a5f52eSGleb Smirnoff mb->m_ext.ext_flags = 0; 15075e4bc63bSGleb Smirnoff } 15085e4bc63bSGleb Smirnoff 15095e4bc63bSGleb Smirnoff /* 15105e4bc63bSGleb Smirnoff * Free an entire chain of mbufs and associated external buffers, if 15115e4bc63bSGleb Smirnoff * applicable. 
15125e4bc63bSGleb Smirnoff */ 15135e4bc63bSGleb Smirnoff void 15145e4bc63bSGleb Smirnoff m_freem(struct mbuf *mb) 15155e4bc63bSGleb Smirnoff { 15165e4bc63bSGleb Smirnoff 1517c8f59118SGleb Smirnoff MBUF_PROBE1(m__freem, mb); 15185e4bc63bSGleb Smirnoff while (mb != NULL) 15195e4bc63bSGleb Smirnoff mb = m_free(mb); 15205e4bc63bSGleb Smirnoff } 1521fb3bc596SJohn Baldwin 1522fb3bc596SJohn Baldwin void 1523fb3bc596SJohn Baldwin m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp) 1524fb3bc596SJohn Baldwin { 1525fb3bc596SJohn Baldwin 1526fb3bc596SJohn Baldwin if_ref(ifp); 1527fb3bc596SJohn Baldwin mst->ifp = ifp; 1528fb3bc596SJohn Baldwin refcount_init(&mst->refcount, 1); 1529fb3bc596SJohn Baldwin counter_u64_add(snd_tag_count, 1); 1530fb3bc596SJohn Baldwin } 1531fb3bc596SJohn Baldwin 1532fb3bc596SJohn Baldwin void 1533fb3bc596SJohn Baldwin m_snd_tag_destroy(struct m_snd_tag *mst) 1534fb3bc596SJohn Baldwin { 1535fb3bc596SJohn Baldwin struct ifnet *ifp; 1536fb3bc596SJohn Baldwin 1537fb3bc596SJohn Baldwin ifp = mst->ifp; 1538fb3bc596SJohn Baldwin ifp->if_snd_tag_free(mst); 1539fb3bc596SJohn Baldwin if_rele(ifp); 1540fb3bc596SJohn Baldwin counter_u64_add(snd_tag_count, -1); 1541fb3bc596SJohn Baldwin } 154284d746deSRick Macklem 154384d746deSRick Macklem /* 154484d746deSRick Macklem * Allocate an mbuf with anonymous external pages. 
154584d746deSRick Macklem */ 154684d746deSRick Macklem struct mbuf * 154784d746deSRick Macklem mb_alloc_ext_plus_pages(int len, int how) 154884d746deSRick Macklem { 154984d746deSRick Macklem struct mbuf *m; 155084d746deSRick Macklem vm_page_t pg; 155184d746deSRick Macklem int i, npgs; 155284d746deSRick Macklem 155384d746deSRick Macklem m = mb_alloc_ext_pgs(how, mb_free_mext_pgs); 155484d746deSRick Macklem if (m == NULL) 155584d746deSRick Macklem return (NULL); 155684d746deSRick Macklem m->m_epg_flags |= EPG_FLAG_ANON; 155784d746deSRick Macklem npgs = howmany(len, PAGE_SIZE); 155884d746deSRick Macklem for (i = 0; i < npgs; i++) { 155984d746deSRick Macklem do { 156084d746deSRick Macklem pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 156184d746deSRick Macklem VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); 156284d746deSRick Macklem if (pg == NULL) { 156384d746deSRick Macklem if (how == M_NOWAIT) { 156484d746deSRick Macklem m->m_epg_npgs = i; 156584d746deSRick Macklem m_free(m); 156684d746deSRick Macklem return (NULL); 156784d746deSRick Macklem } 156884d746deSRick Macklem vm_wait(NULL); 156984d746deSRick Macklem } 157084d746deSRick Macklem } while (pg == NULL); 157184d746deSRick Macklem m->m_epg_pa[i] = VM_PAGE_TO_PHYS(pg); 157284d746deSRick Macklem } 157384d746deSRick Macklem m->m_epg_npgs = npgs; 157484d746deSRick Macklem return (m); 157584d746deSRick Macklem } 157684d746deSRick Macklem 157784d746deSRick Macklem /* 157884d746deSRick Macklem * Copy the data in the mbuf chain to a chain of mbufs with anonymous external 157984d746deSRick Macklem * unmapped pages. 158084d746deSRick Macklem * len is the length of data in the input mbuf chain. 158184d746deSRick Macklem * mlen is the maximum number of bytes put into each ext_page mbuf. 
158284d746deSRick Macklem */ 158384d746deSRick Macklem struct mbuf * 158484d746deSRick Macklem mb_mapped_to_unmapped(struct mbuf *mp, int len, int mlen, int how, 158584d746deSRick Macklem struct mbuf **mlast) 158684d746deSRick Macklem { 158784d746deSRick Macklem struct mbuf *m, *mout; 158884d746deSRick Macklem char *pgpos, *mbpos; 158984d746deSRick Macklem int i, mblen, mbufsiz, pglen, xfer; 159084d746deSRick Macklem 159184d746deSRick Macklem if (len == 0) 159284d746deSRick Macklem return (NULL); 159384d746deSRick Macklem mbufsiz = min(mlen, len); 159484d746deSRick Macklem m = mout = mb_alloc_ext_plus_pages(mbufsiz, how); 159584d746deSRick Macklem if (m == NULL) 159684d746deSRick Macklem return (m); 159784d746deSRick Macklem pgpos = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[0]); 159884d746deSRick Macklem pglen = PAGE_SIZE; 159984d746deSRick Macklem mblen = 0; 160084d746deSRick Macklem i = 0; 160184d746deSRick Macklem do { 160284d746deSRick Macklem if (pglen == 0) { 160384d746deSRick Macklem if (++i == m->m_epg_npgs) { 160484d746deSRick Macklem m->m_epg_last_len = PAGE_SIZE; 160584d746deSRick Macklem mbufsiz = min(mlen, len); 160684d746deSRick Macklem m->m_next = mb_alloc_ext_plus_pages(mbufsiz, 160784d746deSRick Macklem how); 160884d746deSRick Macklem m = m->m_next; 160984d746deSRick Macklem if (m == NULL) { 161084d746deSRick Macklem m_freem(mout); 161184d746deSRick Macklem return (m); 161284d746deSRick Macklem } 161384d746deSRick Macklem i = 0; 161484d746deSRick Macklem } 161584d746deSRick Macklem pgpos = (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]); 161684d746deSRick Macklem pglen = PAGE_SIZE; 161784d746deSRick Macklem } 161884d746deSRick Macklem while (mblen == 0) { 161984d746deSRick Macklem if (mp == NULL) { 162084d746deSRick Macklem m_freem(mout); 162184d746deSRick Macklem return (NULL); 162284d746deSRick Macklem } 162384d746deSRick Macklem KASSERT((mp->m_flags & M_EXTPG) == 0, 162484d746deSRick Macklem ("mb_copym_ext_pgs: ext_pgs input mbuf")); 
162584d746deSRick Macklem mbpos = mtod(mp, char *); 162684d746deSRick Macklem mblen = mp->m_len; 162784d746deSRick Macklem mp = mp->m_next; 162884d746deSRick Macklem } 162984d746deSRick Macklem xfer = min(mblen, pglen); 163084d746deSRick Macklem memcpy(pgpos, mbpos, xfer); 163184d746deSRick Macklem pgpos += xfer; 163284d746deSRick Macklem mbpos += xfer; 163384d746deSRick Macklem pglen -= xfer; 163484d746deSRick Macklem mblen -= xfer; 163584d746deSRick Macklem len -= xfer; 163684d746deSRick Macklem m->m_len += xfer; 163784d746deSRick Macklem } while (len > 0); 163884d746deSRick Macklem m->m_epg_last_len = PAGE_SIZE - pglen; 163984d746deSRick Macklem if (mlast != NULL) 164084d746deSRick Macklem *mlast = m; 164184d746deSRick Macklem return (mout); 164284d746deSRick Macklem } 1643