/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/domainset.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *   |                         |
 *   |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *   |   |             [     Packet   ]            |
 * [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
 * [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
 *        |                       \________         |
 *  [ Cluster Keg   ]                      \       /
 *        |                              [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                         |
 *        |                              [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones its _ctor_ function is executed.  Likewise,
 * for any deallocation through uma_zfree() the _dtor_ function
 * is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with
 * _zfini_ functions and freed back to the global memory pool.
 */
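/*
 * Illustrative sketch (not part of the allocator itself): the
 * common-case paths in the diagram above correspond to the standard
 * mbuf API.  A caller wanting an mbuf with an attached 2K cluster in
 * one call goes through the Packet zone, e.g.:
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...
 *	m_freem(m);
 *
 * while m_get()/m_gethdr() hit the Mbuf zone alone and m_clget()
 * attaches a cluster from the Cluster zone to an existing mbuf.
 */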
int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */

static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */

SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &maxmbufmem, 0, "Maximum real memory allocatable to various mbuf types");

static counter_u64_t snd_tag_count;
SYSCTL_COUNTER_U64(_kern_ipc, OID_AUTO, num_snd_tags, CTLFLAG_RW,
    &snd_tag_count, "# of active mbuf send tags");

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem;

	/*
	 * The default limit for all mbuf related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
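/*
 * Illustrative sketch: the limits above are boot-time tunables and may
 * be overridden from loader.conf(5).  The values below are made-up
 * examples, not recommendations:
 *
 *	kern.ipc.maxmbufmem="1073741824"
 *	kern.ipc.nmbclusters="262144"
 *	kern.ipc.nmbufs="1048576"
 */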
static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");
static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr && newnmbufs != nmbufs) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");
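/*
 * Illustrative sketch: the handlers above only allow a limit to be
 * raised at runtime (a smaller value returns EINVAL), e.g.:
 *
 *	# sysctl kern.ipc.nmbclusters=524288
 *
 * The value is made up.  Note that nmbufs must remain at least the
 * sum of all the cluster limits for an increase to be accepted.
 */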
/*
 * Zones from which we allocate.
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;
uma_zone_t	zone_extpgs;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);
static void	mb_reclaim(uma_zone_t, int);
static void	*mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int);

/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);

_Static_assert(sizeof(struct mbuf_ext_pgs) == 256,
    "mbuf_ext_pgs size mismatch");

/*
 * Initialize FreeBSD Network buffer allocation.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
	uma_zone_set_maxaction(zone_mbuf, mb_reclaim);

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
	uma_zone_set_maxaction(zone_clust, mb_reclaim);

	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
	/* Make jumbo frame zones too.  Page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
	uma_zone_set_maxaction(zone_jumbop, mb_reclaim);

	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
	uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    UMA_ALIGN_PTR, 0);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
	uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);

	zone_extpgs = uma_zcreate(MBUF_EXTPGS_MEM_NAME,
	    sizeof(struct mbuf_ext_pgs),
#ifdef INVARIANTS
	    trash_ctor, trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL, NULL,
#endif
	    UMA_ALIGN_CACHE, 0);

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);

	snd_tag_count = counter_u64_alloc(M_WAITOK);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
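/*
 * Illustrative sketch: the jumbo zones configured above back the jumbo
 * mbuf API; a driver needing a 9k receive buffer might do:
 *
 *	struct mbuf *m;
 *
 *	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */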
#ifdef NETDUMP
/*
 * netdump makes use of a pre-allocated pool of mbufs and clusters.  When
 * netdump is configured, we initialize a set of UMA cache zones which return
 * items from this pool.  At panic-time, the regular UMA zone pointers are
 * overwritten with those of the cache zones so that drivers may allocate and
 * free mbufs and clusters without attempting to allocate physical memory.
 *
 * We keep mbufs and clusters in a pair of mbuf queues.  In particular, for
 * the purpose of caching clusters, we treat them as mbufs.
 */
static struct mbufq nd_mbufq =
    { STAILQ_HEAD_INITIALIZER(nd_mbufq.mq_head), 0, INT_MAX };
static struct mbufq nd_clustq =
    { STAILQ_HEAD_INITIALIZER(nd_clustq.mq_head), 0, INT_MAX };

static int nd_clsize;
static uma_zone_t nd_zone_mbuf;
static uma_zone_t nd_zone_clust;
static uma_zone_t nd_zone_pack;

static int
nd_buf_import(void *arg, void **store, int count, int domain __unused,
    int flags)
{
	struct mbufq *q;
	struct mbuf *m;
	int i;

	q = arg;

	for (i = 0; i < count; i++) {
		m = mbufq_dequeue(q);
		if (m == NULL)
			break;
		trash_init(m, q == &nd_mbufq ? MSIZE : nd_clsize, flags);
		store[i] = m;
	}
	KASSERT((flags & M_WAITOK) == 0 || i == count,
	    ("%s: ran out of pre-allocated mbufs", __func__));
	return (i);
}
static void
nd_buf_release(void *arg, void **store, int count)
{
	struct mbufq *q;
	struct mbuf *m;
	int i;

	q = arg;

	for (i = 0; i < count; i++) {
		m = store[i];
		(void)mbufq_enqueue(q, m);
	}
}

static int
nd_pack_import(void *arg __unused, void **store, int count, int domain __unused,
    int flags)
{
	struct mbuf *m;
	void *clust;
	int i;

	for (i = 0; i < count; i++) {
		m = m_get(M_NOWAIT, MT_DATA);
		if (m == NULL)
			break;
		clust = uma_zalloc(nd_zone_clust, M_NOWAIT);
		if (clust == NULL) {
			m_free(m);
			break;
		}
		mb_ctor_clust(clust, nd_clsize, m, 0);
		store[i] = m;
	}
	KASSERT((flags & M_WAITOK) == 0 || i == count,
	    ("%s: ran out of pre-allocated mbufs", __func__));
	return (i);
}

static void
nd_pack_release(void *arg __unused, void **store, int count)
{
	struct mbuf *m;
	void *clust;
	int i;

	for (i = 0; i < count; i++) {
		m = store[i];
		clust = m->m_ext.ext_buf;
		uma_zfree(nd_zone_clust, clust);
		uma_zfree(nd_zone_mbuf, m);
	}
}
/*
 * Free the pre-allocated mbufs and clusters reserved for netdump, and destroy
 * the corresponding UMA cache zones.
 */
void
netdump_mbuf_drain(void)
{
	struct mbuf *m;
	void *item;

	if (nd_zone_mbuf != NULL) {
		uma_zdestroy(nd_zone_mbuf);
		nd_zone_mbuf = NULL;
	}
	if (nd_zone_clust != NULL) {
		uma_zdestroy(nd_zone_clust);
		nd_zone_clust = NULL;
	}
	if (nd_zone_pack != NULL) {
		uma_zdestroy(nd_zone_pack);
		nd_zone_pack = NULL;
	}

	while ((m = mbufq_dequeue(&nd_mbufq)) != NULL)
		m_free(m);
	while ((item = mbufq_dequeue(&nd_clustq)) != NULL)
		uma_zfree(m_getzone(nd_clsize), item);
}

/*
 * Callback invoked immediately prior to starting a netdump.
 */
void
netdump_mbuf_dump(void)
{

	/*
	 * All cluster zones return buffers of the size requested by the
	 * drivers.  It's up to the driver to reinitialize the zones if the
	 * MTU of a netdump-enabled interface changes.
	 */
	printf("netdump: overwriting mbuf zone pointers\n");
	zone_mbuf = nd_zone_mbuf;
	zone_clust = nd_zone_clust;
	zone_pack = nd_zone_pack;
	zone_jumbop = nd_zone_clust;
	zone_jumbo9 = nd_zone_clust;
	zone_jumbo16 = nd_zone_clust;
}
/*
 * Reinitialize the netdump mbuf+cluster pool and cache zones.
 */
void
netdump_mbuf_reinit(int nmbuf, int nclust, int clsize)
{
	struct mbuf *m;
	void *item;

	netdump_mbuf_drain();

	nd_clsize = clsize;

	nd_zone_mbuf = uma_zcache_create("netdump_" MBUF_MEM_NAME,
	    MSIZE, mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    nd_buf_import, nd_buf_release,
	    &nd_mbufq, UMA_ZONE_NOBUCKET);

	nd_zone_clust = uma_zcache_create("netdump_" MBUF_CLUSTER_MEM_NAME,
	    clsize, mb_ctor_clust,
#ifdef INVARIANTS
	    trash_dtor, trash_init, trash_fini,
#else
	    NULL, NULL, NULL,
#endif
	    nd_buf_import, nd_buf_release,
	    &nd_clustq, UMA_ZONE_NOBUCKET);

	nd_zone_pack = uma_zcache_create("netdump_" MBUF_PACKET_MEM_NAME,
	    MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL,
	    nd_pack_import, nd_pack_release,
	    NULL, UMA_ZONE_NOBUCKET);

	while (nmbuf-- > 0) {
		m = m_get(M_WAITOK, MT_DATA);
		uma_zfree(nd_zone_mbuf, m);
	}
	while (nclust-- > 0) {
		item = uma_zalloc(m_getzone(nd_clsize), M_WAITOK);
		uma_zfree(nd_zone_clust, item);
	}
}
#endif /* NETDUMP */

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags,
    int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain),
	    bytes, wait, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0,
	    VM_MEMATTR_DEFAULT));
}

/*
 * Constructor for Mbuf master zone.
 *
 * The 'arg' pointer points to a mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error;
	int flags;
	short type;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	args = (struct mb_args *)arg;
	type = args->type;

	/*
	 * The mbuf is initialized later.  The caller has the
	 * responsibility to set up any MAC labels too.
	 */
	if (type == MT_NOINIT)
		return (0);

	m = (struct mbuf *)mem;
	flags = args->flags;
	MPASS((flags & M_NOFREE) == 0);

	error = m_init(m, how, type, flags);

	return (error);
}
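/*
 * Illustrative sketch: this constructor is not called directly; the
 * allocation routines pass a stack-allocated mb_args through UMA,
 * along the lines of what m_get() does:
 *
 *	struct mb_args args;
 *
 *	args.flags = flags;
 *	args.type = type;
 *	return (uma_zalloc_arg(zone_mbuf, &args, how));
 */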
/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
	struct mbuf *m;
	unsigned long flags;

	m = (struct mbuf *)mem;
	flags = (unsigned long)arg;

	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
	if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) &&
	    !SLIST_EMPTY(&m->m_pkthdr.tags))
		m_tag_delete_chain(m, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
	if ((m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);

	/* Make sure we've got a clean cluster back. */
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
	KASSERT(m->m_ext.ext_size == MCLBYTES,
	    ("%s: ext_size != MCLBYTES", __func__));
	KASSERT(m->m_ext.ext_type == EXT_PACKET,
	    ("%s: ext_type != EXT_PACKET", __func__));
#ifdef INVARIANTS
	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
	/*
	 * If there are processes blocked on zone_clust, waiting for pages
	 * to be freed up, cause them to be woken up by draining the
	 * packet zone.  We are exposed to a race here (in the check for
	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
	 * is deliberate.  We don't want to acquire the zone lock for every
	 * mbuf free.
	 */
	if (uma_zone_exhausted_nolock(zone_clust))
		zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf which we
 * are configuring cluster storage for.  If 'arg' is
 * empty we allocate just the cluster without setting
 * the mbuf to it.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	m = (struct mbuf *)arg;
	if (m != NULL) {
		m->m_ext.ext_buf = (char *)mem;
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg1 = NULL;
		m->m_ext.ext_arg2 = NULL;
		m->m_ext.ext_size = size;
		m->m_ext.ext_type = m_gettype(size);
		m->m_ext.ext_flags = EXT_FLAG_EMBREF;
		m->m_ext.ext_count = 1;
	}

	return (0);
}
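/*
 * Illustrative sketch: m_clget() drives the constructor above by
 * passing the mbuf as the allocation argument, roughly:
 *
 *	uma_zalloc_arg(zone_clust, m, how);
 *
 * while a bare cluster allocation passes a NULL argument and gets
 * only the buffer back.
 */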
/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;		/* m is virgin. */
	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
	    m->m_ext.ext_buf == NULL)
		return (ENOMEM);
	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
#ifdef INVARIANTS
	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
	return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
#ifdef INVARIANTS
	trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error, flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;
	MPASS((flags & M_NOFREE) == 0);

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif

	error = m_init(m, how, type, flags);

	/* m_ext is already initialized. */
	m->m_data = m->m_ext.ext_buf;
	m->m_flags = (flags | M_EXT);

	return (error);
}

/*
 * This is the protocol drain routine.  Called by UMA whenever any of the
 * mbuf zones is close to its limit.
 *
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * reversal.
 */
static void
mb_reclaim(uma_zone_t zone __unused, int pending __unused)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__);

	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}

/*
 * Free "count" units of I/O from an mbuf chain.  They could be held
 * in EXT_PGS or just as a normal mbuf.  This code is intended to be
 * called in an error path (I/O error, closed connection, etc).
 */
void
mb_free_notready(struct mbuf *m, int count)
{
	int i;

	for (i = 0; i < count && m != NULL; i++) {
		if ((m->m_flags & M_EXT) != 0 &&
		    m->m_ext.ext_type == EXT_PGS) {
			m->m_ext.ext_pgs->nrdy--;
			if (m->m_ext.ext_pgs->nrdy != 0)
				continue;
		}
		m = m_free(m);
	}
	KASSERT(i == count, ("Removed only %d items from %p", i, m));
}
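/*
 * Illustrative sketch (hypothetical caller): an async I/O completion
 * that failed might discard the not-yet-ready portion of a chain with:
 *
 *	mb_free_notready(m, npages);
 *
 * where 'npages' is a made-up name for the number of I/O units that
 * will never become ready.
 */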
/*
 * Compress an unmapped mbuf into a simple mbuf when it holds a small
 * amount of data.  This is used as a DoS defense to avoid having
 * small packets tie up wired pages, an ext_pgs structure, and an
 * mbuf.  Since this converts the existing mbuf in place, it can only
 * be used if there are no other references to 'm'.
 */
int
mb_unmapped_compress(struct mbuf *m)
{
	volatile u_int *refcnt;
	struct mbuf m_temp;

	/*
	 * Assert that 'm' does not have a packet header.  If 'm' had
	 * a packet header, it would only be able to hold MHLEN bytes
	 * and m_data would have to be initialized differently.
	 */
	KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) &&
	    m->m_ext.ext_type == EXT_PGS,
	    ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m));
	KASSERT(m->m_len <= MLEN, ("m_len too large %p", m));

	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
		refcnt = &m->m_ext.ext_count;
	} else {
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		refcnt = m->m_ext.ext_cnt;
	}

	if (*refcnt != 1)
		return (EBUSY);

	/*
	 * Copy mbuf header and m_ext portion of 'm' to 'm_temp' to
	 * create a "fake" EXT_PGS mbuf that can be used with
	 * m_copydata() as well as the ext_free callback.
	 */
	memcpy(&m_temp, m, offsetof(struct mbuf, m_ext) + sizeof(m->m_ext));
	m_temp.m_next = NULL;
	m_temp.m_nextpkt = NULL;

	/* Turn 'm' into a "normal" mbuf. */
	m->m_flags &= ~(M_EXT | M_RDONLY | M_NOMAP);
	m->m_data = m->m_dat;

	/* Copy data from template's ext_pgs. */
	m_copydata(&m_temp, 0, m_temp.m_len, mtod(m, caddr_t));

	/* Free the backing pages. */
	m_temp.m_ext.ext_free(&m_temp);

	/* Finally, free the ext_pgs struct. */
	uma_zfree(zone_extpgs, m_temp.m_ext.ext_pgs);
	return (0);
}
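/*
 * Illustrative sketch (hypothetical caller): a path defending against
 * small unmapped packets might try, for a short single mbuf:
 *
 *	if (m->m_len <= MLEN && mb_unmapped_compress(m) == 0)
 *		... 'm' is now an ordinary mapped mbuf ...
 *
 * and fall back to mb_unmapped_to_ext() if EBUSY is returned.
 */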
/*
 * These next few routines are used to permit downgrading an unmapped
 * mbuf to a chain of mapped mbufs.  This is used when an interface
 * doesn't support unmapped mbufs or if checksums need to be
 * computed in software.
 *
 * Each unmapped mbuf is converted to a chain of mbufs.  First, any
 * TLS header data is stored in a regular mbuf.  Second, each page of
 * unmapped data is stored in an mbuf with an EXT_SFBUF external
 * cluster.  These mbufs use an sf_buf to provide a valid KVA for the
 * associated physical page.  They also hold a reference on the
 * original EXT_PGS mbuf to ensure the physical page doesn't go away.
 * Finally, any TLS trailer data is stored in a regular mbuf.
 *
 * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF
 * mbufs.  It frees the associated sf_buf and releases its reference
 * on the original EXT_PGS mbuf.
 *
 * _mb_unmapped_to_ext() is a helper function that converts a single
 * unmapped mbuf into a chain of mbufs.
 *
 * mb_unmapped_to_ext() is the public function that walks an mbuf
 * chain converting any unmapped mbufs to mapped mbufs.  It returns
 * the new chain of mapped mbufs on success.  On failure it frees
 * the original mbuf chain and returns NULL.
 */
static void
mb_unmapped_free_mext(struct mbuf *m)
{
	struct sf_buf *sf;
	struct mbuf *old_m;

	sf = m->m_ext.ext_arg1;
	sf_buf_free(sf);

	/* Drop the reference on the backing EXT_PGS mbuf. */
	old_m = m->m_ext.ext_arg2;
	mb_free_ext(old_m);
}

static struct mbuf *
_mb_unmapped_to_ext(struct mbuf *m)
{
	struct mbuf_ext_pgs *ext_pgs;
	struct mbuf *m_new, *top, *prev, *mref;
	struct sf_buf *sf;
	vm_page_t pg;
	int i, len, off, pglen, pgoff, seglen, segoff;
	volatile u_int *refcnt;
	u_int ref_inc = 0;

	MBUF_EXT_PGS_ASSERT(m);
	ext_pgs = m->m_ext.ext_pgs;
	len = m->m_len;
	KASSERT(ext_pgs->tls == NULL, ("%s: can't convert TLS mbuf %p",
	    __func__, m));

	/* See if this is the mbuf that holds the embedded refcount. */
	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
		refcnt = &m->m_ext.ext_count;
		mref = m;
	} else {
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		refcnt = m->m_ext.ext_cnt;
		mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
	}

	/* Skip over any data removed from the front. */
	off = mtod(m, vm_offset_t);

	top = NULL;
	if (ext_pgs->hdr_len != 0) {
		if (off >= ext_pgs->hdr_len) {
			off -= ext_pgs->hdr_len;
		} else {
			seglen = ext_pgs->hdr_len - off;
			segoff = off;
			seglen = min(seglen, len);
			off = 0;
			len -= seglen;
			m_new = m_get(M_NOWAIT, MT_DATA);
			if (m_new == NULL)
				goto fail;
			m_new->m_len = seglen;
			prev = top = m_new;
			memcpy(mtod(m_new, void *), &ext_pgs->hdr[segoff],
			    seglen);
		}
	}
	pgoff = ext_pgs->first_pg_off;
	for (i = 0; i < ext_pgs->npgs && len > 0; i++) {
		pglen = mbuf_ext_pg_len(ext_pgs, i, pgoff);
		if (off >= pglen) {
			off -= pglen;
			pgoff = 0;
			continue;
		}
		seglen = pglen - off;
		segoff = pgoff + off;
		off = 0;
		seglen = min(seglen, len);
		len -= seglen;

		pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]);
		m_new = m_get(M_NOWAIT, MT_DATA);
		if (m_new == NULL)
			goto fail;
		if (top == NULL) {
			top = prev = m_new;
		} else {
			prev->m_next = m_new;
			prev = m_new;
		}
		sf = sf_buf_alloc(pg, SFB_NOWAIT);
		if (sf == NULL)
			goto fail;

		ref_inc++;
		m_extadd(m_new, (char *)sf_buf_kva(sf), PAGE_SIZE,
		    mb_unmapped_free_mext, sf, mref, M_RDONLY, EXT_SFBUF);
		m_new->m_data += segoff;
		m_new->m_len = seglen;

		pgoff = 0;
	}
	if (len != 0) {
		KASSERT((off + len) <= ext_pgs->trail_len,
		    ("off + len > trail (%d + %d > %d)", off, len,
		    ext_pgs->trail_len));
		m_new = m_get(M_NOWAIT, MT_DATA);
		if (m_new == NULL)
			goto fail;
		if (top == NULL)
			top = m_new;
		else
			prev->m_next = m_new;
		m_new->m_len = len;
		memcpy(mtod(m_new, void *), &ext_pgs->trail[off], len);
	}
	if (ref_inc != 0) {
		/*
		 * Obtain an additional reference on the old mbuf for
		 * each created EXT_SFBUF mbuf.  They will be dropped
		 * in mb_unmapped_free_mext().
		 */
		if (*refcnt == 1)
			*refcnt += ref_inc;
		else
			atomic_add_int(refcnt, ref_inc);
	}
	m_free(m);
	return (top);

fail:
	if (ref_inc != 0) {
		/*
		 * Obtain an additional reference on the old mbuf for
		 * each created EXT_SFBUF mbuf.  They will be
		 * immediately dropped when these mbufs are freed
		 * below.
		 */
		if (*refcnt == 1)
			*refcnt += ref_inc;
		else
			atomic_add_int(refcnt, ref_inc);
	}
	m_free(m);
	m_freem(top);
	return (NULL);
}

struct mbuf *
mb_unmapped_to_ext(struct mbuf *top)
{
	struct mbuf *m, *next, *prev;

	prev = NULL;
	for (m = top; m != NULL; m = next) {
		/* m might be freed, so cache the next pointer. */
		next = m->m_next;
		if (m->m_flags & M_NOMAP) {
			if (prev != NULL) {
				/*
				 * Remove 'm' from the new chain so
				 * that the 'top' chain terminates
				 * before 'm' in case 'top' is freed
				 * due to an error.
				 */
				prev->m_next = NULL;
			}
			m = _mb_unmapped_to_ext(m);
			if (m == NULL) {
				m_freem(top);
				m_freem(next);
				return (NULL);
			}
			if (prev == NULL) {
				top = m;
			} else {
				prev->m_next = m;
			}

			/*
			 * Replaced one mbuf with a chain, so we must
			 * find the end of chain.
			 */
			prev = m_last(m);
		} else {
			if (prev != NULL) {
				prev->m_next = m;
			}
			prev = m;
		}
	}
	return (top);
}
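/*
 * Illustrative sketch (hypothetical caller): a transmit path for
 * hardware without unmapped-mbuf support might downgrade a chain
 * before handing it to the device:
 *
 *	m = mb_unmapped_to_ext(m);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *
 * Note that the original chain is consumed on failure.
 */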
 */
struct mbuf *
mb_alloc_ext_pgs(int how, bool pkthdr, m_ext_free_t ext_free)
{
	struct mbuf *m;
	struct mbuf_ext_pgs *ext_pgs;

	if (pkthdr)
		m = m_gethdr(how, MT_DATA);
	else
		m = m_get(how, MT_DATA);
	if (m == NULL)
		return (NULL);

	ext_pgs = uma_zalloc(zone_extpgs, how);
	if (ext_pgs == NULL) {
		m_free(m);
		return (NULL);
	}
	ext_pgs->npgs = 0;
	ext_pgs->nrdy = 0;
	ext_pgs->first_pg_off = 0;
	ext_pgs->last_pg_len = 0;
	ext_pgs->hdr_len = 0;
	ext_pgs->trail_len = 0;
	ext_pgs->tls = NULL;
	ext_pgs->so = NULL;
	m->m_data = NULL;
	m->m_flags |= (M_EXT | M_RDONLY | M_NOMAP);
	m->m_ext.ext_type = EXT_PGS;
	m->m_ext.ext_flags = EXT_FLAG_EMBREF;
	m->m_ext.ext_count = 1;
	m->m_ext.ext_pgs = ext_pgs;
	m->m_ext.ext_size = 0;
	m->m_ext.ext_free = ext_free;
	return (m);
}

#ifdef INVARIANT_SUPPORT
void
mb_ext_pgs_check(struct mbuf_ext_pgs *ext_pgs)
{

	/*
	 * NB: This expects a non-empty buffer (npgs > 0 and
	 * last_pg_len > 0).
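	 *
	 * As an illustration of the layout these assertions enforce:
	 * a buffer that starts 0x100 bytes into its first page and
	 * spans three pages carries
	 * (PAGE_SIZE - 0x100) + PAGE_SIZE + last_pg_len bytes of page
	 * data, plus hdr_len and trail_len bytes stored inline.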
	 */
	KASSERT(ext_pgs->npgs > 0,
	    ("ext_pgs with no valid pages: %p", ext_pgs));
	KASSERT(ext_pgs->npgs <= nitems(ext_pgs->pa),
	    ("ext_pgs with too many pages: %p", ext_pgs));
	KASSERT(ext_pgs->nrdy <= ext_pgs->npgs,
	    ("ext_pgs with too many ready pages: %p", ext_pgs));
	KASSERT(ext_pgs->first_pg_off < PAGE_SIZE,
	    ("ext_pgs with too large page offset: %p", ext_pgs));
	KASSERT(ext_pgs->last_pg_len > 0,
	    ("ext_pgs with zero last page length: %p", ext_pgs));
	KASSERT(ext_pgs->last_pg_len <= PAGE_SIZE,
	    ("ext_pgs with too large last page length: %p", ext_pgs));
	if (ext_pgs->npgs == 1) {
		KASSERT(ext_pgs->first_pg_off + ext_pgs->last_pg_len <=
		    PAGE_SIZE, ("ext_pgs with single page too large: %p",
		    ext_pgs));
	}
	KASSERT(ext_pgs->hdr_len <= sizeof(ext_pgs->hdr),
	    ("ext_pgs with too large header length: %p", ext_pgs));
	KASSERT(ext_pgs->trail_len <= sizeof(ext_pgs->trail),
	    ("ext_pgs with too large trailer length: %p", ext_pgs));
}
#endif

/*
 * Clean up after mbufs with M_EXT storage attached to them if the
 * reference count hits 1.
 */
void
mb_free_ext(struct mbuf *m)
{
	volatile u_int *refcnt;
	struct mbuf *mref;
	int freembuf;

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));

	/* See if this is the mbuf that holds the embedded refcount. */
	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
		refcnt = &m->m_ext.ext_count;
		mref = m;
	} else {
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		refcnt = m->m_ext.ext_cnt;
		mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
	}

	/*
	 * Check if the header is embedded in the cluster. It is
	 * important that we do not touch any of the mbuf fields
	 * after we have freed the external storage, since the mbuf
	 * could have been embedded in it. For now, the mbufs
	 * embedded into the cluster are always of type EXT_EXTREF,
	 * and for this type we won't free the mref.
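	 *
	 * For example, a driver that embeds mbufs in externally
	 * managed storage marks them M_NOFREE with type EXT_EXTREF;
	 * on the last reference this function then only invokes
	 * ext_free and never returns such an mbuf to zone_mbuf.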
	 */
	if (m->m_flags & M_NOFREE) {
		freembuf = 0;
		KASSERT(m->m_ext.ext_type == EXT_EXTREF ||
		    m->m_ext.ext_type == EXT_RXRING,
		    ("%s: no-free mbuf %p has wrong type", __func__, m));
	} else
		freembuf = 1;

	/* Free attached storage if this mbuf is the only reference to it. */
	if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
		switch (m->m_ext.ext_type) {
		case EXT_PACKET:
			/* The packet zone is special. */
			if (*refcnt == 0)
				*refcnt = 1;
			uma_zfree(zone_pack, mref);
			break;
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_PGS:
			uma_zfree(zone_extpgs, mref->m_ext.ext_pgs);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_SFBUF:
		case EXT_NET_DRV:
		case EXT_MOD_TYPE:
		case EXT_DISPOSABLE:
			KASSERT(mref->m_ext.ext_free != NULL,
			    ("%s: ext_free not set", __func__));
			mref->m_ext.ext_free(mref);
			uma_zfree(zone_mbuf, mref);
			break;
		case EXT_EXTREF:
			KASSERT(m->m_ext.ext_free != NULL,
			    ("%s: ext_free not set", __func__));
			m->m_ext.ext_free(m);
			break;
		case EXT_RXRING:
			KASSERT(m->m_ext.ext_free == NULL,
			    ("%s: ext_free is set", __func__));
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
			    ("%s: unknown ext_type", __func__));
		}
	}

	if (freembuf && m != mref)
		uma_zfree(zone_mbuf, m);
}

/*
 * Official mbuf(9) allocation KPI for stack and drivers:
 *
 * m_get() - a single mbuf without any attachments, sys/mbuf.h.
 * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h.
 * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h.
 * m_clget() - attach cluster to already allocated mbuf.
 * m_cljget() - attach jumbo cluster to already allocated mbuf.
 * m_get2() - allocate the minimum mbuf that fits the size argument.
 * m_getm2() - allocate a chain of mbufs/clusters.
 * m_extadd() - attach external cluster to mbuf.
 *
 * m_free() - free single mbuf with its tags and ext, sys/mbuf.h.
 * m_freem() - free chain of mbufs.
 */

int
m_clget(struct mbuf *m, int how)
{

	KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
	    __func__, m));
	m->m_ext.ext_buf = (char *)NULL;
	uma_zalloc_arg(zone_clust, m, how);
	/*
	 * On a cluster allocation failure, drain the packet zone and retry;
	 * we might be able to free up a few clusters during the drain.
	 */
	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
		zone_drain(zone_pack);
		uma_zalloc_arg(zone_clust, m, how);
	}
	MBUF_PROBE2(m__clget, m, how);
	return (m->m_flags & M_EXT);
}

/*
 * m_cljget() is different from m_clget() as it can allocate clusters without
 * attaching them to an mbuf. In that case the return value is the pointer
 * to the cluster of the requested size. If an mbuf was specified, it gets
 * the cluster attached to it and the return value can be safely ignored.
 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 */
void *
m_cljget(struct mbuf *m, int how, int size)
{
	uma_zone_t zone;
	void *retval;

	if (m != NULL) {
		KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
		    __func__, m));
		m->m_ext.ext_buf = NULL;
	}

	zone = m_getzone(size);
	retval = uma_zalloc_arg(zone, m, how);

	MBUF_PROBE4(m__cljget, m, how, size, retval);

	return (retval);
}

/*
 * m_get2() allocates the minimum mbuf that will fit the "size" argument.
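 *
 * Illustrative mappings (exact thresholds depend on MLEN/MHLEN and
 * PAGE_SIZE for the platform): a 100 byte request is served by a
 * plain mbuf, a 1500 byte request by an mbuf + 2k cluster from the
 * packet zone, and a 3000 byte request by an mbuf + page sized jumbo
 * cluster; requests above MJUMPAGESIZE return NULL.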
 */
struct mbuf *
m_get2(int size, int how, short type, int flags)
{
	struct mb_args args;
	struct mbuf *m, *n;

	args.flags = flags;
	args.type = type;

	if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0))
		return (uma_zalloc_arg(zone_mbuf, &args, how));
	if (size <= MCLBYTES)
		return (uma_zalloc_arg(zone_pack, &args, how));

	if (size > MJUMPAGESIZE)
		return (NULL);

	m = uma_zalloc_arg(zone_mbuf, &args, how);
	if (m == NULL)
		return (NULL);

	n = uma_zalloc_arg(zone_jumbop, m, how);
	if (n == NULL) {
		uma_zfree(zone_mbuf, m);
		return (NULL);
	}

	return (m);
}

/*
 * m_getjcl() returns an mbuf with a cluster of the specified size attached.
 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 */
struct mbuf *
m_getjcl(int how, short type, int flags, int size)
{
	struct mb_args args;
	struct mbuf *m, *n;
	uma_zone_t zone;

	if (size == MCLBYTES)
		return (m_getcl(how, type, flags));

	args.flags = flags;
	args.type = type;

	m = uma_zalloc_arg(zone_mbuf, &args, how);
	if (m == NULL)
		return (NULL);

	zone = m_getzone(size);
	n = uma_zalloc_arg(zone, m, how);
	if (n == NULL) {
		uma_zfree(zone_mbuf, m);
		return (NULL);
	}
	return (m);
}

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain. If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one and return a pointer to the provided mbuf.
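 *
 * For example, m_getm2(NULL, 10000, M_WAITOK, MT_DATA, M_PKTHDR)
 * on a platform with 4k pages returns two page sized jumbo clusters
 * followed by one 2k cluster (M_SIZE() of each mbuf is subtracted
 * from len), with M_PKTHDR set only on the first mbuf.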
 */
struct mbuf *
m_getm2(struct mbuf *m, int len, int how, short type, int flags)
{
	struct mbuf *mb, *nm = NULL, *mtail = NULL;

	KASSERT(len >= 0, ("%s: len is < 0", __func__));

	/* Validate flags. */
	flags &= (M_PKTHDR | M_EOR);

	/* Packet header mbuf must be first in chain. */
	if ((flags & M_PKTHDR) && m != NULL)
		flags &= ~M_PKTHDR;

	/* Loop and append maximum sized mbufs to the chain tail. */
	while (len > 0) {
		if (len > MCLBYTES)
			mb = m_getjcl(how, type, (flags & M_PKTHDR),
			    MJUMPAGESIZE);
		else if (len >= MINCLSIZE)
			mb = m_getcl(how, type, (flags & M_PKTHDR));
		else if (flags & M_PKTHDR)
			mb = m_gethdr(how, type);
		else
			mb = m_get(how, type);

		/* Fail the whole operation if one mbuf can't be allocated. */
		if (mb == NULL) {
			if (nm != NULL)
				m_freem(nm);
			return (NULL);
		}

		/* Bookkeeping. */
		len -= M_SIZE(mb);
		if (mtail != NULL)
			mtail->m_next = mb;
		else
			nm = mb;
		mtail = mb;
		flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
	}
	if (flags & M_EOR)
		mtail->m_flags |= M_EOR;	/* Only valid on the last mbuf. */

	/* If an mbuf was supplied, append the new chain to the end of it. */
	if (m != NULL) {
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
			;
		mtail->m_next = nm;
		mtail->m_flags &= ~M_EOR;
	} else
		m = nm;

	return (m);
}

/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and set up a reference count for said buffer.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    arg1,  Optional opaque pointers (may be NULL) passed to the
 *    arg2   provided freef routine via the mbuf's ext_arg1/ext_arg2.
 *    flags  Any other flags to be set on the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 *
 * Returns:
 *    Nothing.
 */
void
m_extadd(struct mbuf *mb, char *buf, u_int size, m_ext_free_t freef,
    void *arg1, void *arg2, int flags, int type)
{

	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));

	mb->m_flags |= (M_EXT | flags);
	mb->m_ext.ext_buf = buf;
	mb->m_data = mb->m_ext.ext_buf;
	mb->m_ext.ext_size = size;
	mb->m_ext.ext_free = freef;
	mb->m_ext.ext_arg1 = arg1;
	mb->m_ext.ext_arg2 = arg2;
	mb->m_ext.ext_type = type;

	if (type != EXT_EXTREF) {
		mb->m_ext.ext_count = 1;
		mb->m_ext.ext_flags = EXT_FLAG_EMBREF;
	} else
		mb->m_ext.ext_flags = 0;
}

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.
 */
void
m_freem(struct mbuf *mb)
{

	MBUF_PROBE1(m__freem, mb);
	while (mb != NULL)
		mb = m_free(mb);
}

void
m_snd_tag_init(struct m_snd_tag *mst, struct ifnet *ifp)
{

	if_ref(ifp);
	mst->ifp = ifp;
	refcount_init(&mst->refcount, 1);
	counter_u64_add(snd_tag_count, 1);
}

void
m_snd_tag_destroy(struct m_snd_tag *mst)
{
	struct ifnet *ifp;

	ifp = mst->ifp;
	ifp->if_snd_tag_free(mst);
	if_rele(ifp);
	counter_u64_add(snd_tag_count, -1);
}
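
/*
 * Example (sketch, not part of the KPI): attaching a driver-owned
 * buffer with m_extadd(). "my_buf", "my_buf_free", "my_softc" and
 * MY_BUF_SIZE are hypothetical names.
 *
 *	m = m_get(M_WAITOK, MT_DATA);
 *	m_extadd(m, my_buf, MY_BUF_SIZE, my_buf_free, my_softc, NULL,
 *	    M_RDONLY, EXT_NET_DRV);
 *	m->m_len = payload_len;
 *
 * Once the last reference goes away, mb_free_ext() calls
 * my_buf_free(m); the routine can recover its cookies from
 * m->m_ext.ext_arg1 and m->m_ext.ext_arg2 before the mbuf itself is
 * returned to zone_mbuf.
 */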