/*-
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26099a0e58SBosko Milekic */ 27099a0e58SBosko Milekic 28099a0e58SBosko Milekic #include <sys/cdefs.h> 29099a0e58SBosko Milekic __FBSDID("$FreeBSD$"); 30099a0e58SBosko Milekic 31099a0e58SBosko Milekic #include "opt_param.h" 32099a0e58SBosko Milekic 33099a0e58SBosko Milekic #include <sys/param.h> 34099a0e58SBosko Milekic #include <sys/malloc.h> 35099a0e58SBosko Milekic #include <sys/systm.h> 36099a0e58SBosko Milekic #include <sys/mbuf.h> 37099a0e58SBosko Milekic #include <sys/domain.h> 38099a0e58SBosko Milekic #include <sys/eventhandler.h> 39099a0e58SBosko Milekic #include <sys/kernel.h> 40099a0e58SBosko Milekic #include <sys/protosw.h> 41099a0e58SBosko Milekic #include <sys/smp.h> 42099a0e58SBosko Milekic #include <sys/sysctl.h> 43099a0e58SBosko Milekic 44aed55708SRobert Watson #include <security/mac/mac_framework.h> 45aed55708SRobert Watson 46099a0e58SBosko Milekic #include <vm/vm.h> 47c45c0034SAlan Cox #include <vm/vm_extern.h> 48c45c0034SAlan Cox #include <vm/vm_kern.h> 49099a0e58SBosko Milekic #include <vm/vm_page.h> 50099a0e58SBosko Milekic #include <vm/uma.h> 51121f0509SMike Silbersack #include <vm/uma_int.h> 52121f0509SMike Silbersack #include <vm/uma_dbg.h> 53099a0e58SBosko Milekic 54099a0e58SBosko Milekic /* 55099a0e58SBosko Milekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 56099a0e58SBosko Milekic * Zones. 57099a0e58SBosko Milekic * 58099a0e58SBosko Milekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 59099a0e58SBosko Milekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 60099a0e58SBosko Milekic * administrator so desires. 61099a0e58SBosko Milekic * 62099a0e58SBosko Milekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 63099a0e58SBosko Milekic * Zone. 
64099a0e58SBosko Milekic * 65099a0e58SBosko Milekic * Additionally, FreeBSD provides a Packet Zone, which it 66099a0e58SBosko Milekic * configures as a Secondary Zone to the Mbuf Master Zone, 67099a0e58SBosko Milekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 68099a0e58SBosko Milekic * 69099a0e58SBosko Milekic * Thus common-case allocations and locking are simplified: 70099a0e58SBosko Milekic * 71099a0e58SBosko Milekic * m_clget() m_getcl() 72099a0e58SBosko Milekic * | | 73099a0e58SBosko Milekic * | .------------>[(Packet Cache)] m_get(), m_gethdr() 74099a0e58SBosko Milekic * | | [ Packet ] | 75099a0e58SBosko Milekic * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 76099a0e58SBosko Milekic * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 77099a0e58SBosko Milekic * | \________ | 78099a0e58SBosko Milekic * [ Cluster Keg ] \ / 79099a0e58SBosko Milekic * | [ Mbuf Keg ] 80099a0e58SBosko Milekic * [ Cluster Slabs ] | 81099a0e58SBosko Milekic * | [ Mbuf Slabs ] 82099a0e58SBosko Milekic * \____________(VM)_________________/ 8356a4e45aSAndre Oppermann * 8456a4e45aSAndre Oppermann * 85fcf90618SGleb Smirnoff * Whenever an object is allocated with uma_zalloc() out of 8656a4e45aSAndre Oppermann * one of the Zones its _ctor_ function is executed. The same 87fcf90618SGleb Smirnoff * for any deallocation through uma_zfree() the _dtor_ function 8856a4e45aSAndre Oppermann * is executed. 8956a4e45aSAndre Oppermann * 9056a4e45aSAndre Oppermann * Caches are per-CPU and are filled from the Master Zone. 9156a4e45aSAndre Oppermann * 92fcf90618SGleb Smirnoff * Whenever an object is allocated from the underlying global 9356a4e45aSAndre Oppermann * memory pool it gets pre-initialized with the _zinit_ functions. 9456a4e45aSAndre Oppermann * When the Keg's are overfull objects get decomissioned with 9556a4e45aSAndre Oppermann * _zfini_ functions and free'd back to the global memory pool. 
9656a4e45aSAndre Oppermann * 97099a0e58SBosko Milekic */ 98099a0e58SBosko Milekic 9956a4e45aSAndre Oppermann int nmbclusters; /* limits number of mbuf clusters */ 100ec63cb90SAndre Oppermann int nmbjumbop; /* limits number of page size jumbo clusters */ 10156a4e45aSAndre Oppermann int nmbjumbo9; /* limits number of 9k jumbo clusters */ 10256a4e45aSAndre Oppermann int nmbjumbo16; /* limits number of 16k jumbo clusters */ 103099a0e58SBosko Milekic struct mbstat mbstat; 104099a0e58SBosko Milekic 10562938659SBjoern A. Zeeb /* 10662938659SBjoern A. Zeeb * tunable_mbinit() has to be run before init_maxsockets() thus 10762938659SBjoern A. Zeeb * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets() 10862938659SBjoern A. Zeeb * runs at SI_ORDER_ANY. 10962938659SBjoern A. Zeeb */ 110099a0e58SBosko Milekic static void 111099a0e58SBosko Milekic tunable_mbinit(void *dummy) 112099a0e58SBosko Milekic { 1138aa7a581SKip Macy TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 114099a0e58SBosko Milekic 115099a0e58SBosko Milekic /* This has to be done before VM init. */ 1168aa7a581SKip Macy if (nmbclusters == 0) 117099a0e58SBosko Milekic nmbclusters = 1024 + maxusers * 64; 118cf70a46bSRandall Stewart nmbjumbop = nmbclusters / 2; 119cf70a46bSRandall Stewart nmbjumbo9 = nmbjumbop / 2; 120cf70a46bSRandall Stewart nmbjumbo16 = nmbjumbo9 / 2; 121099a0e58SBosko Milekic } 12262938659SBjoern A. 
Zeeb SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 123099a0e58SBosko Milekic 1244f590175SPaul Saab static int 1254f590175SPaul Saab sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 1264f590175SPaul Saab { 1274f590175SPaul Saab int error, newnmbclusters; 1284f590175SPaul Saab 1294f590175SPaul Saab newnmbclusters = nmbclusters; 130041b706bSDavid Malone error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 1314f590175SPaul Saab if (error == 0 && req->newptr) { 1324f590175SPaul Saab if (newnmbclusters > nmbclusters) { 1334f590175SPaul Saab nmbclusters = newnmbclusters; 1344f590175SPaul Saab uma_zone_set_max(zone_clust, nmbclusters); 1354f590175SPaul Saab EVENTHANDLER_INVOKE(nmbclusters_change); 1364f590175SPaul Saab } else 1374f590175SPaul Saab error = EINVAL; 1384f590175SPaul Saab } 1394f590175SPaul Saab return (error); 1404f590175SPaul Saab } 1414f590175SPaul Saab SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 1424f590175SPaul Saab &nmbclusters, 0, sysctl_nmbclusters, "IU", 143099a0e58SBosko Milekic "Maximum number of mbuf clusters allowed"); 144cf70a46bSRandall Stewart 145cf70a46bSRandall Stewart static int 146cf70a46bSRandall Stewart sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 147cf70a46bSRandall Stewart { 148cf70a46bSRandall Stewart int error, newnmbjumbop; 149cf70a46bSRandall Stewart 150cf70a46bSRandall Stewart newnmbjumbop = nmbjumbop; 151cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 152cf70a46bSRandall Stewart if (error == 0 && req->newptr) { 153cf70a46bSRandall Stewart if (newnmbjumbop> nmbjumbop) { 154cf70a46bSRandall Stewart nmbjumbop = newnmbjumbop; 155cf70a46bSRandall Stewart uma_zone_set_max(zone_jumbop, nmbjumbop); 156cf70a46bSRandall Stewart } else 157cf70a46bSRandall Stewart error = EINVAL; 158cf70a46bSRandall Stewart } 159cf70a46bSRandall Stewart return (error); 160cf70a46bSRandall Stewart } 161cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, 
CTLTYPE_INT|CTLFLAG_RW, 162cf70a46bSRandall Stewart &nmbjumbop, 0, sysctl_nmbjumbop, "IU", 163ec63cb90SAndre Oppermann "Maximum number of mbuf page size jumbo clusters allowed"); 164cf70a46bSRandall Stewart 165cf70a46bSRandall Stewart 166cf70a46bSRandall Stewart static int 167cf70a46bSRandall Stewart sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 168cf70a46bSRandall Stewart { 169cf70a46bSRandall Stewart int error, newnmbjumbo9; 170cf70a46bSRandall Stewart 171cf70a46bSRandall Stewart newnmbjumbo9 = nmbjumbo9; 172cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 173cf70a46bSRandall Stewart if (error == 0 && req->newptr) { 174cf70a46bSRandall Stewart if (newnmbjumbo9> nmbjumbo9) { 175cf70a46bSRandall Stewart nmbjumbo9 = newnmbjumbo9; 176cf70a46bSRandall Stewart uma_zone_set_max(zone_jumbo9, nmbjumbo9); 177cf70a46bSRandall Stewart } else 178cf70a46bSRandall Stewart error = EINVAL; 179cf70a46bSRandall Stewart } 180cf70a46bSRandall Stewart return (error); 181cf70a46bSRandall Stewart } 182cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 183cf70a46bSRandall Stewart &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 18456a4e45aSAndre Oppermann "Maximum number of mbuf 9k jumbo clusters allowed"); 185cf70a46bSRandall Stewart 186cf70a46bSRandall Stewart static int 187cf70a46bSRandall Stewart sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 188cf70a46bSRandall Stewart { 189cf70a46bSRandall Stewart int error, newnmbjumbo16; 190cf70a46bSRandall Stewart 191cf70a46bSRandall Stewart newnmbjumbo16 = nmbjumbo16; 192cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 193cf70a46bSRandall Stewart if (error == 0 && req->newptr) { 194cf70a46bSRandall Stewart if (newnmbjumbo16> nmbjumbo16) { 195cf70a46bSRandall Stewart nmbjumbo16 = newnmbjumbo16; 196cf70a46bSRandall Stewart uma_zone_set_max(zone_jumbo16, nmbjumbo16); 197cf70a46bSRandall Stewart } else 198cf70a46bSRandall Stewart error = EINVAL; 199cf70a46bSRandall 
Stewart } 200cf70a46bSRandall Stewart return (error); 201cf70a46bSRandall Stewart } 202cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 203cf70a46bSRandall Stewart &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 20456a4e45aSAndre Oppermann "Maximum number of mbuf 16k jumbo clusters allowed"); 205cf70a46bSRandall Stewart 206cf70a46bSRandall Stewart 207cf70a46bSRandall Stewart 208099a0e58SBosko Milekic SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, 209099a0e58SBosko Milekic "Mbuf general information and statistics"); 210099a0e58SBosko Milekic 211099a0e58SBosko Milekic /* 212099a0e58SBosko Milekic * Zones from which we allocate. 213099a0e58SBosko Milekic */ 214099a0e58SBosko Milekic uma_zone_t zone_mbuf; 215099a0e58SBosko Milekic uma_zone_t zone_clust; 216099a0e58SBosko Milekic uma_zone_t zone_pack; 217ec63cb90SAndre Oppermann uma_zone_t zone_jumbop; 21856a4e45aSAndre Oppermann uma_zone_t zone_jumbo9; 21956a4e45aSAndre Oppermann uma_zone_t zone_jumbo16; 22056a4e45aSAndre Oppermann uma_zone_t zone_ext_refcnt; 221099a0e58SBosko Milekic 222099a0e58SBosko Milekic /* 223099a0e58SBosko Milekic * Local prototypes. 
224099a0e58SBosko Milekic */ 225b23f72e9SBrian Feldman static int mb_ctor_mbuf(void *, int, void *, int); 226b23f72e9SBrian Feldman static int mb_ctor_clust(void *, int, void *, int); 227b23f72e9SBrian Feldman static int mb_ctor_pack(void *, int, void *, int); 228099a0e58SBosko Milekic static void mb_dtor_mbuf(void *, int, void *); 22956a4e45aSAndre Oppermann static void mb_dtor_clust(void *, int, void *); 23056a4e45aSAndre Oppermann static void mb_dtor_pack(void *, int, void *); 23156a4e45aSAndre Oppermann static int mb_zinit_pack(void *, int, int); 23256a4e45aSAndre Oppermann static void mb_zfini_pack(void *, int); 233099a0e58SBosko Milekic 234099a0e58SBosko Milekic static void mb_reclaim(void *); 235099a0e58SBosko Milekic static void mbuf_init(void *); 236ba63339aSAlan Cox static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); 237099a0e58SBosko Milekic 238a04946cfSBrian Somers /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ 239a04946cfSBrian Somers CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 240a04946cfSBrian Somers 241099a0e58SBosko Milekic /* 242099a0e58SBosko Milekic * Initialize FreeBSD Network buffer allocation. 243099a0e58SBosko Milekic */ 244237fdd78SRobert Watson SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 245099a0e58SBosko Milekic static void 246099a0e58SBosko Milekic mbuf_init(void *dummy) 247099a0e58SBosko Milekic { 248099a0e58SBosko Milekic 249099a0e58SBosko Milekic /* 250099a0e58SBosko Milekic * Configure UMA zones for Mbufs, Clusters, and Packets. 
251099a0e58SBosko Milekic */ 25256a4e45aSAndre Oppermann zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 25356a4e45aSAndre Oppermann mb_ctor_mbuf, mb_dtor_mbuf, 254121f0509SMike Silbersack #ifdef INVARIANTS 25556a4e45aSAndre Oppermann trash_init, trash_fini, 256121f0509SMike Silbersack #else 25756a4e45aSAndre Oppermann NULL, NULL, 258121f0509SMike Silbersack #endif 25956a4e45aSAndre Oppermann MSIZE - 1, UMA_ZONE_MAXBUCKET); 26056a4e45aSAndre Oppermann 26168352adfSRobert Watson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 26256a4e45aSAndre Oppermann mb_ctor_clust, mb_dtor_clust, 263121f0509SMike Silbersack #ifdef INVARIANTS 26456a4e45aSAndre Oppermann trash_init, trash_fini, 265121f0509SMike Silbersack #else 26656a4e45aSAndre Oppermann NULL, NULL, 267121f0509SMike Silbersack #endif 26856a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 269099a0e58SBosko Milekic if (nmbclusters > 0) 270099a0e58SBosko Milekic uma_zone_set_max(zone_clust, nmbclusters); 271099a0e58SBosko Milekic 27256a4e45aSAndre Oppermann zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 27356a4e45aSAndre Oppermann mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 27456a4e45aSAndre Oppermann 275fcf90618SGleb Smirnoff /* Make jumbo frame zone too. Page size, 9k and 16k. 
*/ 276ec63cb90SAndre Oppermann zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 277d5269a63SAndre Oppermann mb_ctor_clust, mb_dtor_clust, 278d5269a63SAndre Oppermann #ifdef INVARIANTS 279d5269a63SAndre Oppermann trash_init, trash_fini, 280d5269a63SAndre Oppermann #else 281d5269a63SAndre Oppermann NULL, NULL, 282d5269a63SAndre Oppermann #endif 283d5269a63SAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 284ec63cb90SAndre Oppermann if (nmbjumbop > 0) 285ec63cb90SAndre Oppermann uma_zone_set_max(zone_jumbop, nmbjumbop); 286d5269a63SAndre Oppermann 28756a4e45aSAndre Oppermann zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 28856a4e45aSAndre Oppermann mb_ctor_clust, mb_dtor_clust, 28956a4e45aSAndre Oppermann #ifdef INVARIANTS 29056a4e45aSAndre Oppermann trash_init, trash_fini, 29156a4e45aSAndre Oppermann #else 29256a4e45aSAndre Oppermann NULL, NULL, 29356a4e45aSAndre Oppermann #endif 29456a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 29556a4e45aSAndre Oppermann if (nmbjumbo9 > 0) 29656a4e45aSAndre Oppermann uma_zone_set_max(zone_jumbo9, nmbjumbo9); 297ba63339aSAlan Cox uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 29856a4e45aSAndre Oppermann 29956a4e45aSAndre Oppermann zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 30056a4e45aSAndre Oppermann mb_ctor_clust, mb_dtor_clust, 30156a4e45aSAndre Oppermann #ifdef INVARIANTS 30256a4e45aSAndre Oppermann trash_init, trash_fini, 30356a4e45aSAndre Oppermann #else 30456a4e45aSAndre Oppermann NULL, NULL, 30556a4e45aSAndre Oppermann #endif 30656a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 30756a4e45aSAndre Oppermann if (nmbjumbo16 > 0) 30856a4e45aSAndre Oppermann uma_zone_set_max(zone_jumbo16, nmbjumbo16); 309ba63339aSAlan Cox uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 31056a4e45aSAndre Oppermann 31156a4e45aSAndre Oppermann zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 31256a4e45aSAndre Oppermann NULL, NULL, 31356a4e45aSAndre 
Oppermann NULL, NULL, 31456a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 31556a4e45aSAndre Oppermann 31656a4e45aSAndre Oppermann /* uma_prealloc() goes here... */ 317099a0e58SBosko Milekic 318099a0e58SBosko Milekic /* 319099a0e58SBosko Milekic * Hook event handler for low-memory situation, used to 320099a0e58SBosko Milekic * drain protocols and push data back to the caches (UMA 321099a0e58SBosko Milekic * later pushes it back to VM). 322099a0e58SBosko Milekic */ 323099a0e58SBosko Milekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 324099a0e58SBosko Milekic EVENTHANDLER_PRI_FIRST); 325099a0e58SBosko Milekic 326099a0e58SBosko Milekic /* 327099a0e58SBosko Milekic * [Re]set counters and local statistics knobs. 328099a0e58SBosko Milekic * XXX Some of these should go and be replaced, but UMA stat 329099a0e58SBosko Milekic * gathering needs to be revised. 330099a0e58SBosko Milekic */ 331099a0e58SBosko Milekic mbstat.m_mbufs = 0; 332099a0e58SBosko Milekic mbstat.m_mclusts = 0; 333099a0e58SBosko Milekic mbstat.m_drain = 0; 334099a0e58SBosko Milekic mbstat.m_msize = MSIZE; 335099a0e58SBosko Milekic mbstat.m_mclbytes = MCLBYTES; 336099a0e58SBosko Milekic mbstat.m_minclsize = MINCLSIZE; 337099a0e58SBosko Milekic mbstat.m_mlen = MLEN; 338099a0e58SBosko Milekic mbstat.m_mhlen = MHLEN; 339099a0e58SBosko Milekic mbstat.m_numtypes = MT_NTYPES; 340099a0e58SBosko Milekic 341099a0e58SBosko Milekic mbstat.m_mcfail = mbstat.m_mpfail = 0; 342099a0e58SBosko Milekic mbstat.sf_iocnt = 0; 343099a0e58SBosko Milekic mbstat.sf_allocwait = mbstat.sf_allocfail = 0; 344099a0e58SBosko Milekic } 345099a0e58SBosko Milekic 346099a0e58SBosko Milekic /* 347ba63339aSAlan Cox * UMA backend page allocator for the jumbo frame zones. 348ba63339aSAlan Cox * 349ba63339aSAlan Cox * Allocates kernel virtual memory that is backed by contiguous physical 350ba63339aSAlan Cox * pages. 
351ba63339aSAlan Cox */ 352ba63339aSAlan Cox static void * 353ba63339aSAlan Cox mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 354ba63339aSAlan Cox { 355ba63339aSAlan Cox 3567630c265SAlan Cox /* Inform UMA that this allocator uses kernel_map/object. */ 3577630c265SAlan Cox *flags = UMA_SLAB_KERNEL; 358c45c0034SAlan Cox return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 359c45c0034SAlan Cox (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0)); 360ba63339aSAlan Cox } 361ba63339aSAlan Cox 362ba63339aSAlan Cox /* 363099a0e58SBosko Milekic * Constructor for Mbuf master zone. 364099a0e58SBosko Milekic * 365099a0e58SBosko Milekic * The 'arg' pointer points to a mb_args structure which 366099a0e58SBosko Milekic * contains call-specific information required to support the 36756a4e45aSAndre Oppermann * mbuf allocation API. See mbuf.h. 368099a0e58SBosko Milekic */ 369b23f72e9SBrian Feldman static int 370b23f72e9SBrian Feldman mb_ctor_mbuf(void *mem, int size, void *arg, int how) 371099a0e58SBosko Milekic { 372099a0e58SBosko Milekic struct mbuf *m; 373099a0e58SBosko Milekic struct mb_args *args; 374b23f72e9SBrian Feldman #ifdef MAC 375b23f72e9SBrian Feldman int error; 376b23f72e9SBrian Feldman #endif 377099a0e58SBosko Milekic int flags; 378099a0e58SBosko Milekic short type; 379099a0e58SBosko Milekic 380121f0509SMike Silbersack #ifdef INVARIANTS 381121f0509SMike Silbersack trash_ctor(mem, size, arg, how); 382121f0509SMike Silbersack #endif 383099a0e58SBosko Milekic m = (struct mbuf *)mem; 384099a0e58SBosko Milekic args = (struct mb_args *)arg; 385099a0e58SBosko Milekic flags = args->flags; 386099a0e58SBosko Milekic type = args->type; 387099a0e58SBosko Milekic 38856a4e45aSAndre Oppermann /* 38956a4e45aSAndre Oppermann * The mbuf is initialized later. The caller has the 390fcf90618SGleb Smirnoff * responsibility to set up any MAC labels too. 
39156a4e45aSAndre Oppermann */ 39256a4e45aSAndre Oppermann if (type == MT_NOINIT) 39356a4e45aSAndre Oppermann return (0); 39456a4e45aSAndre Oppermann 395099a0e58SBosko Milekic m->m_next = NULL; 396099a0e58SBosko Milekic m->m_nextpkt = NULL; 39756a4e45aSAndre Oppermann m->m_len = 0; 3986bc72ab9SBosko Milekic m->m_flags = flags; 39956a4e45aSAndre Oppermann m->m_type = type; 400099a0e58SBosko Milekic if (flags & M_PKTHDR) { 401099a0e58SBosko Milekic m->m_data = m->m_pktdat; 402099a0e58SBosko Milekic m->m_pkthdr.rcvif = NULL; 40356a4e45aSAndre Oppermann m->m_pkthdr.header = NULL; 4048aa7a581SKip Macy m->m_pkthdr.len = 0; 405099a0e58SBosko Milekic m->m_pkthdr.csum_flags = 0; 40656a4e45aSAndre Oppermann m->m_pkthdr.csum_data = 0; 407a855e2b4SAndre Oppermann m->m_pkthdr.tso_segsz = 0; 408a855e2b4SAndre Oppermann m->m_pkthdr.ether_vtag = 0; 409877e8812SRobert Watson m->m_pkthdr.flowid = 0; 410099a0e58SBosko Milekic SLIST_INIT(&m->m_pkthdr.tags); 411099a0e58SBosko Milekic #ifdef MAC 412099a0e58SBosko Milekic /* If the label init fails, fail the alloc */ 41330d239bcSRobert Watson error = mac_mbuf_init(m, how); 414b23f72e9SBrian Feldman if (error) 415b23f72e9SBrian Feldman return (error); 416099a0e58SBosko Milekic #endif 4176bc72ab9SBosko Milekic } else 418099a0e58SBosko Milekic m->m_data = m->m_dat; 419b23f72e9SBrian Feldman return (0); 420099a0e58SBosko Milekic } 421099a0e58SBosko Milekic 422099a0e58SBosko Milekic /* 42356a4e45aSAndre Oppermann * The Mbuf master zone destructor. 
424099a0e58SBosko Milekic */ 425099a0e58SBosko Milekic static void 426099a0e58SBosko Milekic mb_dtor_mbuf(void *mem, int size, void *arg) 427099a0e58SBosko Milekic { 428099a0e58SBosko Milekic struct mbuf *m; 429629b9e08SKip Macy unsigned long flags; 430099a0e58SBosko Milekic 431099a0e58SBosko Milekic m = (struct mbuf *)mem; 432629b9e08SKip Macy flags = (unsigned long)arg; 433629b9e08SKip Macy 434629b9e08SKip Macy if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) 435099a0e58SBosko Milekic m_tag_delete_chain(m, NULL); 43656a4e45aSAndre Oppermann KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); 437457869b9SKip Macy KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 438121f0509SMike Silbersack #ifdef INVARIANTS 439121f0509SMike Silbersack trash_dtor(mem, size, arg); 440121f0509SMike Silbersack #endif 441099a0e58SBosko Milekic } 442099a0e58SBosko Milekic 44356a4e45aSAndre Oppermann /* 44456a4e45aSAndre Oppermann * The Mbuf Packet zone destructor. 44556a4e45aSAndre Oppermann */ 446099a0e58SBosko Milekic static void 447099a0e58SBosko Milekic mb_dtor_pack(void *mem, int size, void *arg) 448099a0e58SBosko Milekic { 449099a0e58SBosko Milekic struct mbuf *m; 450099a0e58SBosko Milekic 451099a0e58SBosko Milekic m = (struct mbuf *)mem; 452099a0e58SBosko Milekic if ((m->m_flags & M_PKTHDR) != 0) 453099a0e58SBosko Milekic m_tag_delete_chain(m, NULL); 45456a4e45aSAndre Oppermann 45556a4e45aSAndre Oppermann /* Make sure we've got a clean cluster back. 
*/ 45656a4e45aSAndre Oppermann KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 45756a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 45856a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 459cf827063SPoul-Henning Kamp KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 460cf827063SPoul-Henning Kamp KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 46156a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 46249d46b61SGleb Smirnoff KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 46356a4e45aSAndre Oppermann KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); 464121f0509SMike Silbersack #ifdef INVARIANTS 465121f0509SMike Silbersack trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 466121f0509SMike Silbersack #endif 4676c125b8dSMohan Srinivasan /* 468ef44c8d2SDavid E. O'Brien * If there are processes blocked on zone_clust, waiting for pages 469ef44c8d2SDavid E. O'Brien * to be freed up, * cause them to be woken up by draining the 470ef44c8d2SDavid E. O'Brien * packet zone. We are exposed to a race here * (in the check for 471ef44c8d2SDavid E. O'Brien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 472ef44c8d2SDavid E. O'Brien * is deliberate. We don't want to acquire the zone lock for every 473ef44c8d2SDavid E. O'Brien * mbuf free. 4746c125b8dSMohan Srinivasan */ 4756c125b8dSMohan Srinivasan if (uma_zone_exhausted_nolock(zone_clust)) 4766c125b8dSMohan Srinivasan zone_drain(zone_pack); 477099a0e58SBosko Milekic } 478099a0e58SBosko Milekic 479099a0e58SBosko Milekic /* 480ec63cb90SAndre Oppermann * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 
481099a0e58SBosko Milekic * 482099a0e58SBosko Milekic * Here the 'arg' pointer points to the Mbuf which we 48356a4e45aSAndre Oppermann * are configuring cluster storage for. If 'arg' is 48456a4e45aSAndre Oppermann * empty we allocate just the cluster without setting 48556a4e45aSAndre Oppermann * the mbuf to it. See mbuf.h. 486099a0e58SBosko Milekic */ 487b23f72e9SBrian Feldman static int 488b23f72e9SBrian Feldman mb_ctor_clust(void *mem, int size, void *arg, int how) 489099a0e58SBosko Milekic { 490099a0e58SBosko Milekic struct mbuf *m; 49156a4e45aSAndre Oppermann u_int *refcnt; 4920f4d9d04SKip Macy int type; 4930f4d9d04SKip Macy uma_zone_t zone; 494099a0e58SBosko Milekic 495121f0509SMike Silbersack #ifdef INVARIANTS 496121f0509SMike Silbersack trash_ctor(mem, size, arg, how); 497121f0509SMike Silbersack #endif 49856a4e45aSAndre Oppermann switch (size) { 49956a4e45aSAndre Oppermann case MCLBYTES: 50056a4e45aSAndre Oppermann type = EXT_CLUSTER; 5010f4d9d04SKip Macy zone = zone_clust; 50256a4e45aSAndre Oppermann break; 503ec63cb90SAndre Oppermann #if MJUMPAGESIZE != MCLBYTES 504ec63cb90SAndre Oppermann case MJUMPAGESIZE: 505ec63cb90SAndre Oppermann type = EXT_JUMBOP; 5060f4d9d04SKip Macy zone = zone_jumbop; 507d5269a63SAndre Oppermann break; 50836ae3fd3SAndre Oppermann #endif 50956a4e45aSAndre Oppermann case MJUM9BYTES: 51056a4e45aSAndre Oppermann type = EXT_JUMBO9; 5110f4d9d04SKip Macy zone = zone_jumbo9; 51256a4e45aSAndre Oppermann break; 51356a4e45aSAndre Oppermann case MJUM16BYTES: 51456a4e45aSAndre Oppermann type = EXT_JUMBO16; 5150f4d9d04SKip Macy zone = zone_jumbo16; 51656a4e45aSAndre Oppermann break; 51756a4e45aSAndre Oppermann default: 51856a4e45aSAndre Oppermann panic("unknown cluster size"); 51956a4e45aSAndre Oppermann break; 52056a4e45aSAndre Oppermann } 5210f4d9d04SKip Macy 5220f4d9d04SKip Macy m = (struct mbuf *)arg; 5230f4d9d04SKip Macy refcnt = uma_find_refcnt(zone, mem); 5240f4d9d04SKip Macy *refcnt = 1; 5250f4d9d04SKip Macy if (m != NULL) { 
526099a0e58SBosko Milekic m->m_ext.ext_buf = (caddr_t)mem; 527099a0e58SBosko Milekic m->m_data = m->m_ext.ext_buf; 528099a0e58SBosko Milekic m->m_flags |= M_EXT; 529099a0e58SBosko Milekic m->m_ext.ext_free = NULL; 530cf827063SPoul-Henning Kamp m->m_ext.ext_arg1 = NULL; 531cf827063SPoul-Henning Kamp m->m_ext.ext_arg2 = NULL; 53256a4e45aSAndre Oppermann m->m_ext.ext_size = size; 53356a4e45aSAndre Oppermann m->m_ext.ext_type = type; 5340f4d9d04SKip Macy m->m_ext.ref_cnt = refcnt; 53556a4e45aSAndre Oppermann } 5360f4d9d04SKip Macy 537b23f72e9SBrian Feldman return (0); 538099a0e58SBosko Milekic } 539099a0e58SBosko Milekic 54056a4e45aSAndre Oppermann /* 54156a4e45aSAndre Oppermann * The Mbuf Cluster zone destructor. 54256a4e45aSAndre Oppermann */ 543099a0e58SBosko Milekic static void 544099a0e58SBosko Milekic mb_dtor_clust(void *mem, int size, void *arg) 545099a0e58SBosko Milekic { 546121f0509SMike Silbersack #ifdef INVARIANTS 5470f4d9d04SKip Macy uma_zone_t zone; 5480f4d9d04SKip Macy 5490f4d9d04SKip Macy zone = m_getzone(size); 5500f4d9d04SKip Macy KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, 5510f4d9d04SKip Macy ("%s: refcnt incorrect %u", __func__, 5520f4d9d04SKip Macy *(uma_find_refcnt(zone, mem))) ); 5530f4d9d04SKip Macy 554121f0509SMike Silbersack trash_dtor(mem, size, arg); 555121f0509SMike Silbersack #endif 556099a0e58SBosko Milekic } 557099a0e58SBosko Milekic 558099a0e58SBosko Milekic /* 559099a0e58SBosko Milekic * The Packet secondary zone's init routine, executed on the 56056a4e45aSAndre Oppermann * object's transition from mbuf keg slab to zone cache. 561099a0e58SBosko Milekic */ 562b23f72e9SBrian Feldman static int 56356a4e45aSAndre Oppermann mb_zinit_pack(void *mem, int size, int how) 564099a0e58SBosko Milekic { 565099a0e58SBosko Milekic struct mbuf *m; 566099a0e58SBosko Milekic 56756a4e45aSAndre Oppermann m = (struct mbuf *)mem; /* m is virgin. 
*/ 568a7bd90efSAndre Oppermann if (uma_zalloc_arg(zone_clust, m, how) == NULL || 569a7bd90efSAndre Oppermann m->m_ext.ext_buf == NULL) 570b23f72e9SBrian Feldman return (ENOMEM); 571cd5bb63bSAndre Oppermann m->m_ext.ext_type = EXT_PACKET; /* Override. */ 572121f0509SMike Silbersack #ifdef INVARIANTS 573121f0509SMike Silbersack trash_init(m->m_ext.ext_buf, MCLBYTES, how); 574121f0509SMike Silbersack #endif 575b23f72e9SBrian Feldman return (0); 576099a0e58SBosko Milekic } 577099a0e58SBosko Milekic 578099a0e58SBosko Milekic /* 579099a0e58SBosko Milekic * The Packet secondary zone's fini routine, executed on the 580099a0e58SBosko Milekic * object's transition from zone cache to keg slab. 581099a0e58SBosko Milekic */ 582099a0e58SBosko Milekic static void 58356a4e45aSAndre Oppermann mb_zfini_pack(void *mem, int size) 584099a0e58SBosko Milekic { 585099a0e58SBosko Milekic struct mbuf *m; 586099a0e58SBosko Milekic 587099a0e58SBosko Milekic m = (struct mbuf *)mem; 588121f0509SMike Silbersack #ifdef INVARIANTS 589121f0509SMike Silbersack trash_fini(m->m_ext.ext_buf, MCLBYTES); 590121f0509SMike Silbersack #endif 591099a0e58SBosko Milekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 592a7b844d2SMike Silbersack #ifdef INVARIANTS 593a7b844d2SMike Silbersack trash_dtor(mem, size, NULL); 594a7b844d2SMike Silbersack #endif 595099a0e58SBosko Milekic } 596099a0e58SBosko Milekic 597099a0e58SBosko Milekic /* 598099a0e58SBosko Milekic * The "packet" keg constructor. 
599099a0e58SBosko Milekic */ 600b23f72e9SBrian Feldman static int 601b23f72e9SBrian Feldman mb_ctor_pack(void *mem, int size, void *arg, int how) 602099a0e58SBosko Milekic { 603099a0e58SBosko Milekic struct mbuf *m; 604099a0e58SBosko Milekic struct mb_args *args; 605b23f72e9SBrian Feldman #ifdef MAC 606b23f72e9SBrian Feldman int error; 607b23f72e9SBrian Feldman #endif 608b23f72e9SBrian Feldman int flags; 609099a0e58SBosko Milekic short type; 610099a0e58SBosko Milekic 611099a0e58SBosko Milekic m = (struct mbuf *)mem; 612099a0e58SBosko Milekic args = (struct mb_args *)arg; 613099a0e58SBosko Milekic flags = args->flags; 614099a0e58SBosko Milekic type = args->type; 615099a0e58SBosko Milekic 616121f0509SMike Silbersack #ifdef INVARIANTS 617121f0509SMike Silbersack trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 618121f0509SMike Silbersack #endif 619099a0e58SBosko Milekic m->m_next = NULL; 6206bc72ab9SBosko Milekic m->m_nextpkt = NULL; 621099a0e58SBosko Milekic m->m_data = m->m_ext.ext_buf; 62256a4e45aSAndre Oppermann m->m_len = 0; 62356a4e45aSAndre Oppermann m->m_flags = (flags | M_EXT); 62456a4e45aSAndre Oppermann m->m_type = type; 625099a0e58SBosko Milekic 626099a0e58SBosko Milekic if (flags & M_PKTHDR) { 627099a0e58SBosko Milekic m->m_pkthdr.rcvif = NULL; 62856a4e45aSAndre Oppermann m->m_pkthdr.len = 0; 62956a4e45aSAndre Oppermann m->m_pkthdr.header = NULL; 630099a0e58SBosko Milekic m->m_pkthdr.csum_flags = 0; 63156a4e45aSAndre Oppermann m->m_pkthdr.csum_data = 0; 632a855e2b4SAndre Oppermann m->m_pkthdr.tso_segsz = 0; 633a855e2b4SAndre Oppermann m->m_pkthdr.ether_vtag = 0; 634877e8812SRobert Watson m->m_pkthdr.flowid = 0; 635099a0e58SBosko Milekic SLIST_INIT(&m->m_pkthdr.tags); 636099a0e58SBosko Milekic #ifdef MAC 637099a0e58SBosko Milekic /* If the label init fails, fail the alloc */ 63830d239bcSRobert Watson error = mac_mbuf_init(m, how); 639b23f72e9SBrian Feldman if (error) 640b23f72e9SBrian Feldman return (error); 641099a0e58SBosko Milekic #endif 
642099a0e58SBosko Milekic } 64356a4e45aSAndre Oppermann /* m_ext is already initialized. */ 64456a4e45aSAndre Oppermann 645b23f72e9SBrian Feldman return (0); 646099a0e58SBosko Milekic } 647099a0e58SBosko Milekic 648099a0e58SBosko Milekic /* 649099a0e58SBosko Milekic * This is the protocol drain routine. 650099a0e58SBosko Milekic * 651099a0e58SBosko Milekic * No locks should be held when this is called. The drain routines have to 652099a0e58SBosko Milekic * presently acquire some locks which raises the possibility of lock order 653099a0e58SBosko Milekic * reversal. 654099a0e58SBosko Milekic */ 655099a0e58SBosko Milekic static void 656099a0e58SBosko Milekic mb_reclaim(void *junk) 657099a0e58SBosko Milekic { 658099a0e58SBosko Milekic struct domain *dp; 659099a0e58SBosko Milekic struct protosw *pr; 660099a0e58SBosko Milekic 661099a0e58SBosko Milekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 662099a0e58SBosko Milekic "mb_reclaim()"); 663099a0e58SBosko Milekic 664099a0e58SBosko Milekic for (dp = domains; dp != NULL; dp = dp->dom_next) 665099a0e58SBosko Milekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 666099a0e58SBosko Milekic if (pr->pr_drain != NULL) 667099a0e58SBosko Milekic (*pr->pr_drain)(); 668099a0e58SBosko Milekic } 669