1099a0e58SBosko Milekic /*- 28076cb52SBosko Milekic * Copyright (c) 2004, 2005, 38076cb52SBosko Milekic * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 4099a0e58SBosko Milekic * 5099a0e58SBosko Milekic * Redistribution and use in source and binary forms, with or without 6099a0e58SBosko Milekic * modification, are permitted provided that the following conditions 7099a0e58SBosko Milekic * are met: 8099a0e58SBosko Milekic * 1. Redistributions of source code must retain the above copyright 9099a0e58SBosko Milekic * notice unmodified, this list of conditions and the following 10099a0e58SBosko Milekic * disclaimer. 11099a0e58SBosko Milekic * 2. Redistributions in binary form must reproduce the above copyright 12099a0e58SBosko Milekic * notice, this list of conditions and the following disclaimer in the 13099a0e58SBosko Milekic * documentation and/or other materials provided with the distribution. 14099a0e58SBosko Milekic * 15099a0e58SBosko Milekic * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16099a0e58SBosko Milekic * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17099a0e58SBosko Milekic * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18099a0e58SBosko Milekic * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19099a0e58SBosko Milekic * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20099a0e58SBosko Milekic * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21099a0e58SBosko Milekic * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22099a0e58SBosko Milekic * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23099a0e58SBosko Milekic * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24099a0e58SBosko Milekic * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25099a0e58SBosko Milekic * SUCH DAMAGE. 26099a0e58SBosko Milekic */ 27099a0e58SBosko Milekic 28099a0e58SBosko Milekic #include <sys/cdefs.h> 29099a0e58SBosko Milekic __FBSDID("$FreeBSD$"); 30099a0e58SBosko Milekic 31099a0e58SBosko Milekic #include "opt_mac.h" 32099a0e58SBosko Milekic #include "opt_param.h" 33099a0e58SBosko Milekic 34099a0e58SBosko Milekic #include <sys/param.h> 35099a0e58SBosko Milekic #include <sys/malloc.h> 36099a0e58SBosko Milekic #include <sys/systm.h> 37099a0e58SBosko Milekic #include <sys/mbuf.h> 38099a0e58SBosko Milekic #include <sys/domain.h> 39099a0e58SBosko Milekic #include <sys/eventhandler.h> 40099a0e58SBosko Milekic #include <sys/kernel.h> 41099a0e58SBosko Milekic #include <sys/protosw.h> 42099a0e58SBosko Milekic #include <sys/smp.h> 43099a0e58SBosko Milekic #include <sys/sysctl.h> 44099a0e58SBosko Milekic 45aed55708SRobert Watson #include <security/mac/mac_framework.h> 46aed55708SRobert Watson 47099a0e58SBosko Milekic #include <vm/vm.h> 48099a0e58SBosko Milekic #include <vm/vm_page.h> 49099a0e58SBosko Milekic #include <vm/uma.h> 50121f0509SMike Silbersack #include <vm/uma_int.h> 51121f0509SMike Silbersack #include <vm/uma_dbg.h> 52099a0e58SBosko Milekic 53099a0e58SBosko Milekic /* 54099a0e58SBosko Milekic * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 55099a0e58SBosko Milekic * Zones. 56099a0e58SBosko Milekic * 57099a0e58SBosko Milekic * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 58099a0e58SBosko Milekic * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 59099a0e58SBosko Milekic * administrator so desires. 60099a0e58SBosko Milekic * 61099a0e58SBosko Milekic * Mbufs are allocated from a UMA Master Zone called the Mbuf 62099a0e58SBosko Milekic * Zone. 63099a0e58SBosko Milekic * 64099a0e58SBosko Milekic * Additionally, FreeBSD provides a Packet Zone, which it 65099a0e58SBosko Milekic * configures as a Secondary Zone to the Mbuf Master Zone, 66099a0e58SBosko Milekic * thus sharing backend Slab kegs with the Mbuf Master Zone. 67099a0e58SBosko Milekic * 68099a0e58SBosko Milekic * Thus common-case allocations and locking are simplified: 69099a0e58SBosko Milekic * 70099a0e58SBosko Milekic * m_clget() m_getcl() 71099a0e58SBosko Milekic * | | 72099a0e58SBosko Milekic * | .------------>[(Packet Cache)] m_get(), m_gethdr() 73099a0e58SBosko Milekic * | | [ Packet ] | 74099a0e58SBosko Milekic * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 75099a0e58SBosko Milekic * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 76099a0e58SBosko Milekic * | \________ | 77099a0e58SBosko Milekic * [ Cluster Keg ] \ / 78099a0e58SBosko Milekic * | [ Mbuf Keg ] 79099a0e58SBosko Milekic * [ Cluster Slabs ] | 80099a0e58SBosko Milekic * | [ Mbuf Slabs ] 81099a0e58SBosko Milekic * \____________(VM)_________________/ 8256a4e45aSAndre Oppermann * 8356a4e45aSAndre Oppermann * 84fcf90618SGleb Smirnoff * Whenever an object is allocated with uma_zalloc() out of 8556a4e45aSAndre Oppermann * one of the Zones its _ctor_ function is executed. The same 86fcf90618SGleb Smirnoff * for any deallocation through uma_zfree() the _dtor_ function 8756a4e45aSAndre Oppermann * is executed. 8856a4e45aSAndre Oppermann * 8956a4e45aSAndre Oppermann * Caches are per-CPU and are filled from the Master Zone. 9056a4e45aSAndre Oppermann * 91fcf90618SGleb Smirnoff * Whenever an object is allocated from the underlying global 9256a4e45aSAndre Oppermann * memory pool it gets pre-initialized with the _zinit_ functions. 9356a4e45aSAndre Oppermann * When the Keg's are overfull objects get decomissioned with 9456a4e45aSAndre Oppermann * _zfini_ functions and free'd back to the global memory pool. 9556a4e45aSAndre Oppermann * 96099a0e58SBosko Milekic */ 97099a0e58SBosko Milekic 9856a4e45aSAndre Oppermann int nmbclusters; /* limits number of mbuf clusters */ 99ec63cb90SAndre Oppermann int nmbjumbop; /* limits number of page size jumbo clusters */ 10056a4e45aSAndre Oppermann int nmbjumbo9; /* limits number of 9k jumbo clusters */ 10156a4e45aSAndre Oppermann int nmbjumbo16; /* limits number of 16k jumbo clusters */ 102099a0e58SBosko Milekic struct mbstat mbstat; 103099a0e58SBosko Milekic 104099a0e58SBosko Milekic static void 105099a0e58SBosko Milekic tunable_mbinit(void *dummy) 106099a0e58SBosko Milekic { 107099a0e58SBosko Milekic 108099a0e58SBosko Milekic /* This has to be done before VM init. */ 109099a0e58SBosko Milekic nmbclusters = 1024 + maxusers * 64; 110cf70a46bSRandall Stewart nmbjumbop = nmbclusters / 2; 111cf70a46bSRandall Stewart nmbjumbo9 = nmbjumbop / 2; 112cf70a46bSRandall Stewart nmbjumbo16 = nmbjumbo9 / 2; 113099a0e58SBosko Milekic TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 114099a0e58SBosko Milekic } 115099a0e58SBosko Milekic SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); 116099a0e58SBosko Milekic 11756a4e45aSAndre Oppermann /* XXX: These should be tuneables. Can't change UMA limits on the fly. */ 1184f590175SPaul Saab static int 1194f590175SPaul Saab sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 1204f590175SPaul Saab { 1214f590175SPaul Saab int error, newnmbclusters; 1224f590175SPaul Saab 1234f590175SPaul Saab newnmbclusters = nmbclusters; 124041b706bSDavid Malone error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 1254f590175SPaul Saab if (error == 0 && req->newptr) { 1264f590175SPaul Saab if (newnmbclusters > nmbclusters) { 1274f590175SPaul Saab nmbclusters = newnmbclusters; 1284f590175SPaul Saab uma_zone_set_max(zone_clust, nmbclusters); 1294f590175SPaul Saab EVENTHANDLER_INVOKE(nmbclusters_change); 1304f590175SPaul Saab } else 1314f590175SPaul Saab error = EINVAL; 1324f590175SPaul Saab } 1334f590175SPaul Saab return (error); 1344f590175SPaul Saab } 1354f590175SPaul Saab SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 1364f590175SPaul Saab &nmbclusters, 0, sysctl_nmbclusters, "IU", 137099a0e58SBosko Milekic "Maximum number of mbuf clusters allowed"); 138cf70a46bSRandall Stewart 139cf70a46bSRandall Stewart static int 140cf70a46bSRandall Stewart sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 141cf70a46bSRandall Stewart { 142cf70a46bSRandall Stewart int error, newnmbjumbop; 143cf70a46bSRandall Stewart 144cf70a46bSRandall Stewart newnmbjumbop = nmbjumbop; 145cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 146cf70a46bSRandall Stewart if (error == 0 && req->newptr) { 147cf70a46bSRandall Stewart if (newnmbjumbop> nmbjumbop) { 148cf70a46bSRandall Stewart nmbjumbop = newnmbjumbop; 149cf70a46bSRandall Stewart uma_zone_set_max(zone_jumbop, nmbjumbop); 150cf70a46bSRandall Stewart } else 151cf70a46bSRandall Stewart error = EINVAL; 152cf70a46bSRandall Stewart } 153cf70a46bSRandall Stewart return (error); 154cf70a46bSRandall Stewart } 155cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 156cf70a46bSRandall Stewart &nmbjumbop, 0, sysctl_nmbjumbop, "IU", 157ec63cb90SAndre Oppermann "Maximum number of mbuf page size jumbo clusters allowed"); 158cf70a46bSRandall Stewart 159cf70a46bSRandall Stewart 160cf70a46bSRandall Stewart static int 161cf70a46bSRandall Stewart sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 162cf70a46bSRandall Stewart { 163cf70a46bSRandall Stewart int error, newnmbjumbo9; 164cf70a46bSRandall Stewart 165cf70a46bSRandall Stewart newnmbjumbo9 = nmbjumbo9; 166cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 167cf70a46bSRandall Stewart if (error == 0 && req->newptr) { 168cf70a46bSRandall Stewart if (newnmbjumbo9> nmbjumbo9) { 169cf70a46bSRandall Stewart nmbjumbo9 = newnmbjumbo9; 170cf70a46bSRandall Stewart uma_zone_set_max(zone_jumbo9, nmbjumbo9); 171cf70a46bSRandall Stewart } else 172cf70a46bSRandall Stewart error = EINVAL; 173cf70a46bSRandall Stewart } 174cf70a46bSRandall Stewart return (error); 175cf70a46bSRandall Stewart } 176cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 177cf70a46bSRandall Stewart &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 17856a4e45aSAndre Oppermann "Maximum number of mbuf 9k jumbo clusters allowed"); 179cf70a46bSRandall Stewart 180cf70a46bSRandall Stewart static int 181cf70a46bSRandall Stewart sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 182cf70a46bSRandall Stewart { 183cf70a46bSRandall Stewart int error, newnmbjumbo16; 184cf70a46bSRandall Stewart 185cf70a46bSRandall Stewart newnmbjumbo16 = nmbjumbo16; 186cf70a46bSRandall Stewart error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 187cf70a46bSRandall Stewart if (error == 0 && req->newptr) { 188cf70a46bSRandall Stewart if (newnmbjumbo16> nmbjumbo16) { 189cf70a46bSRandall Stewart nmbjumbo16 = newnmbjumbo16; 190cf70a46bSRandall Stewart uma_zone_set_max(zone_jumbo16, nmbjumbo16); 191cf70a46bSRandall Stewart } else 192cf70a46bSRandall Stewart error = EINVAL; 193cf70a46bSRandall Stewart } 194cf70a46bSRandall Stewart return (error); 195cf70a46bSRandall Stewart } 196cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 197cf70a46bSRandall Stewart &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 19856a4e45aSAndre Oppermann "Maximum number of mbuf 16k jumbo clusters allowed"); 199cf70a46bSRandall Stewart 200cf70a46bSRandall Stewart 201cf70a46bSRandall Stewart 202099a0e58SBosko Milekic SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, 203099a0e58SBosko Milekic "Mbuf general information and statistics"); 204099a0e58SBosko Milekic 205099a0e58SBosko Milekic /* 206099a0e58SBosko Milekic * Zones from which we allocate. 207099a0e58SBosko Milekic */ 208099a0e58SBosko Milekic uma_zone_t zone_mbuf; 209099a0e58SBosko Milekic uma_zone_t zone_clust; 210099a0e58SBosko Milekic uma_zone_t zone_pack; 211ec63cb90SAndre Oppermann uma_zone_t zone_jumbop; 21256a4e45aSAndre Oppermann uma_zone_t zone_jumbo9; 21356a4e45aSAndre Oppermann uma_zone_t zone_jumbo16; 21456a4e45aSAndre Oppermann uma_zone_t zone_ext_refcnt; 215099a0e58SBosko Milekic 216099a0e58SBosko Milekic /* 217099a0e58SBosko Milekic * Local prototypes. 218099a0e58SBosko Milekic */ 219b23f72e9SBrian Feldman static int mb_ctor_mbuf(void *, int, void *, int); 220b23f72e9SBrian Feldman static int mb_ctor_clust(void *, int, void *, int); 221b23f72e9SBrian Feldman static int mb_ctor_pack(void *, int, void *, int); 222099a0e58SBosko Milekic static void mb_dtor_mbuf(void *, int, void *); 22356a4e45aSAndre Oppermann static void mb_dtor_clust(void *, int, void *); 22456a4e45aSAndre Oppermann static void mb_dtor_pack(void *, int, void *); 22556a4e45aSAndre Oppermann static int mb_zinit_pack(void *, int, int); 22656a4e45aSAndre Oppermann static void mb_zfini_pack(void *, int); 227099a0e58SBosko Milekic 228099a0e58SBosko Milekic static void mb_reclaim(void *); 229099a0e58SBosko Milekic static void mbuf_init(void *); 230ba63339aSAlan Cox static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); 231ba63339aSAlan Cox static void mbuf_jumbo_free(void *, int, u_int8_t); 232ba63339aSAlan Cox 233ba63339aSAlan Cox static MALLOC_DEFINE(M_JUMBOFRAME, "jumboframes", "mbuf jumbo frame buffers"); 234099a0e58SBosko Milekic 235a04946cfSBrian Somers /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ 236a04946cfSBrian Somers CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 237a04946cfSBrian Somers 238099a0e58SBosko Milekic /* 239099a0e58SBosko Milekic * Initialize FreeBSD Network buffer allocation. 240099a0e58SBosko Milekic */ 241099a0e58SBosko Milekic SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL) 242099a0e58SBosko Milekic static void 243099a0e58SBosko Milekic mbuf_init(void *dummy) 244099a0e58SBosko Milekic { 245099a0e58SBosko Milekic 246099a0e58SBosko Milekic /* 247099a0e58SBosko Milekic * Configure UMA zones for Mbufs, Clusters, and Packets. 248099a0e58SBosko Milekic */ 24956a4e45aSAndre Oppermann zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 25056a4e45aSAndre Oppermann mb_ctor_mbuf, mb_dtor_mbuf, 251121f0509SMike Silbersack #ifdef INVARIANTS 25256a4e45aSAndre Oppermann trash_init, trash_fini, 253121f0509SMike Silbersack #else 25456a4e45aSAndre Oppermann NULL, NULL, 255121f0509SMike Silbersack #endif 25656a4e45aSAndre Oppermann MSIZE - 1, UMA_ZONE_MAXBUCKET); 25756a4e45aSAndre Oppermann 25868352adfSRobert Watson zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 25956a4e45aSAndre Oppermann mb_ctor_clust, mb_dtor_clust, 260121f0509SMike Silbersack #ifdef INVARIANTS 26156a4e45aSAndre Oppermann trash_init, trash_fini, 262121f0509SMike Silbersack #else 26356a4e45aSAndre Oppermann NULL, NULL, 264121f0509SMike Silbersack #endif 26556a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 266099a0e58SBosko Milekic if (nmbclusters > 0) 267099a0e58SBosko Milekic uma_zone_set_max(zone_clust, nmbclusters); 268099a0e58SBosko Milekic 26956a4e45aSAndre Oppermann zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 27056a4e45aSAndre Oppermann mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 27156a4e45aSAndre Oppermann 272fcf90618SGleb Smirnoff /* Make jumbo frame zone too. Page size, 9k and 16k. */ 273ec63cb90SAndre Oppermann zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 274d5269a63SAndre Oppermann mb_ctor_clust, mb_dtor_clust, 275d5269a63SAndre Oppermann #ifdef INVARIANTS 276d5269a63SAndre Oppermann trash_init, trash_fini, 277d5269a63SAndre Oppermann #else 278d5269a63SAndre Oppermann NULL, NULL, 279d5269a63SAndre Oppermann #endif 280d5269a63SAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 281ec63cb90SAndre Oppermann if (nmbjumbop > 0) 282ec63cb90SAndre Oppermann uma_zone_set_max(zone_jumbop, nmbjumbop); 283d5269a63SAndre Oppermann 28456a4e45aSAndre Oppermann zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 28556a4e45aSAndre Oppermann mb_ctor_clust, mb_dtor_clust, 28656a4e45aSAndre Oppermann #ifdef INVARIANTS 28756a4e45aSAndre Oppermann trash_init, trash_fini, 28856a4e45aSAndre Oppermann #else 28956a4e45aSAndre Oppermann NULL, NULL, 29056a4e45aSAndre Oppermann #endif 29156a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 29256a4e45aSAndre Oppermann if (nmbjumbo9 > 0) 29356a4e45aSAndre Oppermann uma_zone_set_max(zone_jumbo9, nmbjumbo9); 294ba63339aSAlan Cox uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 295ba63339aSAlan Cox uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free); 29656a4e45aSAndre Oppermann 29756a4e45aSAndre Oppermann zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 29856a4e45aSAndre Oppermann mb_ctor_clust, mb_dtor_clust, 29956a4e45aSAndre Oppermann #ifdef INVARIANTS 30056a4e45aSAndre Oppermann trash_init, trash_fini, 30156a4e45aSAndre Oppermann #else 30256a4e45aSAndre Oppermann NULL, NULL, 30356a4e45aSAndre Oppermann #endif 30456a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_REFCNT); 30556a4e45aSAndre Oppermann if (nmbjumbo16 > 0) 30656a4e45aSAndre Oppermann uma_zone_set_max(zone_jumbo16, nmbjumbo16); 307ba63339aSAlan Cox uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 308ba63339aSAlan Cox uma_zone_set_freef(zone_jumbo16, mbuf_jumbo_free); 30956a4e45aSAndre Oppermann 31056a4e45aSAndre Oppermann zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), 31156a4e45aSAndre Oppermann NULL, NULL, 31256a4e45aSAndre Oppermann NULL, NULL, 31356a4e45aSAndre Oppermann UMA_ALIGN_PTR, UMA_ZONE_ZINIT); 31456a4e45aSAndre Oppermann 31556a4e45aSAndre Oppermann /* uma_prealloc() goes here... */ 316099a0e58SBosko Milekic 317099a0e58SBosko Milekic /* 318099a0e58SBosko Milekic * Hook event handler for low-memory situation, used to 319099a0e58SBosko Milekic * drain protocols and push data back to the caches (UMA 320099a0e58SBosko Milekic * later pushes it back to VM). 321099a0e58SBosko Milekic */ 322099a0e58SBosko Milekic EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 323099a0e58SBosko Milekic EVENTHANDLER_PRI_FIRST); 324099a0e58SBosko Milekic 325099a0e58SBosko Milekic /* 326099a0e58SBosko Milekic * [Re]set counters and local statistics knobs. 327099a0e58SBosko Milekic * XXX Some of these should go and be replaced, but UMA stat 328099a0e58SBosko Milekic * gathering needs to be revised. 329099a0e58SBosko Milekic */ 330099a0e58SBosko Milekic mbstat.m_mbufs = 0; 331099a0e58SBosko Milekic mbstat.m_mclusts = 0; 332099a0e58SBosko Milekic mbstat.m_drain = 0; 333099a0e58SBosko Milekic mbstat.m_msize = MSIZE; 334099a0e58SBosko Milekic mbstat.m_mclbytes = MCLBYTES; 335099a0e58SBosko Milekic mbstat.m_minclsize = MINCLSIZE; 336099a0e58SBosko Milekic mbstat.m_mlen = MLEN; 337099a0e58SBosko Milekic mbstat.m_mhlen = MHLEN; 338099a0e58SBosko Milekic mbstat.m_numtypes = MT_NTYPES; 339099a0e58SBosko Milekic 340099a0e58SBosko Milekic mbstat.m_mcfail = mbstat.m_mpfail = 0; 341099a0e58SBosko Milekic mbstat.sf_iocnt = 0; 342099a0e58SBosko Milekic mbstat.sf_allocwait = mbstat.sf_allocfail = 0; 343099a0e58SBosko Milekic } 344099a0e58SBosko Milekic 345099a0e58SBosko Milekic /* 346ba63339aSAlan Cox * UMA backend page allocator for the jumbo frame zones. 347ba63339aSAlan Cox * 348ba63339aSAlan Cox * Allocates kernel virtual memory that is backed by contiguous physical 349ba63339aSAlan Cox * pages. 350ba63339aSAlan Cox */ 351ba63339aSAlan Cox static void * 352ba63339aSAlan Cox mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 353ba63339aSAlan Cox { 354ba63339aSAlan Cox 355ba63339aSAlan Cox *flags = UMA_SLAB_PRIV; 356ba63339aSAlan Cox return (contigmalloc(bytes, M_JUMBOFRAME, wait, (vm_paddr_t)0, 357ba63339aSAlan Cox ~(vm_paddr_t)0, 1, 0)); 358ba63339aSAlan Cox } 359ba63339aSAlan Cox 360ba63339aSAlan Cox /* 361ba63339aSAlan Cox * UMA backend page deallocator for the jumbo frame zones. 362ba63339aSAlan Cox */ 363ba63339aSAlan Cox static void 364ba63339aSAlan Cox mbuf_jumbo_free(void *mem, int size, u_int8_t flags) 365ba63339aSAlan Cox { 366ba63339aSAlan Cox 367ba63339aSAlan Cox contigfree(mem, size, M_JUMBOFRAME); 368ba63339aSAlan Cox } 369ba63339aSAlan Cox 370ba63339aSAlan Cox /* 371099a0e58SBosko Milekic * Constructor for Mbuf master zone. 372099a0e58SBosko Milekic * 373099a0e58SBosko Milekic * The 'arg' pointer points to a mb_args structure which 374099a0e58SBosko Milekic * contains call-specific information required to support the 37556a4e45aSAndre Oppermann * mbuf allocation API. See mbuf.h. 376099a0e58SBosko Milekic */ 377b23f72e9SBrian Feldman static int 378b23f72e9SBrian Feldman mb_ctor_mbuf(void *mem, int size, void *arg, int how) 379099a0e58SBosko Milekic { 380099a0e58SBosko Milekic struct mbuf *m; 381099a0e58SBosko Milekic struct mb_args *args; 382b23f72e9SBrian Feldman #ifdef MAC 383b23f72e9SBrian Feldman int error; 384b23f72e9SBrian Feldman #endif 385099a0e58SBosko Milekic int flags; 386099a0e58SBosko Milekic short type; 387099a0e58SBosko Milekic 388121f0509SMike Silbersack #ifdef INVARIANTS 389121f0509SMike Silbersack trash_ctor(mem, size, arg, how); 390121f0509SMike Silbersack #endif 391099a0e58SBosko Milekic m = (struct mbuf *)mem; 392099a0e58SBosko Milekic args = (struct mb_args *)arg; 393099a0e58SBosko Milekic flags = args->flags; 394099a0e58SBosko Milekic type = args->type; 395099a0e58SBosko Milekic 39656a4e45aSAndre Oppermann /* 39756a4e45aSAndre Oppermann * The mbuf is initialized later. The caller has the 398fcf90618SGleb Smirnoff * responsibility to set up any MAC labels too. 39956a4e45aSAndre Oppermann */ 40056a4e45aSAndre Oppermann if (type == MT_NOINIT) 40156a4e45aSAndre Oppermann return (0); 40256a4e45aSAndre Oppermann 403099a0e58SBosko Milekic m->m_next = NULL; 404099a0e58SBosko Milekic m->m_nextpkt = NULL; 40556a4e45aSAndre Oppermann m->m_len = 0; 4066bc72ab9SBosko Milekic m->m_flags = flags; 40756a4e45aSAndre Oppermann m->m_type = type; 408099a0e58SBosko Milekic if (flags & M_PKTHDR) { 409099a0e58SBosko Milekic m->m_data = m->m_pktdat; 410099a0e58SBosko Milekic m->m_pkthdr.rcvif = NULL; 41156a4e45aSAndre Oppermann m->m_pkthdr.len = 0; 41256a4e45aSAndre Oppermann m->m_pkthdr.header = NULL; 413099a0e58SBosko Milekic m->m_pkthdr.csum_flags = 0; 41456a4e45aSAndre Oppermann m->m_pkthdr.csum_data = 0; 415a855e2b4SAndre Oppermann m->m_pkthdr.tso_segsz = 0; 416a855e2b4SAndre Oppermann m->m_pkthdr.ether_vtag = 0; 417099a0e58SBosko Milekic SLIST_INIT(&m->m_pkthdr.tags); 418099a0e58SBosko Milekic #ifdef MAC 419099a0e58SBosko Milekic /* If the label init fails, fail the alloc */ 42030d239bcSRobert Watson error = mac_mbuf_init(m, how); 421b23f72e9SBrian Feldman if (error) 422b23f72e9SBrian Feldman return (error); 423099a0e58SBosko Milekic #endif 4246bc72ab9SBosko Milekic } else 425099a0e58SBosko Milekic m->m_data = m->m_dat; 426b23f72e9SBrian Feldman return (0); 427099a0e58SBosko Milekic } 428099a0e58SBosko Milekic 429099a0e58SBosko Milekic /* 43056a4e45aSAndre Oppermann * The Mbuf master zone destructor. 431099a0e58SBosko Milekic */ 432099a0e58SBosko Milekic static void 433099a0e58SBosko Milekic mb_dtor_mbuf(void *mem, int size, void *arg) 434099a0e58SBosko Milekic { 435099a0e58SBosko Milekic struct mbuf *m; 436629b9e08SKip Macy unsigned long flags; 437099a0e58SBosko Milekic 438099a0e58SBosko Milekic m = (struct mbuf *)mem; 439629b9e08SKip Macy flags = (unsigned long)arg; 440629b9e08SKip Macy 441629b9e08SKip Macy if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0) 442099a0e58SBosko Milekic m_tag_delete_chain(m, NULL); 44356a4e45aSAndre Oppermann KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); 444457869b9SKip Macy KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 445121f0509SMike Silbersack #ifdef INVARIANTS 446121f0509SMike Silbersack trash_dtor(mem, size, arg); 447121f0509SMike Silbersack #endif 448099a0e58SBosko Milekic } 449099a0e58SBosko Milekic 45056a4e45aSAndre Oppermann /* 45156a4e45aSAndre Oppermann * The Mbuf Packet zone destructor. 45256a4e45aSAndre Oppermann */ 453099a0e58SBosko Milekic static void 454099a0e58SBosko Milekic mb_dtor_pack(void *mem, int size, void *arg) 455099a0e58SBosko Milekic { 456099a0e58SBosko Milekic struct mbuf *m; 457099a0e58SBosko Milekic 458099a0e58SBosko Milekic m = (struct mbuf *)mem; 459099a0e58SBosko Milekic if ((m->m_flags & M_PKTHDR) != 0) 460099a0e58SBosko Milekic m_tag_delete_chain(m, NULL); 46156a4e45aSAndre Oppermann 46256a4e45aSAndre Oppermann /* Make sure we've got a clean cluster back. */ 46356a4e45aSAndre Oppermann KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 46456a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 46556a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 466cf827063SPoul-Henning Kamp KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 467cf827063SPoul-Henning Kamp KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 46856a4e45aSAndre Oppermann KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 46949d46b61SGleb Smirnoff KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 47056a4e45aSAndre Oppermann KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); 471121f0509SMike Silbersack #ifdef INVARIANTS 472121f0509SMike Silbersack trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 473121f0509SMike Silbersack #endif 4746c125b8dSMohan Srinivasan /* 475ef44c8d2SDavid E. O'Brien * If there are processes blocked on zone_clust, waiting for pages 476ef44c8d2SDavid E. O'Brien * to be freed up, * cause them to be woken up by draining the 477ef44c8d2SDavid E. O'Brien * packet zone. We are exposed to a race here * (in the check for 478ef44c8d2SDavid E. O'Brien * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 479ef44c8d2SDavid E. O'Brien * is deliberate. We don't want to acquire the zone lock for every 480ef44c8d2SDavid E. O'Brien * mbuf free. 4816c125b8dSMohan Srinivasan */ 4826c125b8dSMohan Srinivasan if (uma_zone_exhausted_nolock(zone_clust)) 4836c125b8dSMohan Srinivasan zone_drain(zone_pack); 484099a0e58SBosko Milekic } 485099a0e58SBosko Milekic 486099a0e58SBosko Milekic /* 487ec63cb90SAndre Oppermann * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 488099a0e58SBosko Milekic * 489099a0e58SBosko Milekic * Here the 'arg' pointer points to the Mbuf which we 49056a4e45aSAndre Oppermann * are configuring cluster storage for. If 'arg' is 49156a4e45aSAndre Oppermann * empty we allocate just the cluster without setting 49256a4e45aSAndre Oppermann * the mbuf to it. See mbuf.h. 493099a0e58SBosko Milekic */ 494b23f72e9SBrian Feldman static int 495b23f72e9SBrian Feldman mb_ctor_clust(void *mem, int size, void *arg, int how) 496099a0e58SBosko Milekic { 497099a0e58SBosko Milekic struct mbuf *m; 49856a4e45aSAndre Oppermann u_int *refcnt; 4990f4d9d04SKip Macy int type; 5000f4d9d04SKip Macy uma_zone_t zone; 501099a0e58SBosko Milekic 502121f0509SMike Silbersack #ifdef INVARIANTS 503121f0509SMike Silbersack trash_ctor(mem, size, arg, how); 504121f0509SMike Silbersack #endif 50556a4e45aSAndre Oppermann switch (size) { 50656a4e45aSAndre Oppermann case MCLBYTES: 50756a4e45aSAndre Oppermann type = EXT_CLUSTER; 5080f4d9d04SKip Macy zone = zone_clust; 50956a4e45aSAndre Oppermann break; 510ec63cb90SAndre Oppermann #if MJUMPAGESIZE != MCLBYTES 511ec63cb90SAndre Oppermann case MJUMPAGESIZE: 512ec63cb90SAndre Oppermann type = EXT_JUMBOP; 5130f4d9d04SKip Macy zone = zone_jumbop; 514d5269a63SAndre Oppermann break; 51536ae3fd3SAndre Oppermann #endif 51656a4e45aSAndre Oppermann case MJUM9BYTES: 51756a4e45aSAndre Oppermann type = EXT_JUMBO9; 5180f4d9d04SKip Macy zone = zone_jumbo9; 51956a4e45aSAndre Oppermann break; 52056a4e45aSAndre Oppermann case MJUM16BYTES: 52156a4e45aSAndre Oppermann type = EXT_JUMBO16; 5220f4d9d04SKip Macy zone = zone_jumbo16; 52356a4e45aSAndre Oppermann break; 52456a4e45aSAndre Oppermann default: 52556a4e45aSAndre Oppermann panic("unknown cluster size"); 52656a4e45aSAndre Oppermann break; 52756a4e45aSAndre Oppermann } 5280f4d9d04SKip Macy 5290f4d9d04SKip Macy m = (struct mbuf *)arg; 5300f4d9d04SKip Macy refcnt = uma_find_refcnt(zone, mem); 5310f4d9d04SKip Macy *refcnt = 1; 5320f4d9d04SKip Macy if (m != NULL) { 533099a0e58SBosko Milekic m->m_ext.ext_buf = (caddr_t)mem; 534099a0e58SBosko Milekic m->m_data = m->m_ext.ext_buf; 535099a0e58SBosko Milekic m->m_flags |= M_EXT; 536099a0e58SBosko Milekic m->m_ext.ext_free = NULL; 537cf827063SPoul-Henning Kamp m->m_ext.ext_arg1 = NULL; 538cf827063SPoul-Henning Kamp m->m_ext.ext_arg2 = NULL; 53956a4e45aSAndre Oppermann m->m_ext.ext_size = size; 54056a4e45aSAndre Oppermann m->m_ext.ext_type = type; 5410f4d9d04SKip Macy m->m_ext.ref_cnt = refcnt; 54256a4e45aSAndre Oppermann } 5430f4d9d04SKip Macy 544b23f72e9SBrian Feldman return (0); 545099a0e58SBosko Milekic } 546099a0e58SBosko Milekic 54756a4e45aSAndre Oppermann /* 54856a4e45aSAndre Oppermann * The Mbuf Cluster zone destructor. 54956a4e45aSAndre Oppermann */ 550099a0e58SBosko Milekic static void 551099a0e58SBosko Milekic mb_dtor_clust(void *mem, int size, void *arg) 552099a0e58SBosko Milekic { 553121f0509SMike Silbersack #ifdef INVARIANTS 5540f4d9d04SKip Macy uma_zone_t zone; 5550f4d9d04SKip Macy 5560f4d9d04SKip Macy zone = m_getzone(size); 5570f4d9d04SKip Macy KASSERT(*(uma_find_refcnt(zone, mem)) <= 1, 5580f4d9d04SKip Macy ("%s: refcnt incorrect %u", __func__, 5590f4d9d04SKip Macy *(uma_find_refcnt(zone, mem))) ); 5600f4d9d04SKip Macy 561121f0509SMike Silbersack trash_dtor(mem, size, arg); 562121f0509SMike Silbersack #endif 563099a0e58SBosko Milekic } 564099a0e58SBosko Milekic 565099a0e58SBosko Milekic /* 566099a0e58SBosko Milekic * The Packet secondary zone's init routine, executed on the 56756a4e45aSAndre Oppermann * object's transition from mbuf keg slab to zone cache. 568099a0e58SBosko Milekic */ 569b23f72e9SBrian Feldman static int 57056a4e45aSAndre Oppermann mb_zinit_pack(void *mem, int size, int how) 571099a0e58SBosko Milekic { 572099a0e58SBosko Milekic struct mbuf *m; 573099a0e58SBosko Milekic 57456a4e45aSAndre Oppermann m = (struct mbuf *)mem; /* m is virgin. */ 575a7bd90efSAndre Oppermann if (uma_zalloc_arg(zone_clust, m, how) == NULL || 576a7bd90efSAndre Oppermann m->m_ext.ext_buf == NULL) 577b23f72e9SBrian Feldman return (ENOMEM); 578cd5bb63bSAndre Oppermann m->m_ext.ext_type = EXT_PACKET; /* Override. */ 579121f0509SMike Silbersack #ifdef INVARIANTS 580121f0509SMike Silbersack trash_init(m->m_ext.ext_buf, MCLBYTES, how); 581121f0509SMike Silbersack #endif 582b23f72e9SBrian Feldman return (0); 583099a0e58SBosko Milekic } 584099a0e58SBosko Milekic 585099a0e58SBosko Milekic /* 586099a0e58SBosko Milekic * The Packet secondary zone's fini routine, executed on the 587099a0e58SBosko Milekic * object's transition from zone cache to keg slab. 588099a0e58SBosko Milekic */ 589099a0e58SBosko Milekic static void 59056a4e45aSAndre Oppermann mb_zfini_pack(void *mem, int size) 591099a0e58SBosko Milekic { 592099a0e58SBosko Milekic struct mbuf *m; 593099a0e58SBosko Milekic 594099a0e58SBosko Milekic m = (struct mbuf *)mem; 595121f0509SMike Silbersack #ifdef INVARIANTS 596121f0509SMike Silbersack trash_fini(m->m_ext.ext_buf, MCLBYTES); 597121f0509SMike Silbersack #endif 598099a0e58SBosko Milekic uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 599a7b844d2SMike Silbersack #ifdef INVARIANTS 600a7b844d2SMike Silbersack trash_dtor(mem, size, NULL); 601a7b844d2SMike Silbersack #endif 602099a0e58SBosko Milekic } 603099a0e58SBosko Milekic 604099a0e58SBosko Milekic /* 605099a0e58SBosko Milekic * The "packet" keg constructor. 606099a0e58SBosko Milekic */ 607b23f72e9SBrian Feldman static int 608b23f72e9SBrian Feldman mb_ctor_pack(void *mem, int size, void *arg, int how) 609099a0e58SBosko Milekic { 610099a0e58SBosko Milekic struct mbuf *m; 611099a0e58SBosko Milekic struct mb_args *args; 612b23f72e9SBrian Feldman #ifdef MAC 613b23f72e9SBrian Feldman int error; 614b23f72e9SBrian Feldman #endif 615b23f72e9SBrian Feldman int flags; 616099a0e58SBosko Milekic short type; 617099a0e58SBosko Milekic 618099a0e58SBosko Milekic m = (struct mbuf *)mem; 619099a0e58SBosko Milekic args = (struct mb_args *)arg; 620099a0e58SBosko Milekic flags = args->flags; 621099a0e58SBosko Milekic type = args->type; 622099a0e58SBosko Milekic 623121f0509SMike Silbersack #ifdef INVARIANTS 624121f0509SMike Silbersack trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 625121f0509SMike Silbersack #endif 626099a0e58SBosko Milekic m->m_next = NULL; 6276bc72ab9SBosko Milekic m->m_nextpkt = NULL; 628099a0e58SBosko Milekic m->m_data = m->m_ext.ext_buf; 62956a4e45aSAndre Oppermann m->m_len = 0; 63056a4e45aSAndre Oppermann m->m_flags = (flags | M_EXT); 63156a4e45aSAndre Oppermann m->m_type = type; 632099a0e58SBosko Milekic 633099a0e58SBosko Milekic if (flags & M_PKTHDR) { 634099a0e58SBosko Milekic m->m_pkthdr.rcvif = NULL; 63556a4e45aSAndre Oppermann m->m_pkthdr.len = 0; 63656a4e45aSAndre Oppermann m->m_pkthdr.header = NULL; 637099a0e58SBosko Milekic m->m_pkthdr.csum_flags = 0; 63856a4e45aSAndre Oppermann m->m_pkthdr.csum_data = 0; 639a855e2b4SAndre Oppermann m->m_pkthdr.tso_segsz = 0; 640a855e2b4SAndre Oppermann m->m_pkthdr.ether_vtag = 0; 641099a0e58SBosko Milekic SLIST_INIT(&m->m_pkthdr.tags); 642099a0e58SBosko Milekic #ifdef MAC 643099a0e58SBosko Milekic /* If the label init fails, fail the alloc */ 64430d239bcSRobert Watson error = mac_mbuf_init(m, how); 645b23f72e9SBrian Feldman if (error) 646b23f72e9SBrian Feldman return (error); 647099a0e58SBosko Milekic #endif 648099a0e58SBosko Milekic } 64956a4e45aSAndre Oppermann /* m_ext is already initialized. */ 65056a4e45aSAndre Oppermann 651b23f72e9SBrian Feldman return (0); 652099a0e58SBosko Milekic } 653099a0e58SBosko Milekic 654099a0e58SBosko Milekic /* 655099a0e58SBosko Milekic * This is the protocol drain routine. 656099a0e58SBosko Milekic * 657099a0e58SBosko Milekic * No locks should be held when this is called. The drain routines have to 658099a0e58SBosko Milekic * presently acquire some locks which raises the possibility of lock order 659099a0e58SBosko Milekic * reversal. 660099a0e58SBosko Milekic */ 661099a0e58SBosko Milekic static void 662099a0e58SBosko Milekic mb_reclaim(void *junk) 663099a0e58SBosko Milekic { 664099a0e58SBosko Milekic struct domain *dp; 665099a0e58SBosko Milekic struct protosw *pr; 666099a0e58SBosko Milekic 667099a0e58SBosko Milekic WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, 668099a0e58SBosko Milekic "mb_reclaim()"); 669099a0e58SBosko Milekic 670099a0e58SBosko Milekic for (dp = domains; dp != NULL; dp = dp->dom_next) 671099a0e58SBosko Milekic for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 672099a0e58SBosko Milekic if (pr->pr_drain != NULL) 673099a0e58SBosko Milekic (*pr->pr_drain)(); 674099a0e58SBosko Milekic } 675