xref: /freebsd/sys/kern/kern_mbuf.c (revision 56a4e45ab3d2b15f708a4cf21b8cd5f8c772760c)
1099a0e58SBosko Milekic /*-
28076cb52SBosko Milekic  * Copyright (c) 2004, 2005,
38076cb52SBosko Milekic  * 	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
4099a0e58SBosko Milekic  *
5099a0e58SBosko Milekic  * Redistribution and use in source and binary forms, with or without
6099a0e58SBosko Milekic  * modification, are permitted provided that the following conditions
7099a0e58SBosko Milekic  * are met:
8099a0e58SBosko Milekic  * 1. Redistributions of source code must retain the above copyright
9099a0e58SBosko Milekic  *    notice unmodified, this list of conditions and the following
10099a0e58SBosko Milekic  *    disclaimer.
11099a0e58SBosko Milekic  * 2. Redistributions in binary form must reproduce the above copyright
12099a0e58SBosko Milekic  *    notice, this list of conditions and the following disclaimer in the
13099a0e58SBosko Milekic  *    documentation and/or other materials provided with the distribution.
14099a0e58SBosko Milekic  *
15099a0e58SBosko Milekic  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16099a0e58SBosko Milekic  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17099a0e58SBosko Milekic  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18099a0e58SBosko Milekic  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19099a0e58SBosko Milekic  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20099a0e58SBosko Milekic  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21099a0e58SBosko Milekic  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22099a0e58SBosko Milekic  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23099a0e58SBosko Milekic  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24099a0e58SBosko Milekic  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25099a0e58SBosko Milekic  * SUCH DAMAGE.
26099a0e58SBosko Milekic  */
27099a0e58SBosko Milekic 
28099a0e58SBosko Milekic #include <sys/cdefs.h>
29099a0e58SBosko Milekic __FBSDID("$FreeBSD$");
30099a0e58SBosko Milekic 
31099a0e58SBosko Milekic #include "opt_mac.h"
32099a0e58SBosko Milekic #include "opt_param.h"
33099a0e58SBosko Milekic 
34099a0e58SBosko Milekic #include <sys/param.h>
35099a0e58SBosko Milekic #include <sys/mac.h>
36099a0e58SBosko Milekic #include <sys/malloc.h>
37099a0e58SBosko Milekic #include <sys/systm.h>
38099a0e58SBosko Milekic #include <sys/mbuf.h>
39099a0e58SBosko Milekic #include <sys/domain.h>
40099a0e58SBosko Milekic #include <sys/eventhandler.h>
41099a0e58SBosko Milekic #include <sys/kernel.h>
42099a0e58SBosko Milekic #include <sys/protosw.h>
43099a0e58SBosko Milekic #include <sys/smp.h>
44099a0e58SBosko Milekic #include <sys/sysctl.h>
45099a0e58SBosko Milekic 
46099a0e58SBosko Milekic #include <vm/vm.h>
47099a0e58SBosko Milekic #include <vm/vm_page.h>
48099a0e58SBosko Milekic #include <vm/uma.h>
49121f0509SMike Silbersack #include <vm/uma_int.h>
50121f0509SMike Silbersack #include <vm/uma_dbg.h>
51099a0e58SBosko Milekic 
52099a0e58SBosko Milekic /*
53099a0e58SBosko Milekic  * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
54099a0e58SBosko Milekic  * Zones.
55099a0e58SBosko Milekic  *
56099a0e58SBosko Milekic  * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
57099a0e58SBosko Milekic  * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
58099a0e58SBosko Milekic  * administrator so desires.
59099a0e58SBosko Milekic  *
60099a0e58SBosko Milekic  * Mbufs are allocated from a UMA Master Zone called the Mbuf
61099a0e58SBosko Milekic  * Zone.
62099a0e58SBosko Milekic  *
63099a0e58SBosko Milekic  * Additionally, FreeBSD provides a Packet Zone, which it
64099a0e58SBosko Milekic  * configures as a Secondary Zone to the Mbuf Master Zone,
65099a0e58SBosko Milekic  * thus sharing backend Slab kegs with the Mbuf Master Zone.
66099a0e58SBosko Milekic  *
67099a0e58SBosko Milekic  * Thus common-case allocations and locking are simplified:
68099a0e58SBosko Milekic  *
69099a0e58SBosko Milekic  *  m_clget()                m_getcl()
70099a0e58SBosko Milekic  *    |                         |
71099a0e58SBosko Milekic  *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
72099a0e58SBosko Milekic  *    |   |             [     Packet   ]            |
73099a0e58SBosko Milekic  *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
74099a0e58SBosko Milekic  *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
75099a0e58SBosko Milekic  *        |                       \________         |
76099a0e58SBosko Milekic  *  [ Cluster Keg   ]                      \       /
77099a0e58SBosko Milekic  *        |    	                         [ Mbuf Keg   ]
78099a0e58SBosko Milekic  *  [ Cluster Slabs ]                         |
79099a0e58SBosko Milekic  *        |                              [ Mbuf Slabs ]
80099a0e58SBosko Milekic  *         \____________(VM)_________________/
8156a4e45aSAndre Oppermann  *
8256a4e45aSAndre Oppermann  *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones its _ctor_ function is executed.  The same
 * holds for any deallocation through uma_zfree(): the _dtor_
 * function is executed.
8756a4e45aSAndre Oppermann  *
8856a4e45aSAndre Oppermann  * Caches are per-CPU and are filled from the Master Zone.
8956a4e45aSAndre Oppermann  *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull objects get decommissioned with
 * _zfini_ functions and freed back to the global memory pool.
9456a4e45aSAndre Oppermann  *
95099a0e58SBosko Milekic  */
96099a0e58SBosko Milekic 
9756a4e45aSAndre Oppermann int nmbclusters;		/* limits number of mbuf clusters */
9856a4e45aSAndre Oppermann int nmbjumbo9;			/* limits number of 9k jumbo clusters */
9956a4e45aSAndre Oppermann int nmbjumbo16;			/* limits number of 16k jumbo clusters */
100099a0e58SBosko Milekic struct mbstat mbstat;
101099a0e58SBosko Milekic 
102099a0e58SBosko Milekic static void
103099a0e58SBosko Milekic tunable_mbinit(void *dummy)
104099a0e58SBosko Milekic {
105099a0e58SBosko Milekic 
106099a0e58SBosko Milekic 	/* This has to be done before VM init. */
107099a0e58SBosko Milekic 	nmbclusters = 1024 + maxusers * 64;
108099a0e58SBosko Milekic 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
109099a0e58SBosko Milekic }
110099a0e58SBosko Milekic SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL);
111099a0e58SBosko Milekic 
112099a0e58SBosko Milekic SYSCTL_DECL(_kern_ipc);
11356a4e45aSAndre Oppermann /* XXX: These should be tuneables. Can't change UMA limits on the fly. */
114099a0e58SBosko Milekic SYSCTL_INT(_kern_ipc, OID_AUTO, nmbclusters, CTLFLAG_RW, &nmbclusters, 0,
115099a0e58SBosko Milekic     "Maximum number of mbuf clusters allowed");
11656a4e45aSAndre Oppermann SYSCTL_INT(_kern_ipc, OID_AUTO, nmbjumbo9, CTLFLAG_RW, &nmbjumbo9, 0,
11756a4e45aSAndre Oppermann     "Maximum number of mbuf 9k jumbo clusters allowed");
11856a4e45aSAndre Oppermann SYSCTL_INT(_kern_ipc, OID_AUTO, nmbjumbo16, CTLFLAG_RW, &nmbjumbo16, 0,
11956a4e45aSAndre Oppermann     "Maximum number of mbuf 16k jumbo clusters allowed");
120099a0e58SBosko Milekic SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
121099a0e58SBosko Milekic     "Mbuf general information and statistics");
122099a0e58SBosko Milekic 
123099a0e58SBosko Milekic /*
124099a0e58SBosko Milekic  * Zones from which we allocate.
125099a0e58SBosko Milekic  */
126099a0e58SBosko Milekic uma_zone_t	zone_mbuf;
127099a0e58SBosko Milekic uma_zone_t	zone_clust;
128099a0e58SBosko Milekic uma_zone_t	zone_pack;
12956a4e45aSAndre Oppermann uma_zone_t	zone_jumbo9;
13056a4e45aSAndre Oppermann uma_zone_t	zone_jumbo16;
13156a4e45aSAndre Oppermann uma_zone_t	zone_ext_refcnt;
132099a0e58SBosko Milekic 
/*
 * Forward declarations of the file-local routines, grouped by the zone
 * they serve.
 */

/* Mbuf master zone. */
static int	mb_ctor_mbuf(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);

/* Cluster and jumbo cluster zones. */
static int	mb_ctor_clust(void *, int, void *, int);
static void	mb_dtor_clust(void *, int, void *);

/* Packet secondary zone. */
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);

/* Subsystem setup and low-memory handling. */
static void	mbuf_init(void *);
static void	mb_reclaim(void *);
147099a0e58SBosko Milekic 
148a04946cfSBrian Somers /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
149a04946cfSBrian Somers CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
150a04946cfSBrian Somers 
151099a0e58SBosko Milekic /*
152099a0e58SBosko Milekic  * Initialize FreeBSD Network buffer allocation.
153099a0e58SBosko Milekic  */
154099a0e58SBosko Milekic SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL)
155099a0e58SBosko Milekic static void
156099a0e58SBosko Milekic mbuf_init(void *dummy)
157099a0e58SBosko Milekic {
158099a0e58SBosko Milekic 
159099a0e58SBosko Milekic 	/*
160099a0e58SBosko Milekic 	 * Configure UMA zones for Mbufs, Clusters, and Packets.
161099a0e58SBosko Milekic 	 */
16256a4e45aSAndre Oppermann 	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
16356a4e45aSAndre Oppermann 	    mb_ctor_mbuf, mb_dtor_mbuf,
164121f0509SMike Silbersack #ifdef INVARIANTS
16556a4e45aSAndre Oppermann 	    trash_init, trash_fini,
166121f0509SMike Silbersack #else
16756a4e45aSAndre Oppermann 	    NULL, NULL,
168121f0509SMike Silbersack #endif
16956a4e45aSAndre Oppermann 	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
17056a4e45aSAndre Oppermann 
17168352adfSRobert Watson 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
17256a4e45aSAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
173121f0509SMike Silbersack #ifdef INVARIANTS
17456a4e45aSAndre Oppermann 	    trash_init, trash_fini,
175121f0509SMike Silbersack #else
17656a4e45aSAndre Oppermann 	    NULL, NULL,
177121f0509SMike Silbersack #endif
17856a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
179099a0e58SBosko Milekic 	if (nmbclusters > 0)
180099a0e58SBosko Milekic 		uma_zone_set_max(zone_clust, nmbclusters);
181099a0e58SBosko Milekic 
18256a4e45aSAndre Oppermann 	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
18356a4e45aSAndre Oppermann 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
18456a4e45aSAndre Oppermann 
18556a4e45aSAndre Oppermann 	/* Make jumbo frame zone too. 9k and 16k. */
18656a4e45aSAndre Oppermann 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
18756a4e45aSAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
18856a4e45aSAndre Oppermann #ifdef INVARIANTS
18956a4e45aSAndre Oppermann 	    trash_init, trash_fini,
19056a4e45aSAndre Oppermann #else
19156a4e45aSAndre Oppermann 	    NULL, NULL,
19256a4e45aSAndre Oppermann #endif
19356a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
19456a4e45aSAndre Oppermann 	if (nmbjumbo9 > 0)
19556a4e45aSAndre Oppermann 		uma_zone_set_max(zone_jumbo9, nmbjumbo9);
19656a4e45aSAndre Oppermann 
19756a4e45aSAndre Oppermann 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
19856a4e45aSAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
19956a4e45aSAndre Oppermann #ifdef INVARIANTS
20056a4e45aSAndre Oppermann 	    trash_init, trash_fini,
20156a4e45aSAndre Oppermann #else
20256a4e45aSAndre Oppermann 	    NULL, NULL,
20356a4e45aSAndre Oppermann #endif
20456a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
20556a4e45aSAndre Oppermann 	if (nmbjumbo16 > 0)
20656a4e45aSAndre Oppermann 		uma_zone_set_max(zone_jumbo16, nmbjumbo16);
20756a4e45aSAndre Oppermann 
20856a4e45aSAndre Oppermann 	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
20956a4e45aSAndre Oppermann 	    NULL, NULL,
21056a4e45aSAndre Oppermann 	    NULL, NULL,
21156a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
21256a4e45aSAndre Oppermann 
21356a4e45aSAndre Oppermann 	/* uma_prealloc() goes here... */
214099a0e58SBosko Milekic 
215099a0e58SBosko Milekic 	/*
216099a0e58SBosko Milekic 	 * Hook event handler for low-memory situation, used to
217099a0e58SBosko Milekic 	 * drain protocols and push data back to the caches (UMA
218099a0e58SBosko Milekic 	 * later pushes it back to VM).
219099a0e58SBosko Milekic 	 */
220099a0e58SBosko Milekic 	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
221099a0e58SBosko Milekic 	    EVENTHANDLER_PRI_FIRST);
222099a0e58SBosko Milekic 
223099a0e58SBosko Milekic 	/*
224099a0e58SBosko Milekic 	 * [Re]set counters and local statistics knobs.
225099a0e58SBosko Milekic 	 * XXX Some of these should go and be replaced, but UMA stat
226099a0e58SBosko Milekic 	 * gathering needs to be revised.
227099a0e58SBosko Milekic 	 */
228099a0e58SBosko Milekic 	mbstat.m_mbufs = 0;
229099a0e58SBosko Milekic 	mbstat.m_mclusts = 0;
230099a0e58SBosko Milekic 	mbstat.m_drain = 0;
231099a0e58SBosko Milekic 	mbstat.m_msize = MSIZE;
232099a0e58SBosko Milekic 	mbstat.m_mclbytes = MCLBYTES;
233099a0e58SBosko Milekic 	mbstat.m_minclsize = MINCLSIZE;
234099a0e58SBosko Milekic 	mbstat.m_mlen = MLEN;
235099a0e58SBosko Milekic 	mbstat.m_mhlen = MHLEN;
236099a0e58SBosko Milekic 	mbstat.m_numtypes = MT_NTYPES;
237099a0e58SBosko Milekic 
238099a0e58SBosko Milekic 	mbstat.m_mcfail = mbstat.m_mpfail = 0;
239099a0e58SBosko Milekic 	mbstat.sf_iocnt = 0;
240099a0e58SBosko Milekic 	mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
241099a0e58SBosko Milekic }
242099a0e58SBosko Milekic 
243099a0e58SBosko Milekic /*
244099a0e58SBosko Milekic  * Constructor for Mbuf master zone.
245099a0e58SBosko Milekic  *
246099a0e58SBosko Milekic  * The 'arg' pointer points to a mb_args structure which
247099a0e58SBosko Milekic  * contains call-specific information required to support the
24856a4e45aSAndre Oppermann  * mbuf allocation API.  See mbuf.h.
249099a0e58SBosko Milekic  */
250b23f72e9SBrian Feldman static int
251b23f72e9SBrian Feldman mb_ctor_mbuf(void *mem, int size, void *arg, int how)
252099a0e58SBosko Milekic {
253099a0e58SBosko Milekic 	struct mbuf *m;
254099a0e58SBosko Milekic 	struct mb_args *args;
255b23f72e9SBrian Feldman #ifdef MAC
256b23f72e9SBrian Feldman 	int error;
257b23f72e9SBrian Feldman #endif
258099a0e58SBosko Milekic 	int flags;
259099a0e58SBosko Milekic 	short type;
260099a0e58SBosko Milekic 
261121f0509SMike Silbersack #ifdef INVARIANTS
262121f0509SMike Silbersack 	trash_ctor(mem, size, arg, how);
263121f0509SMike Silbersack #endif
264099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
265099a0e58SBosko Milekic 	args = (struct mb_args *)arg;
266099a0e58SBosko Milekic 	flags = args->flags;
267099a0e58SBosko Milekic 	type = args->type;
268099a0e58SBosko Milekic 
26956a4e45aSAndre Oppermann 	/*
27056a4e45aSAndre Oppermann 	 * The mbuf is initialized later.  The caller has the
27156a4e45aSAndre Oppermann 	 * responseability to setup any MAC labels too.
27256a4e45aSAndre Oppermann 	 */
27356a4e45aSAndre Oppermann 	if (type == MT_NOINIT)
27456a4e45aSAndre Oppermann 		return (0);
27556a4e45aSAndre Oppermann 
276099a0e58SBosko Milekic 	m->m_next = NULL;
277099a0e58SBosko Milekic 	m->m_nextpkt = NULL;
27856a4e45aSAndre Oppermann 	m->m_len = 0;
2796bc72ab9SBosko Milekic 	m->m_flags = flags;
28056a4e45aSAndre Oppermann 	m->m_type = type;
281099a0e58SBosko Milekic 	if (flags & M_PKTHDR) {
282099a0e58SBosko Milekic 		m->m_data = m->m_pktdat;
283099a0e58SBosko Milekic 		m->m_pkthdr.rcvif = NULL;
28456a4e45aSAndre Oppermann 		m->m_pkthdr.len = 0;
28556a4e45aSAndre Oppermann 		m->m_pkthdr.header = NULL;
286099a0e58SBosko Milekic 		m->m_pkthdr.csum_flags = 0;
28756a4e45aSAndre Oppermann 		m->m_pkthdr.csum_data = 0;
288099a0e58SBosko Milekic 		SLIST_INIT(&m->m_pkthdr.tags);
289099a0e58SBosko Milekic #ifdef MAC
290099a0e58SBosko Milekic 		/* If the label init fails, fail the alloc */
291b23f72e9SBrian Feldman 		error = mac_init_mbuf(m, how);
292b23f72e9SBrian Feldman 		if (error)
293b23f72e9SBrian Feldman 			return (error);
294099a0e58SBosko Milekic #endif
2956bc72ab9SBosko Milekic 	} else
296099a0e58SBosko Milekic 		m->m_data = m->m_dat;
297b23f72e9SBrian Feldman 	return (0);
298099a0e58SBosko Milekic }
299099a0e58SBosko Milekic 
300099a0e58SBosko Milekic /*
30156a4e45aSAndre Oppermann  * The Mbuf master zone destructor.
302099a0e58SBosko Milekic  */
303099a0e58SBosko Milekic static void
304099a0e58SBosko Milekic mb_dtor_mbuf(void *mem, int size, void *arg)
305099a0e58SBosko Milekic {
306099a0e58SBosko Milekic 	struct mbuf *m;
307099a0e58SBosko Milekic 
308099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
309099a0e58SBosko Milekic 	if ((m->m_flags & M_PKTHDR) != 0)
310099a0e58SBosko Milekic 		m_tag_delete_chain(m, NULL);
31156a4e45aSAndre Oppermann 	KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
312121f0509SMike Silbersack #ifdef INVARIANTS
313121f0509SMike Silbersack 	trash_dtor(mem, size, arg);
314121f0509SMike Silbersack #endif
315099a0e58SBosko Milekic }
316099a0e58SBosko Milekic 
31756a4e45aSAndre Oppermann /*
31856a4e45aSAndre Oppermann  * The Mbuf Packet zone destructor.
31956a4e45aSAndre Oppermann  */
320099a0e58SBosko Milekic static void
321099a0e58SBosko Milekic mb_dtor_pack(void *mem, int size, void *arg)
322099a0e58SBosko Milekic {
323099a0e58SBosko Milekic 	struct mbuf *m;
324099a0e58SBosko Milekic 
325099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
326099a0e58SBosko Milekic 	if ((m->m_flags & M_PKTHDR) != 0)
327099a0e58SBosko Milekic 		m_tag_delete_chain(m, NULL);
32856a4e45aSAndre Oppermann 
32956a4e45aSAndre Oppermann 	/* Make sure we've got a clean cluster back. */
33056a4e45aSAndre Oppermann 	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
33156a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
33256a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
33356a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__));
33456a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
33556a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_type == EXT_CLUSTER, ("%s: ext_type != EXT_CLUSTER", __func__));
33656a4e45aSAndre Oppermann 	KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
337121f0509SMike Silbersack #ifdef INVARIANTS
338121f0509SMike Silbersack 	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
339121f0509SMike Silbersack #endif
340099a0e58SBosko Milekic }
341099a0e58SBosko Milekic 
342099a0e58SBosko Milekic /*
34356a4e45aSAndre Oppermann  * The Cluster and Jumbo[9|16] zone constructor.
344099a0e58SBosko Milekic  *
345099a0e58SBosko Milekic  * Here the 'arg' pointer points to the Mbuf which we
34656a4e45aSAndre Oppermann  * are configuring cluster storage for.  If 'arg' is
34756a4e45aSAndre Oppermann  * empty we allocate just the cluster without setting
34856a4e45aSAndre Oppermann  * the mbuf to it.  See mbuf.h.
349099a0e58SBosko Milekic  */
350b23f72e9SBrian Feldman static int
351b23f72e9SBrian Feldman mb_ctor_clust(void *mem, int size, void *arg, int how)
352099a0e58SBosko Milekic {
353099a0e58SBosko Milekic 	struct mbuf *m;
35456a4e45aSAndre Oppermann 	u_int *refcnt;
35556a4e45aSAndre Oppermann 	int type = 0;
356099a0e58SBosko Milekic 
357121f0509SMike Silbersack #ifdef INVARIANTS
358121f0509SMike Silbersack 	trash_ctor(mem, size, arg, how);
359121f0509SMike Silbersack #endif
360099a0e58SBosko Milekic 	m = (struct mbuf *)arg;
36156a4e45aSAndre Oppermann 	if (m != NULL) {
36256a4e45aSAndre Oppermann 		switch (size) {
36356a4e45aSAndre Oppermann 		case MCLBYTES:
36456a4e45aSAndre Oppermann 			type = EXT_CLUSTER;
36556a4e45aSAndre Oppermann 			break;
36656a4e45aSAndre Oppermann 		case MJUM9BYTES:
36756a4e45aSAndre Oppermann 			type = EXT_JUMBO9;
36856a4e45aSAndre Oppermann 			break;
36956a4e45aSAndre Oppermann 		case MJUM16BYTES:
37056a4e45aSAndre Oppermann 			type = EXT_JUMBO16;
37156a4e45aSAndre Oppermann 			break;
37256a4e45aSAndre Oppermann 		default:
37356a4e45aSAndre Oppermann 			panic("unknown cluster size");
37456a4e45aSAndre Oppermann 			break;
37556a4e45aSAndre Oppermann 		}
376099a0e58SBosko Milekic 		m->m_ext.ext_buf = (caddr_t)mem;
377099a0e58SBosko Milekic 		m->m_data = m->m_ext.ext_buf;
378099a0e58SBosko Milekic 		m->m_flags |= M_EXT;
379099a0e58SBosko Milekic 		m->m_ext.ext_free = NULL;
380099a0e58SBosko Milekic 		m->m_ext.ext_args = NULL;
38156a4e45aSAndre Oppermann 		m->m_ext.ext_size = size;
38256a4e45aSAndre Oppermann 		m->m_ext.ext_type = type;
38356a4e45aSAndre Oppermann 		m->m_ext.ref_cnt = uma_find_refcnt(zone_clust, mem);
38456a4e45aSAndre Oppermann 		*m->m_ext.ref_cnt = 1;
38556a4e45aSAndre Oppermann 	} else {
38656a4e45aSAndre Oppermann 		refcnt =  uma_find_refcnt(zone_clust, mem);
38756a4e45aSAndre Oppermann 		*refcnt = 1;
38856a4e45aSAndre Oppermann 	}
389b23f72e9SBrian Feldman 	return (0);
390099a0e58SBosko Milekic }
391099a0e58SBosko Milekic 
39256a4e45aSAndre Oppermann /*
39356a4e45aSAndre Oppermann  * The Mbuf Cluster zone destructor.
39456a4e45aSAndre Oppermann  */
395099a0e58SBosko Milekic static void
396099a0e58SBosko Milekic mb_dtor_clust(void *mem, int size, void *arg)
397099a0e58SBosko Milekic {
39856a4e45aSAndre Oppermann 	u_int *refcnt;
39956a4e45aSAndre Oppermann 
40056a4e45aSAndre Oppermann 	refcnt = uma_find_refcnt(zone_clust, mem);
40156a4e45aSAndre Oppermann 	KASSERT(*refcnt == 1, ("%s: refcnt incorrect %u", __func__, *refcnt));
40256a4e45aSAndre Oppermann 	*refcnt = 0;
403121f0509SMike Silbersack #ifdef INVARIANTS
404121f0509SMike Silbersack 	trash_dtor(mem, size, arg);
405121f0509SMike Silbersack #endif
406099a0e58SBosko Milekic }
407099a0e58SBosko Milekic 
408099a0e58SBosko Milekic /*
409099a0e58SBosko Milekic  * The Packet secondary zone's init routine, executed on the
41056a4e45aSAndre Oppermann  * object's transition from mbuf keg slab to zone cache.
411099a0e58SBosko Milekic  */
412b23f72e9SBrian Feldman static int
41356a4e45aSAndre Oppermann mb_zinit_pack(void *mem, int size, int how)
414099a0e58SBosko Milekic {
415099a0e58SBosko Milekic 	struct mbuf *m;
416099a0e58SBosko Milekic 
41756a4e45aSAndre Oppermann 	m = (struct mbuf *)mem;		/* m is virgin. */
418b23f72e9SBrian Feldman 	uma_zalloc_arg(zone_clust, m, how);
419b23f72e9SBrian Feldman 	if (m->m_ext.ext_buf == NULL)
420b23f72e9SBrian Feldman 		return (ENOMEM);
421121f0509SMike Silbersack #ifdef INVARIANTS
422121f0509SMike Silbersack 	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
423121f0509SMike Silbersack #endif
424b23f72e9SBrian Feldman 	return (0);
425099a0e58SBosko Milekic }
426099a0e58SBosko Milekic 
427099a0e58SBosko Milekic /*
428099a0e58SBosko Milekic  * The Packet secondary zone's fini routine, executed on the
429099a0e58SBosko Milekic  * object's transition from zone cache to keg slab.
430099a0e58SBosko Milekic  */
431099a0e58SBosko Milekic static void
43256a4e45aSAndre Oppermann mb_zfini_pack(void *mem, int size)
433099a0e58SBosko Milekic {
434099a0e58SBosko Milekic 	struct mbuf *m;
435099a0e58SBosko Milekic 
436099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
437121f0509SMike Silbersack #ifdef INVARIANTS
438121f0509SMike Silbersack 	trash_fini(m->m_ext.ext_buf, MCLBYTES);
439121f0509SMike Silbersack #endif
440099a0e58SBosko Milekic 	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
441a7b844d2SMike Silbersack #ifdef INVARIANTS
442a7b844d2SMike Silbersack 	trash_dtor(mem, size, NULL);
443a7b844d2SMike Silbersack #endif
444099a0e58SBosko Milekic }
445099a0e58SBosko Milekic 
446099a0e58SBosko Milekic /*
447099a0e58SBosko Milekic  * The "packet" keg constructor.
448099a0e58SBosko Milekic  */
449b23f72e9SBrian Feldman static int
450b23f72e9SBrian Feldman mb_ctor_pack(void *mem, int size, void *arg, int how)
451099a0e58SBosko Milekic {
452099a0e58SBosko Milekic 	struct mbuf *m;
453099a0e58SBosko Milekic 	struct mb_args *args;
454b23f72e9SBrian Feldman #ifdef MAC
455b23f72e9SBrian Feldman 	int error;
456b23f72e9SBrian Feldman #endif
457b23f72e9SBrian Feldman 	int flags;
458099a0e58SBosko Milekic 	short type;
459099a0e58SBosko Milekic 
460099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
461099a0e58SBosko Milekic 	args = (struct mb_args *)arg;
462099a0e58SBosko Milekic 	flags = args->flags;
463099a0e58SBosko Milekic 	type = args->type;
464099a0e58SBosko Milekic 
465121f0509SMike Silbersack #ifdef INVARIANTS
466121f0509SMike Silbersack 	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
467121f0509SMike Silbersack #endif
468099a0e58SBosko Milekic 	m->m_next = NULL;
4696bc72ab9SBosko Milekic 	m->m_nextpkt = NULL;
470099a0e58SBosko Milekic 	m->m_data = m->m_ext.ext_buf;
47156a4e45aSAndre Oppermann 	m->m_len = 0;
47256a4e45aSAndre Oppermann 	m->m_flags = (flags | M_EXT);
47356a4e45aSAndre Oppermann 	m->m_type = type;
474099a0e58SBosko Milekic 
475099a0e58SBosko Milekic 	if (flags & M_PKTHDR) {
476099a0e58SBosko Milekic 		m->m_pkthdr.rcvif = NULL;
47756a4e45aSAndre Oppermann 		m->m_pkthdr.len = 0;
47856a4e45aSAndre Oppermann 		m->m_pkthdr.header = NULL;
479099a0e58SBosko Milekic 		m->m_pkthdr.csum_flags = 0;
48056a4e45aSAndre Oppermann 		m->m_pkthdr.csum_data = 0;
481099a0e58SBosko Milekic 		SLIST_INIT(&m->m_pkthdr.tags);
482099a0e58SBosko Milekic #ifdef MAC
483099a0e58SBosko Milekic 		/* If the label init fails, fail the alloc */
484b23f72e9SBrian Feldman 		error = mac_init_mbuf(m, how);
485b23f72e9SBrian Feldman 		if (error)
486b23f72e9SBrian Feldman 			return (error);
487099a0e58SBosko Milekic #endif
488099a0e58SBosko Milekic 	}
48956a4e45aSAndre Oppermann 	/* m_ext is already initialized. */
49056a4e45aSAndre Oppermann 
491b23f72e9SBrian Feldman 	return (0);
492099a0e58SBosko Milekic }
493099a0e58SBosko Milekic 
494099a0e58SBosko Milekic /*
495099a0e58SBosko Milekic  * This is the protocol drain routine.
496099a0e58SBosko Milekic  *
497099a0e58SBosko Milekic  * No locks should be held when this is called.  The drain routines have to
498099a0e58SBosko Milekic  * presently acquire some locks which raises the possibility of lock order
499099a0e58SBosko Milekic  * reversal.
500099a0e58SBosko Milekic  */
501099a0e58SBosko Milekic static void
502099a0e58SBosko Milekic mb_reclaim(void *junk)
503099a0e58SBosko Milekic {
504099a0e58SBosko Milekic 	struct domain *dp;
505099a0e58SBosko Milekic 	struct protosw *pr;
506099a0e58SBosko Milekic 
507099a0e58SBosko Milekic 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
508099a0e58SBosko Milekic 	    "mb_reclaim()");
509099a0e58SBosko Milekic 
510099a0e58SBosko Milekic 	for (dp = domains; dp != NULL; dp = dp->dom_next)
511099a0e58SBosko Milekic 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
512099a0e58SBosko Milekic 			if (pr->pr_drain != NULL)
513099a0e58SBosko Milekic 				(*pr->pr_drain)();
514099a0e58SBosko Milekic }
515