xref: /freebsd/sys/kern/kern_mbuf.c (revision 54503a13d8711f183ffedbf6a994ae79108b791b)
1099a0e58SBosko Milekic /*-
28076cb52SBosko Milekic  * Copyright (c) 2004, 2005,
38076cb52SBosko Milekic  *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
4099a0e58SBosko Milekic  *
5099a0e58SBosko Milekic  * Redistribution and use in source and binary forms, with or without
6099a0e58SBosko Milekic  * modification, are permitted provided that the following conditions
7099a0e58SBosko Milekic  * are met:
8099a0e58SBosko Milekic  * 1. Redistributions of source code must retain the above copyright
9099a0e58SBosko Milekic  *    notice unmodified, this list of conditions and the following
10099a0e58SBosko Milekic  *    disclaimer.
11099a0e58SBosko Milekic  * 2. Redistributions in binary form must reproduce the above copyright
12099a0e58SBosko Milekic  *    notice, this list of conditions and the following disclaimer in the
13099a0e58SBosko Milekic  *    documentation and/or other materials provided with the distribution.
14099a0e58SBosko Milekic  *
15099a0e58SBosko Milekic  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16099a0e58SBosko Milekic  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17099a0e58SBosko Milekic  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18099a0e58SBosko Milekic  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19099a0e58SBosko Milekic  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20099a0e58SBosko Milekic  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21099a0e58SBosko Milekic  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22099a0e58SBosko Milekic  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23099a0e58SBosko Milekic  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24099a0e58SBosko Milekic  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25099a0e58SBosko Milekic  * SUCH DAMAGE.
26099a0e58SBosko Milekic  */
27099a0e58SBosko Milekic 
28099a0e58SBosko Milekic #include <sys/cdefs.h>
29099a0e58SBosko Milekic __FBSDID("$FreeBSD$");
30099a0e58SBosko Milekic 
31099a0e58SBosko Milekic #include "opt_param.h"
32099a0e58SBosko Milekic 
33099a0e58SBosko Milekic #include <sys/param.h>
34099a0e58SBosko Milekic #include <sys/malloc.h>
35*54503a13SJonathan T. Looney #include <sys/types.h>
36099a0e58SBosko Milekic #include <sys/systm.h>
37099a0e58SBosko Milekic #include <sys/mbuf.h>
38099a0e58SBosko Milekic #include <sys/domain.h>
39099a0e58SBosko Milekic #include <sys/eventhandler.h>
40099a0e58SBosko Milekic #include <sys/kernel.h>
41*54503a13SJonathan T. Looney #include <sys/lock.h>
42*54503a13SJonathan T. Looney #include <sys/mutex.h>
43099a0e58SBosko Milekic #include <sys/protosw.h>
44099a0e58SBosko Milekic #include <sys/smp.h>
45099a0e58SBosko Milekic #include <sys/sysctl.h>
46099a0e58SBosko Milekic 
47aed55708SRobert Watson #include <security/mac/mac_framework.h>
48aed55708SRobert Watson 
49099a0e58SBosko Milekic #include <vm/vm.h>
50c45c0034SAlan Cox #include <vm/vm_extern.h>
51c45c0034SAlan Cox #include <vm/vm_kern.h>
52099a0e58SBosko Milekic #include <vm/vm_page.h>
5337140716SAndre Oppermann #include <vm/vm_map.h>
54099a0e58SBosko Milekic #include <vm/uma.h>
55121f0509SMike Silbersack #include <vm/uma_int.h>
56121f0509SMike Silbersack #include <vm/uma_dbg.h>
57099a0e58SBosko Milekic 
58099a0e58SBosko Milekic /*
59099a0e58SBosko Milekic  * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
60099a0e58SBosko Milekic  * Zones.
61099a0e58SBosko Milekic  *
62099a0e58SBosko Milekic  * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
63099a0e58SBosko Milekic  * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
64099a0e58SBosko Milekic  * administrator so desires.
65099a0e58SBosko Milekic  *
66099a0e58SBosko Milekic  * Mbufs are allocated from a UMA Master Zone called the Mbuf
67099a0e58SBosko Milekic  * Zone.
68099a0e58SBosko Milekic  *
69099a0e58SBosko Milekic  * Additionally, FreeBSD provides a Packet Zone, which it
70099a0e58SBosko Milekic  * configures as a Secondary Zone to the Mbuf Master Zone,
71099a0e58SBosko Milekic  * thus sharing backend Slab kegs with the Mbuf Master Zone.
72099a0e58SBosko Milekic  *
73099a0e58SBosko Milekic  * Thus common-case allocations and locking are simplified:
74099a0e58SBosko Milekic  *
75099a0e58SBosko Milekic  *  m_clget()                m_getcl()
76099a0e58SBosko Milekic  *    |                         |
77099a0e58SBosko Milekic  *    |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
78099a0e58SBosko Milekic  *    |   |             [     Packet   ]            |
79099a0e58SBosko Milekic  *  [(Cluster Cache)]   [    Secondary ]   [ (Mbuf Cache)     ]
80099a0e58SBosko Milekic  *  [ Cluster Zone  ]   [     Zone     ]   [ Mbuf Master Zone ]
81099a0e58SBosko Milekic  *        |                       \________         |
82099a0e58SBosko Milekic  *  [ Cluster Keg   ]                      \       /
83099a0e58SBosko Milekic  *        |	                         [ Mbuf Keg   ]
84099a0e58SBosko Milekic  *  [ Cluster Slabs ]                         |
85099a0e58SBosko Milekic  *        |                              [ Mbuf Slabs ]
86099a0e58SBosko Milekic  *         \____________(VM)_________________/
8756a4e45aSAndre Oppermann  *
8856a4e45aSAndre Oppermann  *
89fcf90618SGleb Smirnoff  * Whenever an object is allocated with uma_zalloc() out of
9056a4e45aSAndre Oppermann  * one of the Zones its _ctor_ function is executed.  Likewise,
91fcf90618SGleb Smirnoff  * for any deallocation through uma_zfree() the _dtor_ function
9256a4e45aSAndre Oppermann  * is executed.
9356a4e45aSAndre Oppermann  *
9456a4e45aSAndre Oppermann  * Caches are per-CPU and are filled from the Master Zone.
9556a4e45aSAndre Oppermann  *
96fcf90618SGleb Smirnoff  * Whenever an object is allocated from the underlying global
9756a4e45aSAndre Oppermann  * memory pool it gets pre-initialized with the _zinit_ functions.
9856a4e45aSAndre Oppermann  * When the Kegs are overfull, objects get decommissioned with
9956a4e45aSAndre Oppermann  * _zfini_ functions and freed back to the global memory pool.
10056a4e45aSAndre Oppermann  *
101099a0e58SBosko Milekic  */
102099a0e58SBosko Milekic 
103ead46972SAndre Oppermann int nmbufs;			/* limits number of mbufs */
10456a4e45aSAndre Oppermann int nmbclusters;		/* limits number of mbuf clusters */
105ec63cb90SAndre Oppermann int nmbjumbop;			/* limits number of page size jumbo clusters */
10656a4e45aSAndre Oppermann int nmbjumbo9;			/* limits number of 9k jumbo clusters */
10756a4e45aSAndre Oppermann int nmbjumbo16;			/* limits number of 16k jumbo clusters */
108099a0e58SBosko Milekic 
109e0c00addSAndre Oppermann static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */
110e0c00addSAndre Oppermann 
111af3b2549SHans Petter Selasky SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
112b6f49c23SHiren Panchasara     "Maximum real memory allocatable to various mbuf types");
113e0c00addSAndre Oppermann 
11462938659SBjoern A. Zeeb /*
11537140716SAndre Oppermann  * tunable_mbinit() has to be run before any mbuf allocations are done.
11662938659SBjoern A. Zeeb  */
117099a0e58SBosko Milekic static void
118099a0e58SBosko Milekic tunable_mbinit(void *dummy)
119099a0e58SBosko Milekic {
120e0c00addSAndre Oppermann 	quad_t realmem;
12137140716SAndre Oppermann 
12237140716SAndre Oppermann 	/*
12337140716SAndre Oppermann 	 * The default limit for all mbuf related memory is 1/2 of all
12437140716SAndre Oppermann 	 * available kernel memory (physical or kmem).
12537140716SAndre Oppermann 	 * At most it can be 3/4 of available kernel memory.
12637140716SAndre Oppermann 	 */
1275df87b21SJeff Roberson 	realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
12837140716SAndre Oppermann 	maxmbufmem = realmem / 2;
129e0c00addSAndre Oppermann 	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
13037140716SAndre Oppermann 	if (maxmbufmem > realmem / 4 * 3)
13137140716SAndre Oppermann 		maxmbufmem = realmem / 4 * 3;
132099a0e58SBosko Milekic 
133812302c3SNavdeep Parhar 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
134416a434cSAndre Oppermann 	if (nmbclusters == 0)
135416a434cSAndre Oppermann 		nmbclusters = maxmbufmem / MCLBYTES / 4;
136812302c3SNavdeep Parhar 
137812302c3SNavdeep Parhar 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
138812302c3SNavdeep Parhar 	if (nmbjumbop == 0)
139416a434cSAndre Oppermann 		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;
140812302c3SNavdeep Parhar 
141812302c3SNavdeep Parhar 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
142812302c3SNavdeep Parhar 	if (nmbjumbo9 == 0)
143416a434cSAndre Oppermann 		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;
144812302c3SNavdeep Parhar 
145812302c3SNavdeep Parhar 	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
146812302c3SNavdeep Parhar 	if (nmbjumbo16 == 0)
147416a434cSAndre Oppermann 		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;
148416a434cSAndre Oppermann 
149416a434cSAndre Oppermann 	/*
150416a434cSAndre Oppermann 	 * We need at least as many mbufs as we have clusters of
151416a434cSAndre Oppermann 	 * the various types added together.
152416a434cSAndre Oppermann 	 */
153416a434cSAndre Oppermann 	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
154416a434cSAndre Oppermann 	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
155416a434cSAndre Oppermann 		nmbufs = lmax(maxmbufmem / MSIZE / 5,
156416a434cSAndre Oppermann 		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
157099a0e58SBosko Milekic }
15837140716SAndre Oppermann SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
159099a0e58SBosko Milekic 
1604f590175SPaul Saab static int
1614f590175SPaul Saab sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
1624f590175SPaul Saab {
1634f590175SPaul Saab 	int error, newnmbclusters;
1644f590175SPaul Saab 
1654f590175SPaul Saab 	newnmbclusters = nmbclusters;
166041b706bSDavid Malone 	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
167d251e700SJohn Baldwin 	if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
168ead46972SAndre Oppermann 		if (newnmbclusters > nmbclusters &&
169ead46972SAndre Oppermann 		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
1704f590175SPaul Saab 			nmbclusters = newnmbclusters;
171bc4a1b8cSAndre Oppermann 			nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
1724f590175SPaul Saab 			EVENTHANDLER_INVOKE(nmbclusters_change);
1734f590175SPaul Saab 		} else
1744f590175SPaul Saab 			error = EINVAL;
1754f590175SPaul Saab 	}
1764f590175SPaul Saab 	return (error);
1774f590175SPaul Saab }
1784f590175SPaul Saab SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
1794f590175SPaul Saab &nmbclusters, 0, sysctl_nmbclusters, "IU",
180099a0e58SBosko Milekic     "Maximum number of mbuf clusters allowed");
181cf70a46bSRandall Stewart 
182cf70a46bSRandall Stewart static int
183cf70a46bSRandall Stewart sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
184cf70a46bSRandall Stewart {
185cf70a46bSRandall Stewart 	int error, newnmbjumbop;
186cf70a46bSRandall Stewart 
187cf70a46bSRandall Stewart 	newnmbjumbop = nmbjumbop;
188cf70a46bSRandall Stewart 	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
189d251e700SJohn Baldwin 	if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
190ead46972SAndre Oppermann 		if (newnmbjumbop > nmbjumbop &&
191ead46972SAndre Oppermann 		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
192cf70a46bSRandall Stewart 			nmbjumbop = newnmbjumbop;
193bc4a1b8cSAndre Oppermann 			nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
194cf70a46bSRandall Stewart 		} else
195cf70a46bSRandall Stewart 			error = EINVAL;
196cf70a46bSRandall Stewart 	}
197cf70a46bSRandall Stewart 	return (error);
198cf70a46bSRandall Stewart }
199cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
200cf70a46bSRandall Stewart &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
201ec63cb90SAndre Oppermann     "Maximum number of mbuf page size jumbo clusters allowed");
202cf70a46bSRandall Stewart 
203cf70a46bSRandall Stewart static int
204cf70a46bSRandall Stewart sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
205cf70a46bSRandall Stewart {
206cf70a46bSRandall Stewart 	int error, newnmbjumbo9;
207cf70a46bSRandall Stewart 
208cf70a46bSRandall Stewart 	newnmbjumbo9 = nmbjumbo9;
209cf70a46bSRandall Stewart 	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
210d251e700SJohn Baldwin 	if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
211ead46972SAndre Oppermann 		if (newnmbjumbo9 > nmbjumbo9 &&
212ead46972SAndre Oppermann 		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
213cf70a46bSRandall Stewart 			nmbjumbo9 = newnmbjumbo9;
214bc4a1b8cSAndre Oppermann 			nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
215cf70a46bSRandall Stewart 		} else
216cf70a46bSRandall Stewart 			error = EINVAL;
217cf70a46bSRandall Stewart 	}
218cf70a46bSRandall Stewart 	return (error);
219cf70a46bSRandall Stewart }
220cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
221cf70a46bSRandall Stewart &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
22256a4e45aSAndre Oppermann     "Maximum number of mbuf 9k jumbo clusters allowed");
223cf70a46bSRandall Stewart 
224cf70a46bSRandall Stewart static int
225cf70a46bSRandall Stewart sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
226cf70a46bSRandall Stewart {
227cf70a46bSRandall Stewart 	int error, newnmbjumbo16;
228cf70a46bSRandall Stewart 
229cf70a46bSRandall Stewart 	newnmbjumbo16 = nmbjumbo16;
230cf70a46bSRandall Stewart 	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
231d251e700SJohn Baldwin 	if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
232ead46972SAndre Oppermann 		if (newnmbjumbo16 > nmbjumbo16 &&
233ead46972SAndre Oppermann 		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
234cf70a46bSRandall Stewart 			nmbjumbo16 = newnmbjumbo16;
235bc4a1b8cSAndre Oppermann 			nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
236cf70a46bSRandall Stewart 		} else
237cf70a46bSRandall Stewart 			error = EINVAL;
238cf70a46bSRandall Stewart 	}
239cf70a46bSRandall Stewart 	return (error);
240cf70a46bSRandall Stewart }
241cf70a46bSRandall Stewart SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
242cf70a46bSRandall Stewart &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
24356a4e45aSAndre Oppermann     "Maximum number of mbuf 16k jumbo clusters allowed");
244cf70a46bSRandall Stewart 
245ead46972SAndre Oppermann static int
246ead46972SAndre Oppermann sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
247ead46972SAndre Oppermann {
248ead46972SAndre Oppermann 	int error, newnmbufs;
249ead46972SAndre Oppermann 
250ead46972SAndre Oppermann 	newnmbufs = nmbufs;
251ead46972SAndre Oppermann 	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
252d251e700SJohn Baldwin 	if (error == 0 && req->newptr && newnmbufs != nmbufs) {
253ead46972SAndre Oppermann 		if (newnmbufs > nmbufs) {
254ead46972SAndre Oppermann 			nmbufs = newnmbufs;
255bc4a1b8cSAndre Oppermann 			nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
256ead46972SAndre Oppermann 			EVENTHANDLER_INVOKE(nmbufs_change);
257ead46972SAndre Oppermann 		} else
258ead46972SAndre Oppermann 			error = EINVAL;
259ead46972SAndre Oppermann 	}
260ead46972SAndre Oppermann 	return (error);
261ead46972SAndre Oppermann }
262e0c00addSAndre Oppermann SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
263ead46972SAndre Oppermann &nmbufs, 0, sysctl_nmbufs, "IU",
264ead46972SAndre Oppermann     "Maximum number of mbufs allowed");
265cf70a46bSRandall Stewart 
266099a0e58SBosko Milekic /*
267099a0e58SBosko Milekic  * Zones from which we allocate.
268099a0e58SBosko Milekic  */
269099a0e58SBosko Milekic uma_zone_t	zone_mbuf;
270099a0e58SBosko Milekic uma_zone_t	zone_clust;
271099a0e58SBosko Milekic uma_zone_t	zone_pack;
272ec63cb90SAndre Oppermann uma_zone_t	zone_jumbop;
27356a4e45aSAndre Oppermann uma_zone_t	zone_jumbo9;
27456a4e45aSAndre Oppermann uma_zone_t	zone_jumbo16;
27556a4e45aSAndre Oppermann uma_zone_t	zone_ext_refcnt;
276099a0e58SBosko Milekic 
277099a0e58SBosko Milekic /*
278*54503a13SJonathan T. Looney  * Callout to assist us in freeing mbufs.
279*54503a13SJonathan T. Looney  */
280*54503a13SJonathan T. Looney static struct callout	mb_reclaim_callout;
281*54503a13SJonathan T. Looney static struct mtx	mb_reclaim_callout_mtx;
282*54503a13SJonathan T. Looney 
283*54503a13SJonathan T. Looney /*
284099a0e58SBosko Milekic  * Local prototypes.
285099a0e58SBosko Milekic  */
286b23f72e9SBrian Feldman static int	mb_ctor_mbuf(void *, int, void *, int);
287b23f72e9SBrian Feldman static int	mb_ctor_clust(void *, int, void *, int);
288b23f72e9SBrian Feldman static int	mb_ctor_pack(void *, int, void *, int);
289099a0e58SBosko Milekic static void	mb_dtor_mbuf(void *, int, void *);
29056a4e45aSAndre Oppermann static void	mb_dtor_clust(void *, int, void *);
29156a4e45aSAndre Oppermann static void	mb_dtor_pack(void *, int, void *);
29256a4e45aSAndre Oppermann static int	mb_zinit_pack(void *, int, int);
29356a4e45aSAndre Oppermann static void	mb_zfini_pack(void *, int);
294099a0e58SBosko Milekic 
295099a0e58SBosko Milekic static void	mb_reclaim(void *);
296f2c2231eSRyan Stone static void    *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
297*54503a13SJonathan T. Looney static void	mb_maxaction(uma_zone_t);
298099a0e58SBosko Milekic 
29937140716SAndre Oppermann /* Ensure that MSIZE is a power of 2. */
300a04946cfSBrian Somers CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
301a04946cfSBrian Somers 
302099a0e58SBosko Milekic /*
303099a0e58SBosko Milekic  * Initialize FreeBSD Network buffer allocation.
304099a0e58SBosko Milekic  */
305099a0e58SBosko Milekic static void
306099a0e58SBosko Milekic mbuf_init(void *dummy)
307099a0e58SBosko Milekic {
308099a0e58SBosko Milekic 
309099a0e58SBosko Milekic 	/*
310099a0e58SBosko Milekic 	 * Configure UMA zones for Mbufs, Clusters, and Packets.
311099a0e58SBosko Milekic 	 */
31256a4e45aSAndre Oppermann 	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
31356a4e45aSAndre Oppermann 	    mb_ctor_mbuf, mb_dtor_mbuf,
314121f0509SMike Silbersack #ifdef INVARIANTS
31556a4e45aSAndre Oppermann 	    trash_init, trash_fini,
316121f0509SMike Silbersack #else
31756a4e45aSAndre Oppermann 	    NULL, NULL,
318121f0509SMike Silbersack #endif
31956a4e45aSAndre Oppermann 	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
32045fe0bf7SPawel Jakub Dawidek 	if (nmbufs > 0)
32145fe0bf7SPawel Jakub Dawidek 		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
3226e0b6746SPawel Jakub Dawidek 	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
323*54503a13SJonathan T. Looney 	uma_zone_set_maxaction(zone_mbuf, mb_maxaction);
32456a4e45aSAndre Oppermann 
32568352adfSRobert Watson 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
32656a4e45aSAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
327121f0509SMike Silbersack #ifdef INVARIANTS
32856a4e45aSAndre Oppermann 	    trash_init, trash_fini,
329121f0509SMike Silbersack #else
33056a4e45aSAndre Oppermann 	    NULL, NULL,
331121f0509SMike Silbersack #endif
33256a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
33345fe0bf7SPawel Jakub Dawidek 	if (nmbclusters > 0)
33445fe0bf7SPawel Jakub Dawidek 		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
3356e0b6746SPawel Jakub Dawidek 	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
336*54503a13SJonathan T. Looney 	uma_zone_set_maxaction(zone_clust, mb_maxaction);
337099a0e58SBosko Milekic 
33856a4e45aSAndre Oppermann 	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
33956a4e45aSAndre Oppermann 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
34056a4e45aSAndre Oppermann 
341fcf90618SGleb Smirnoff 	/* Make jumbo frame zone too. Page size, 9k and 16k. */
342ec63cb90SAndre Oppermann 	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
343d5269a63SAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
344d5269a63SAndre Oppermann #ifdef INVARIANTS
345d5269a63SAndre Oppermann 	    trash_init, trash_fini,
346d5269a63SAndre Oppermann #else
347d5269a63SAndre Oppermann 	    NULL, NULL,
348d5269a63SAndre Oppermann #endif
349d5269a63SAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
35045fe0bf7SPawel Jakub Dawidek 	if (nmbjumbop > 0)
35145fe0bf7SPawel Jakub Dawidek 		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
3526e0b6746SPawel Jakub Dawidek 	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
353*54503a13SJonathan T. Looney 	uma_zone_set_maxaction(zone_jumbop, mb_maxaction);
354d5269a63SAndre Oppermann 
35556a4e45aSAndre Oppermann 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
35656a4e45aSAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
35756a4e45aSAndre Oppermann #ifdef INVARIANTS
35856a4e45aSAndre Oppermann 	    trash_init, trash_fini,
35956a4e45aSAndre Oppermann #else
36056a4e45aSAndre Oppermann 	    NULL, NULL,
36156a4e45aSAndre Oppermann #endif
36256a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
363ba63339aSAlan Cox 	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
36445fe0bf7SPawel Jakub Dawidek 	if (nmbjumbo9 > 0)
36545fe0bf7SPawel Jakub Dawidek 		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
3666e0b6746SPawel Jakub Dawidek 	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
367*54503a13SJonathan T. Looney 	uma_zone_set_maxaction(zone_jumbo9, mb_maxaction);
36856a4e45aSAndre Oppermann 
36956a4e45aSAndre Oppermann 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
37056a4e45aSAndre Oppermann 	    mb_ctor_clust, mb_dtor_clust,
37156a4e45aSAndre Oppermann #ifdef INVARIANTS
37256a4e45aSAndre Oppermann 	    trash_init, trash_fini,
37356a4e45aSAndre Oppermann #else
37456a4e45aSAndre Oppermann 	    NULL, NULL,
37556a4e45aSAndre Oppermann #endif
37656a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
377ba63339aSAlan Cox 	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
37845fe0bf7SPawel Jakub Dawidek 	if (nmbjumbo16 > 0)
37945fe0bf7SPawel Jakub Dawidek 		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
3806e0b6746SPawel Jakub Dawidek 	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
381*54503a13SJonathan T. Looney 	uma_zone_set_maxaction(zone_jumbo16, mb_maxaction);
38256a4e45aSAndre Oppermann 
38356a4e45aSAndre Oppermann 	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
38456a4e45aSAndre Oppermann 	    NULL, NULL,
38556a4e45aSAndre Oppermann 	    NULL, NULL,
38656a4e45aSAndre Oppermann 	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
38756a4e45aSAndre Oppermann 
38856a4e45aSAndre Oppermann 	/* uma_prealloc() goes here... */
389099a0e58SBosko Milekic 
390*54503a13SJonathan T. Looney 	/* Initialize the mb_reclaim() callout. */
391*54503a13SJonathan T. Looney 	mtx_init(&mb_reclaim_callout_mtx, "mb_reclaim_callout_mtx", NULL,
392*54503a13SJonathan T. Looney 	    MTX_DEF);
393*54503a13SJonathan T. Looney 	callout_init(&mb_reclaim_callout, 1);
394*54503a13SJonathan T. Looney 
395099a0e58SBosko Milekic 	/*
396099a0e58SBosko Milekic 	 * Hook event handler for low-memory situation, used to
397099a0e58SBosko Milekic 	 * drain protocols and push data back to the caches (UMA
398099a0e58SBosko Milekic 	 * later pushes it back to VM).
399099a0e58SBosko Milekic 	 */
400099a0e58SBosko Milekic 	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
401099a0e58SBosko Milekic 	    EVENTHANDLER_PRI_FIRST);
402099a0e58SBosko Milekic }
40337140716SAndre Oppermann SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
404099a0e58SBosko Milekic 
405099a0e58SBosko Milekic /*
406ba63339aSAlan Cox  * UMA backend page allocator for the jumbo frame zones.
407ba63339aSAlan Cox  *
408ba63339aSAlan Cox  * Allocates kernel virtual memory that is backed by contiguous physical
409ba63339aSAlan Cox  * pages.
410ba63339aSAlan Cox  */
411ba63339aSAlan Cox static void *
412f2c2231eSRyan Stone mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
413ba63339aSAlan Cox {
414ba63339aSAlan Cox 
4157630c265SAlan Cox 	/* Inform UMA that this allocator uses kernel_map/object. */
4167630c265SAlan Cox 	*flags = UMA_SLAB_KERNEL;
4175df87b21SJeff Roberson 	return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
4183153e878SAlan Cox 	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
419ba63339aSAlan Cox }
420ba63339aSAlan Cox 
421ba63339aSAlan Cox /*
422099a0e58SBosko Milekic  * Constructor for Mbuf master zone.
423099a0e58SBosko Milekic  *
424099a0e58SBosko Milekic  * The 'arg' pointer points to a mb_args structure which
425099a0e58SBosko Milekic  * contains call-specific information required to support the
42656a4e45aSAndre Oppermann  * mbuf allocation API.  See mbuf.h.
427099a0e58SBosko Milekic  */
428b23f72e9SBrian Feldman static int
429b23f72e9SBrian Feldman mb_ctor_mbuf(void *mem, int size, void *arg, int how)
430099a0e58SBosko Milekic {
431099a0e58SBosko Milekic 	struct mbuf *m;
432099a0e58SBosko Milekic 	struct mb_args *args;
433b23f72e9SBrian Feldman 	int error;
434099a0e58SBosko Milekic 	int flags;
435099a0e58SBosko Milekic 	short type;
436099a0e58SBosko Milekic 
437121f0509SMike Silbersack #ifdef INVARIANTS
438121f0509SMike Silbersack 	trash_ctor(mem, size, arg, how);
439121f0509SMike Silbersack #endif
440099a0e58SBosko Milekic 	args = (struct mb_args *)arg;
441099a0e58SBosko Milekic 	type = args->type;
442099a0e58SBosko Milekic 
44356a4e45aSAndre Oppermann 	/*
44456a4e45aSAndre Oppermann 	 * The mbuf is initialized later.  The caller has the
445fcf90618SGleb Smirnoff 	 * responsibility to set up any MAC labels too.
44656a4e45aSAndre Oppermann 	 */
44756a4e45aSAndre Oppermann 	if (type == MT_NOINIT)
44856a4e45aSAndre Oppermann 		return (0);
44956a4e45aSAndre Oppermann 
450afb295ccSAndre Oppermann 	m = (struct mbuf *)mem;
451afb295ccSAndre Oppermann 	flags = args->flags;
452afb295ccSAndre Oppermann 
453afb295ccSAndre Oppermann 	error = m_init(m, NULL, size, how, type, flags);
454afb295ccSAndre Oppermann 
455b23f72e9SBrian Feldman 	return (error);
456099a0e58SBosko Milekic }
457099a0e58SBosko Milekic 
458099a0e58SBosko Milekic /*
45956a4e45aSAndre Oppermann  * The Mbuf master zone destructor.
460099a0e58SBosko Milekic  */
461099a0e58SBosko Milekic static void
462099a0e58SBosko Milekic mb_dtor_mbuf(void *mem, int size, void *arg)
463099a0e58SBosko Milekic {
464099a0e58SBosko Milekic 	struct mbuf *m;
465629b9e08SKip Macy 	unsigned long flags;
466099a0e58SBosko Milekic 
467099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
468629b9e08SKip Macy 	flags = (unsigned long)arg;
469629b9e08SKip Macy 
470a9fa76f2SNavdeep Parhar 	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
471ce6169e7SAndre Oppermann 	if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
472099a0e58SBosko Milekic 		m_tag_delete_chain(m, NULL);
473121f0509SMike Silbersack #ifdef INVARIANTS
474121f0509SMike Silbersack 	trash_dtor(mem, size, arg);
475121f0509SMike Silbersack #endif
476099a0e58SBosko Milekic }
477099a0e58SBosko Milekic 
47856a4e45aSAndre Oppermann /*
47956a4e45aSAndre Oppermann  * The Mbuf Packet zone destructor.
48056a4e45aSAndre Oppermann  */
481099a0e58SBosko Milekic static void
482099a0e58SBosko Milekic mb_dtor_pack(void *mem, int size, void *arg)
483099a0e58SBosko Milekic {
484099a0e58SBosko Milekic 	struct mbuf *m;
485099a0e58SBosko Milekic 
486099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
487099a0e58SBosko Milekic 	if ((m->m_flags & M_PKTHDR) != 0)
488099a0e58SBosko Milekic 		m_tag_delete_chain(m, NULL);
48956a4e45aSAndre Oppermann 
49056a4e45aSAndre Oppermann 	/* Make sure we've got a clean cluster back. */
49156a4e45aSAndre Oppermann 	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
49256a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
49356a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
494cf827063SPoul-Henning Kamp 	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
495cf827063SPoul-Henning Kamp 	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
49656a4e45aSAndre Oppermann 	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
49749d46b61SGleb Smirnoff 	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
498fcc34a23SGleb Smirnoff 	KASSERT(*m->m_ext.ext_cnt == 1, ("%s: ext_cnt != 1", __func__));
499121f0509SMike Silbersack #ifdef INVARIANTS
500121f0509SMike Silbersack 	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
501121f0509SMike Silbersack #endif
5026c125b8dSMohan Srinivasan 	/*
503ef44c8d2SDavid E. O'Brien 	 * If there are processes blocked on zone_clust, waiting for pages
504ef44c8d2SDavid E. O'Brien 	 * to be freed up, * cause them to be woken up by draining the
505ef44c8d2SDavid E. O'Brien 	 * packet zone.  We are exposed to a race here * (in the check for
506ef44c8d2SDavid E. O'Brien 	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
507ef44c8d2SDavid E. O'Brien 	 * is deliberate. We don't want to acquire the zone lock for every
508ef44c8d2SDavid E. O'Brien 	 * mbuf free.
5096c125b8dSMohan Srinivasan 	 */
5106c125b8dSMohan Srinivasan 	if (uma_zone_exhausted_nolock(zone_clust))
5116c125b8dSMohan Srinivasan 		zone_drain(zone_pack);
512099a0e58SBosko Milekic }
513099a0e58SBosko Milekic 
514099a0e58SBosko Milekic /*
515ec63cb90SAndre Oppermann  * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
516099a0e58SBosko Milekic  *
517099a0e58SBosko Milekic  * Here the 'arg' pointer points to the Mbuf which we
51856a4e45aSAndre Oppermann  * are configuring cluster storage for.  If 'arg' is
51956a4e45aSAndre Oppermann  * empty we allocate just the cluster without setting
52056a4e45aSAndre Oppermann  * the mbuf to it.  See mbuf.h.
521099a0e58SBosko Milekic  */
522b23f72e9SBrian Feldman static int
523b23f72e9SBrian Feldman mb_ctor_clust(void *mem, int size, void *arg, int how)
524099a0e58SBosko Milekic {
525099a0e58SBosko Milekic 	struct mbuf *m;
52656a4e45aSAndre Oppermann 	u_int *refcnt;
5270f4d9d04SKip Macy 	int type;
5280f4d9d04SKip Macy 	uma_zone_t zone;
529099a0e58SBosko Milekic 
530121f0509SMike Silbersack #ifdef INVARIANTS
531121f0509SMike Silbersack 	trash_ctor(mem, size, arg, how);
532121f0509SMike Silbersack #endif
53356a4e45aSAndre Oppermann 	switch (size) {
53456a4e45aSAndre Oppermann 	case MCLBYTES:
53556a4e45aSAndre Oppermann 		type = EXT_CLUSTER;
5360f4d9d04SKip Macy 		zone = zone_clust;
53756a4e45aSAndre Oppermann 		break;
538ec63cb90SAndre Oppermann #if MJUMPAGESIZE != MCLBYTES
539ec63cb90SAndre Oppermann 	case MJUMPAGESIZE:
540ec63cb90SAndre Oppermann 		type = EXT_JUMBOP;
5410f4d9d04SKip Macy 		zone = zone_jumbop;
542d5269a63SAndre Oppermann 		break;
54336ae3fd3SAndre Oppermann #endif
54456a4e45aSAndre Oppermann 	case MJUM9BYTES:
54556a4e45aSAndre Oppermann 		type = EXT_JUMBO9;
5460f4d9d04SKip Macy 		zone = zone_jumbo9;
54756a4e45aSAndre Oppermann 		break;
54856a4e45aSAndre Oppermann 	case MJUM16BYTES:
54956a4e45aSAndre Oppermann 		type = EXT_JUMBO16;
5500f4d9d04SKip Macy 		zone = zone_jumbo16;
55156a4e45aSAndre Oppermann 		break;
55256a4e45aSAndre Oppermann 	default:
55356a4e45aSAndre Oppermann 		panic("unknown cluster size");
55456a4e45aSAndre Oppermann 		break;
55556a4e45aSAndre Oppermann 	}
5560f4d9d04SKip Macy 
5570f4d9d04SKip Macy 	m = (struct mbuf *)arg;
5580f4d9d04SKip Macy 	refcnt = uma_find_refcnt(zone, mem);
5590f4d9d04SKip Macy 	*refcnt = 1;
5600f4d9d04SKip Macy 	if (m != NULL) {
561099a0e58SBosko Milekic 		m->m_ext.ext_buf = (caddr_t)mem;
562099a0e58SBosko Milekic 		m->m_data = m->m_ext.ext_buf;
563099a0e58SBosko Milekic 		m->m_flags |= M_EXT;
564099a0e58SBosko Milekic 		m->m_ext.ext_free = NULL;
565cf827063SPoul-Henning Kamp 		m->m_ext.ext_arg1 = NULL;
566cf827063SPoul-Henning Kamp 		m->m_ext.ext_arg2 = NULL;
56756a4e45aSAndre Oppermann 		m->m_ext.ext_size = size;
56856a4e45aSAndre Oppermann 		m->m_ext.ext_type = type;
569894734cbSAndre Oppermann 		m->m_ext.ext_flags = 0;
570fcc34a23SGleb Smirnoff 		m->m_ext.ext_cnt = refcnt;
57156a4e45aSAndre Oppermann 	}
5720f4d9d04SKip Macy 
573b23f72e9SBrian Feldman 	return (0);
574099a0e58SBosko Milekic }
575099a0e58SBosko Milekic 
57656a4e45aSAndre Oppermann /*
57756a4e45aSAndre Oppermann  * The Mbuf Cluster zone destructor.
57856a4e45aSAndre Oppermann  */
579099a0e58SBosko Milekic static void
580099a0e58SBosko Milekic mb_dtor_clust(void *mem, int size, void *arg)
581099a0e58SBosko Milekic {
582121f0509SMike Silbersack #ifdef INVARIANTS
5830f4d9d04SKip Macy 	uma_zone_t zone;
5840f4d9d04SKip Macy 
5850f4d9d04SKip Macy 	zone = m_getzone(size);
5860f4d9d04SKip Macy 	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
5870f4d9d04SKip Macy 		("%s: refcnt incorrect %u", __func__,
5880f4d9d04SKip Macy 		 *(uma_find_refcnt(zone, mem))) );
5890f4d9d04SKip Macy 
590121f0509SMike Silbersack 	trash_dtor(mem, size, arg);
591121f0509SMike Silbersack #endif
592099a0e58SBosko Milekic }
593099a0e58SBosko Milekic 
594099a0e58SBosko Milekic /*
595099a0e58SBosko Milekic  * The Packet secondary zone's init routine, executed on the
59656a4e45aSAndre Oppermann  * object's transition from mbuf keg slab to zone cache.
597099a0e58SBosko Milekic  */
598b23f72e9SBrian Feldman static int
59956a4e45aSAndre Oppermann mb_zinit_pack(void *mem, int size, int how)
600099a0e58SBosko Milekic {
601099a0e58SBosko Milekic 	struct mbuf *m;
602099a0e58SBosko Milekic 
60356a4e45aSAndre Oppermann 	m = (struct mbuf *)mem;		/* m is virgin. */
604a7bd90efSAndre Oppermann 	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
605a7bd90efSAndre Oppermann 	    m->m_ext.ext_buf == NULL)
606b23f72e9SBrian Feldman 		return (ENOMEM);
607cd5bb63bSAndre Oppermann 	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
608121f0509SMike Silbersack #ifdef INVARIANTS
609121f0509SMike Silbersack 	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
610121f0509SMike Silbersack #endif
611b23f72e9SBrian Feldman 	return (0);
612099a0e58SBosko Milekic }
613099a0e58SBosko Milekic 
614099a0e58SBosko Milekic /*
615099a0e58SBosko Milekic  * The Packet secondary zone's fini routine, executed on the
616099a0e58SBosko Milekic  * object's transition from zone cache to keg slab.
617099a0e58SBosko Milekic  */
618099a0e58SBosko Milekic static void
61956a4e45aSAndre Oppermann mb_zfini_pack(void *mem, int size)
620099a0e58SBosko Milekic {
621099a0e58SBosko Milekic 	struct mbuf *m;
622099a0e58SBosko Milekic 
623099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
624121f0509SMike Silbersack #ifdef INVARIANTS
625121f0509SMike Silbersack 	trash_fini(m->m_ext.ext_buf, MCLBYTES);
626121f0509SMike Silbersack #endif
627099a0e58SBosko Milekic 	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
628a7b844d2SMike Silbersack #ifdef INVARIANTS
629a7b844d2SMike Silbersack 	trash_dtor(mem, size, NULL);
630a7b844d2SMike Silbersack #endif
631099a0e58SBosko Milekic }
632099a0e58SBosko Milekic 
633099a0e58SBosko Milekic /*
634099a0e58SBosko Milekic  * The "packet" keg constructor.
635099a0e58SBosko Milekic  */
636b23f72e9SBrian Feldman static int
637b23f72e9SBrian Feldman mb_ctor_pack(void *mem, int size, void *arg, int how)
638099a0e58SBosko Milekic {
639099a0e58SBosko Milekic 	struct mbuf *m;
640099a0e58SBosko Milekic 	struct mb_args *args;
641ce28636bSAndre Oppermann 	int error, flags;
642099a0e58SBosko Milekic 	short type;
643099a0e58SBosko Milekic 
644099a0e58SBosko Milekic 	m = (struct mbuf *)mem;
645099a0e58SBosko Milekic 	args = (struct mb_args *)arg;
646099a0e58SBosko Milekic 	flags = args->flags;
647099a0e58SBosko Milekic 	type = args->type;
648099a0e58SBosko Milekic 
649121f0509SMike Silbersack #ifdef INVARIANTS
650121f0509SMike Silbersack 	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
651121f0509SMike Silbersack #endif
652099a0e58SBosko Milekic 
653afb295ccSAndre Oppermann 	error = m_init(m, NULL, size, how, type, flags);
654afb295ccSAndre Oppermann 
65556a4e45aSAndre Oppermann 	/* m_ext is already initialized. */
656afb295ccSAndre Oppermann 	m->m_data = m->m_ext.ext_buf;
657afb295ccSAndre Oppermann  	m->m_flags = (flags | M_EXT);
65856a4e45aSAndre Oppermann 
659afb295ccSAndre Oppermann 	return (error);
660099a0e58SBosko Milekic }
661099a0e58SBosko Milekic 
6625b204a11SKip Macy int
6635b204a11SKip Macy m_pkthdr_init(struct mbuf *m, int how)
6645b204a11SKip Macy {
6655b204a11SKip Macy #ifdef MAC
6665b204a11SKip Macy 	int error;
6675b204a11SKip Macy #endif
6685b204a11SKip Macy 	m->m_data = m->m_pktdat;
669b7bd677fSEd Maste 	bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
6705b204a11SKip Macy #ifdef MAC
6715b204a11SKip Macy 	/* If the label init fails, fail the alloc */
6725b204a11SKip Macy 	error = mac_mbuf_init(m, how);
6735b204a11SKip Macy 	if (error)
6745b204a11SKip Macy 		return (error);
6755b204a11SKip Macy #endif
6765b204a11SKip Macy 
6775b204a11SKip Macy 	return (0);
6785b204a11SKip Macy }
6795b204a11SKip Macy 
680099a0e58SBosko Milekic /*
681099a0e58SBosko Milekic  * This is the protocol drain routine.
682099a0e58SBosko Milekic  *
683099a0e58SBosko Milekic  * No locks should be held when this is called.  The drain routines have to
684099a0e58SBosko Milekic  * presently acquire some locks which raises the possibility of lock order
685099a0e58SBosko Milekic  * reversal.
686099a0e58SBosko Milekic  */
687099a0e58SBosko Milekic static void
688099a0e58SBosko Milekic mb_reclaim(void *junk)
689099a0e58SBosko Milekic {
690099a0e58SBosko Milekic 	struct domain *dp;
691099a0e58SBosko Milekic 	struct protosw *pr;
692099a0e58SBosko Milekic 
693099a0e58SBosko Milekic 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
694099a0e58SBosko Milekic 	    "mb_reclaim()");
695099a0e58SBosko Milekic 
696099a0e58SBosko Milekic 	for (dp = domains; dp != NULL; dp = dp->dom_next)
697099a0e58SBosko Milekic 		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
698099a0e58SBosko Milekic 			if (pr->pr_drain != NULL)
699099a0e58SBosko Milekic 				(*pr->pr_drain)();
700099a0e58SBosko Milekic }
701*54503a13SJonathan T. Looney 
702*54503a13SJonathan T. Looney /*
703*54503a13SJonathan T. Looney  * This is the function called by the mb_reclaim_callout, which is
704*54503a13SJonathan T. Looney  * used when we hit the maximum for a zone.
705*54503a13SJonathan T. Looney  *
706*54503a13SJonathan T. Looney  * (See mb_maxaction() below.)
707*54503a13SJonathan T. Looney  */
708*54503a13SJonathan T. Looney static void
709*54503a13SJonathan T. Looney mb_reclaim_timer(void *junk __unused)
710*54503a13SJonathan T. Looney {
711*54503a13SJonathan T. Looney 
712*54503a13SJonathan T. Looney 	mtx_lock(&mb_reclaim_callout_mtx);
713*54503a13SJonathan T. Looney 
714*54503a13SJonathan T. Looney 	/*
715*54503a13SJonathan T. Looney 	 * Avoid running this function extra times by skipping this invocation
716*54503a13SJonathan T. Looney 	 * if the callout has already been rescheduled.
717*54503a13SJonathan T. Looney 	 */
718*54503a13SJonathan T. Looney 	if (callout_pending(&mb_reclaim_callout) ||
719*54503a13SJonathan T. Looney 	    !callout_active(&mb_reclaim_callout)) {
720*54503a13SJonathan T. Looney 		mtx_unlock(&mb_reclaim_callout_mtx);
721*54503a13SJonathan T. Looney 		return;
722*54503a13SJonathan T. Looney 	}
723*54503a13SJonathan T. Looney 	mtx_unlock(&mb_reclaim_callout_mtx);
724*54503a13SJonathan T. Looney 
725*54503a13SJonathan T. Looney 	mb_reclaim(NULL);
726*54503a13SJonathan T. Looney 
727*54503a13SJonathan T. Looney 	mtx_lock(&mb_reclaim_callout_mtx);
728*54503a13SJonathan T. Looney 	callout_deactivate(&mb_reclaim_callout);
729*54503a13SJonathan T. Looney 	mtx_unlock(&mb_reclaim_callout_mtx);
730*54503a13SJonathan T. Looney }
731*54503a13SJonathan T. Looney 
732*54503a13SJonathan T. Looney /*
733*54503a13SJonathan T. Looney  * This function is called when we hit the maximum for a zone.
734*54503a13SJonathan T. Looney  *
735*54503a13SJonathan T. Looney  * At that point, we want to call the protocol drain routine to free up some
736*54503a13SJonathan T. Looney  * mbufs. However, we will use the callout routines to schedule this to
737*54503a13SJonathan T. Looney  * occur in another thread. (The thread calling this function holds the
738*54503a13SJonathan T. Looney  * zone lock.)
739*54503a13SJonathan T. Looney  */
740*54503a13SJonathan T. Looney static void
741*54503a13SJonathan T. Looney mb_maxaction(uma_zone_t zone __unused)
742*54503a13SJonathan T. Looney {
743*54503a13SJonathan T. Looney 
744*54503a13SJonathan T. Looney 	/*
745*54503a13SJonathan T. Looney 	 * If we can't immediately obtain the lock, either the callout
746*54503a13SJonathan T. Looney 	 * is currently running, or another thread is scheduling the
747*54503a13SJonathan T. Looney 	 * callout.
748*54503a13SJonathan T. Looney 	 */
749*54503a13SJonathan T. Looney 	if (!mtx_trylock(&mb_reclaim_callout_mtx))
750*54503a13SJonathan T. Looney 		return;
751*54503a13SJonathan T. Looney 
752*54503a13SJonathan T. Looney 	/* If not already scheduled/running, schedule the callout. */
753*54503a13SJonathan T. Looney 	if (!callout_active(&mb_reclaim_callout)) {
754*54503a13SJonathan T. Looney 		callout_reset(&mb_reclaim_callout, 1, mb_reclaim_timer, NULL);
755*54503a13SJonathan T. Looney 	}
756*54503a13SJonathan T. Looney 
757*54503a13SJonathan T. Looney 	mtx_unlock(&mb_reclaim_callout_mtx);
758*54503a13SJonathan T. Looney }
759