xref: /freebsd/sys/vm/uma_core.c (revision 78704ef45793e56c8e064611c05c9bb8a0067e9f)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  *
28  */
29 
30 /*
31  * uma_core.c  Implementation of the Universal Memory allocator
32  *
33  * This allocator is intended to replace the multitude of similar object caches
34  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
35  * effecient.  A primary design goal is to return unused memory to the rest of
36  * the system.  This will make the system as a whole more flexible due to the
37  * ability to move memory to subsystems which most need it instead of leaving
38  * pools of reserved memory unused.
39  *
40  * The basic ideas stem from similar slab/zone based allocators whose algorithms
41  * are well known.
42  *
43  */
44 
45 /*
46  * TODO:
47  *	- Improve memory usage for large allocations
48  *	- Investigate cache size adjustments
49  */
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 
59 #include "opt_param.h"
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/kernel.h>
63 #include <sys/types.h>
64 #include <sys/queue.h>
65 #include <sys/malloc.h>
66 #include <sys/lock.h>
67 #include <sys/sysctl.h>
68 #include <sys/mutex.h>
69 #include <sys/proc.h>
70 #include <sys/smp.h>
71 #include <sys/vmmeter.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83 
84 /*
85  * This is the zone from which all zones are spawned.  The idea is that even
86  * the zone heads are allocated from the allocator, so we use the bss section
87  * to bootstrap us.
88  */
89 static struct uma_zone masterzone;
90 static uma_zone_t zones = &masterzone;
91 
92 /* This is the zone from which all of uma_slab_t's are allocated. */
93 static uma_zone_t slabzone;
94 
95 /*
96  * The initial hash tables come out of this zone so they can be allocated
97  * prior to malloc coming up.
98  */
99 static uma_zone_t hashzone;
100 
101 /*
102  * Zone that buckets come from.
103  */
104 static uma_zone_t bucketzone;
105 
106 /*
107  * Are we allowed to allocate buckets?
108  */
109 static int bucketdisable = 1;
110 
111 /* Linked list of all zones in the system */
112 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
113 
114 /* This mutex protects the zone list */
115 static struct mtx uma_mtx;
116 
117 /* Linked list of boot time pages */
118 static LIST_HEAD(,uma_slab) uma_boot_pages =
119     LIST_HEAD_INITIALIZER(&uma_boot_pages);
120 
121 /* Count of free boottime pages */
122 static int uma_boot_free = 0;
123 
124 /* Is the VM done starting up? */
125 static int booted = 0;
126 
127 /* This is the handle used to schedule our working set calculator */
128 static struct callout uma_callout;
129 
130 /* This is mp_maxid + 1, for use while looping over each cpu */
131 static int maxcpu;
132 
133 /*
134  * This structure is passed as the zone ctor arg so that I don't have to create
135  * a special allocation function just for zones.
136  */
137 struct uma_zctor_args {
138 	char *name;
139 	size_t size;
140 	uma_ctor ctor;
141 	uma_dtor dtor;
142 	uma_init uminit;
143 	uma_fini fini;
144 	int align;
145 	u_int16_t flags;
146 };
147 
148 /* Prototypes.. */
149 
150 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
151 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
152 static void page_free(void *, int, u_int8_t);
153 static uma_slab_t slab_zalloc(uma_zone_t, int);
154 static void cache_drain(uma_zone_t);
155 static void bucket_drain(uma_zone_t, uma_bucket_t);
156 static void zone_drain(uma_zone_t);
157 static void zone_ctor(void *, int, void *);
158 static void zone_dtor(void *, int, void *);
159 static void zero_init(void *, int);
160 static void zone_small_init(uma_zone_t zone);
161 static void zone_large_init(uma_zone_t zone);
162 static void zone_foreach(void (*zfunc)(uma_zone_t));
163 static void zone_timeout(uma_zone_t zone);
164 static int hash_alloc(struct uma_hash *);
165 static int hash_expand(struct uma_hash *, struct uma_hash *);
166 static void hash_free(struct uma_hash *hash);
167 static void uma_timeout(void *);
168 static void uma_startup3(void);
169 static void *uma_zalloc_internal(uma_zone_t, void *, int, uma_bucket_t);
170 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
171 static void bucket_enable(void);
172 void uma_print_zone(uma_zone_t);
173 void uma_print_stats(void);
174 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
175 
176 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
177     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
178 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
179 
180 /*
181  * This routine checks to see whether or not it's safe to enable buckets.
182  */
183 
184 static void
185 bucket_enable(void)
186 {
187 	if (cnt.v_free_count < cnt.v_free_min)
188 		bucketdisable = 1;
189 	else
190 		bucketdisable = 0;
191 }
192 
193 
194 /*
195  * Routine called by timeout which is used to fire off some time interval
196  * based calculations.  (working set, stats, etc.)
197  *
198  * Arguments:
199  *	arg   Unused
200  *
201  * Returns:
202  *	Nothing
203  */
204 static void
205 uma_timeout(void *unused)
206 {
207 	bucket_enable();
208 	zone_foreach(zone_timeout);
209 
210 	/* Reschedule this event */
211 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
212 }
213 
214 /*
215  * Routine to perform timeout driven calculations.  This does the working set
216  * as well as hash expanding, and per cpu statistics aggregation.
217  *
218  *  Arguments:
219  *	zone  The zone to operate on
220  *
221  *  Returns:
222  *	Nothing
223  */
224 static void
225 zone_timeout(uma_zone_t zone)
226 {
227 	uma_cache_t cache;
228 	u_int64_t alloc;
229 	int free;
230 	int cpu;
231 
232 	alloc = 0;
233 	free = 0;
234 
235 	/*
236 	 * Aggregate per cpu cache statistics back to the zone.
237 	 *
238 	 * I may rewrite this to set a flag in the per cpu cache instead of
239 	 * locking.  If the flag is not cleared on the next round I will have
240 	 * to lock and do it here instead so that the statistics don't get too
241 	 * far out of sync.
242 	 */
243 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
244 		for (cpu = 0; cpu < maxcpu; cpu++) {
245 			if (CPU_ABSENT(cpu))
246 				continue;
247 			CPU_LOCK(zone, cpu);
248 			cache = &zone->uz_cpu[cpu];
249 			/* Add them up, and reset */
250 			alloc += cache->uc_allocs;
251 			cache->uc_allocs = 0;
252 			if (cache->uc_allocbucket)
253 				free += cache->uc_allocbucket->ub_ptr + 1;
254 			if (cache->uc_freebucket)
255 				free += cache->uc_freebucket->ub_ptr + 1;
256 			CPU_UNLOCK(zone, cpu);
257 		}
258 	}
259 
260 	/* Now push these stats back into the zone.. */
261 	ZONE_LOCK(zone);
262 	zone->uz_allocs += alloc;
263 
264 	/*
265 	 * cachefree is an instantanious snapshot of what is in the per cpu
266 	 * caches, not an accurate counter
267 	 */
268 	zone->uz_cachefree = free;
269 
270 	/*
271 	 * Expand the zone hash table.
272 	 *
273 	 * This is done if the number of slabs is larger than the hash size.
274 	 * What I'm trying to do here is completely reduce collisions.  This
275 	 * may be a little aggressive.  Should I allow for two collisions max?
276 	 */
277 
278 	if (zone->uz_flags & UMA_ZFLAG_HASH &&
279 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
280 		struct uma_hash newhash;
281 		struct uma_hash oldhash;
282 		int ret;
283 
284 		/*
285 		 * This is so involved because allocating and freeing
286 		 * while the zone lock is held will lead to deadlock.
287 		 * I have to do everything in stages and check for
288 		 * races.
289 		 */
290 		newhash = zone->uz_hash;
291 		ZONE_UNLOCK(zone);
292 		ret = hash_alloc(&newhash);
293 		ZONE_LOCK(zone);
294 		if (ret) {
295 			if (hash_expand(&zone->uz_hash, &newhash)) {
296 				oldhash = zone->uz_hash;
297 				zone->uz_hash = newhash;
298 			} else
299 				oldhash = newhash;
300 
301 			ZONE_UNLOCK(zone);
302 			hash_free(&oldhash);
303 			ZONE_LOCK(zone);
304 		}
305 	}
306 
307 	/*
308 	 * Here we compute the working set size as the total number of items
309 	 * left outstanding since the last time interval.  This is slightly
310 	 * suboptimal. What we really want is the highest number of outstanding
311 	 * items during the last time quantum.  This should be close enough.
312 	 *
313 	 * The working set size is used to throttle the zone_drain function.
314 	 * We don't want to return memory that we may need again immediately.
315 	 */
316 	alloc = zone->uz_allocs - zone->uz_oallocs;
317 	zone->uz_oallocs = zone->uz_allocs;
318 	zone->uz_wssize = alloc;
319 
320 	ZONE_UNLOCK(zone);
321 }
322 
323 /*
324  * Allocate and zero fill the next sized hash table from the appropriate
325  * backing store.
326  *
327  * Arguments:
328  *	hash  A new hash structure with the old hash size in uh_hashsize
329  *
330  * Returns:
331  *	1 on sucess and 0 on failure.
332  */
333 static int
334 hash_alloc(struct uma_hash *hash)
335 {
336 	int oldsize;
337 	int alloc;
338 
339 	oldsize = hash->uh_hashsize;
340 
341 	/* We're just going to go to a power of two greater */
342 	if (oldsize)  {
343 		hash->uh_hashsize = oldsize * 2;
344 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
345 		/* XXX Shouldn't be abusing DEVBUF here */
346 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
347 		    M_DEVBUF, M_NOWAIT);
348 	} else {
349 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
350 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
351 		    M_WAITOK, NULL);
352 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
353 	}
354 	if (hash->uh_slab_hash) {
355 		bzero(hash->uh_slab_hash, alloc);
356 		hash->uh_hashmask = hash->uh_hashsize - 1;
357 		return (1);
358 	}
359 
360 	return (0);
361 }
362 
363 /*
364  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
365  * to reduce collisions.  This must not be done in the regular allocation path,
366  * otherwise, we can recurse on the vm while allocating pages.
367  *
368  * Arguments:
369  *	oldhash  The hash you want to expand
370  *	newhash  The hash structure for the new table
371  *
372  * Returns:
373  * 	Nothing
374  *
375  * Discussion:
376  */
377 static int
378 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
379 {
380 	uma_slab_t slab;
381 	int hval;
382 	int i;
383 
384 	if (!newhash->uh_slab_hash)
385 		return (0);
386 
387 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
388 		return (0);
389 
390 	/*
391 	 * I need to investigate hash algorithms for resizing without a
392 	 * full rehash.
393 	 */
394 
395 	for (i = 0; i < oldhash->uh_hashsize; i++)
396 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
397 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
398 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
399 			hval = UMA_HASH(newhash, slab->us_data);
400 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
401 			    slab, us_hlink);
402 		}
403 
404 	return (1);
405 }
406 
407 /*
408  * Free the hash bucket to the appropriate backing store.
409  *
410  * Arguments:
411  *	slab_hash  The hash bucket we're freeing
412  *	hashsize   The number of entries in that hash bucket
413  *
414  * Returns:
415  *	Nothing
416  */
417 static void
418 hash_free(struct uma_hash *hash)
419 {
420 	if (hash->uh_slab_hash == NULL)
421 		return;
422 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
423 		uma_zfree_internal(hashzone,
424 		    hash->uh_slab_hash, NULL, 0);
425 	else
426 		free(hash->uh_slab_hash, M_DEVBUF);
427 }
428 
429 /*
430  * Frees all outstanding items in a bucket
431  *
432  * Arguments:
433  *	zone   The zone to free to, must be unlocked.
434  *	bucket The free/alloc bucket with items, cpu queue must be locked.
435  *
436  * Returns:
437  *	Nothing
438  */
439 
440 static void
441 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
442 {
443 	uma_slab_t slab;
444 	int mzone;
445 	void *item;
446 
447 	if (bucket == NULL)
448 		return;
449 
450 	slab = NULL;
451 	mzone = 0;
452 
453 	/* We have to lookup the slab again for malloc.. */
454 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
455 		mzone = 1;
456 
457 	while (bucket->ub_ptr > -1)  {
458 		item = bucket->ub_bucket[bucket->ub_ptr];
459 #ifdef INVARIANTS
460 		bucket->ub_bucket[bucket->ub_ptr] = NULL;
461 		KASSERT(item != NULL,
462 		    ("bucket_drain: botched ptr, item is NULL"));
463 #endif
464 		bucket->ub_ptr--;
465 		/*
466 		 * This is extremely inefficient.  The slab pointer was passed
467 		 * to uma_zfree_arg, but we lost it because the buckets don't
468 		 * hold them.  This will go away when free() gets a size passed
469 		 * to it.
470 		 */
471 		if (mzone)
472 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
473 		uma_zfree_internal(zone, item, slab, 1);
474 	}
475 }
476 
477 /*
478  * Drains the per cpu caches for a zone.
479  *
480  * Arguments:
481  *	zone  The zone to drain, must be unlocked.
482  *
483  * Returns:
484  *	Nothing
485  *
486  * This function returns with the zone locked so that the per cpu queues can
487  * not be filled until zone_drain is finished.
488  *
489  */
490 static void
491 cache_drain(uma_zone_t zone)
492 {
493 	uma_bucket_t bucket;
494 	uma_cache_t cache;
495 	int cpu;
496 
497 	/*
498 	 * Flush out the per cpu queues.
499 	 *
500 	 * XXX This causes unnecessary thrashing due to immediately having
501 	 * empty per cpu queues.  I need to improve this.
502 	 */
503 
504 	/*
505 	 * We have to lock each cpu cache before locking the zone
506 	 */
507 	ZONE_UNLOCK(zone);
508 
509 	for (cpu = 0; cpu < maxcpu; cpu++) {
510 		if (CPU_ABSENT(cpu))
511 			continue;
512 		CPU_LOCK(zone, cpu);
513 		cache = &zone->uz_cpu[cpu];
514 		bucket_drain(zone, cache->uc_allocbucket);
515 		bucket_drain(zone, cache->uc_freebucket);
516 	}
517 
518 	/*
519 	 * Drain the bucket queues and free the buckets, we just keep two per
520 	 * cpu (alloc/free).
521 	 */
522 	ZONE_LOCK(zone);
523 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
524 		LIST_REMOVE(bucket, ub_link);
525 		ZONE_UNLOCK(zone);
526 		bucket_drain(zone, bucket);
527 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
528 		ZONE_LOCK(zone);
529 	}
530 
531 	/* Now we do the free queue.. */
532 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
533 		LIST_REMOVE(bucket, ub_link);
534 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
535 	}
536 
537 	/* We unlock here, but they will all block until the zone is unlocked */
538 	for (cpu = 0; cpu < maxcpu; cpu++) {
539 		if (CPU_ABSENT(cpu))
540 			continue;
541 		CPU_UNLOCK(zone, cpu);
542 	}
543 
544 	zone->uz_cachefree = 0;
545 }
546 
547 /*
548  * Frees pages from a zone back to the system.  This is done on demand from
549  * the pageout daemon.
550  *
551  * Arguments:
552  *	zone  The zone to free pages from
553  *	all   Should we drain all items?
554  *
555  * Returns:
556  *	Nothing.
557  */
558 static void
559 zone_drain(uma_zone_t zone)
560 {
561 	struct slabhead freeslabs = {};
562 	uma_slab_t slab;
563 	uma_slab_t n;
564 	u_int64_t extra;
565 	u_int8_t flags;
566 	u_int8_t *mem;
567 	int i;
568 
569 	/*
570 	 * We don't want to take pages from staticly allocated zones at this
571 	 * time
572 	 */
573 	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
574 		return;
575 
576 	ZONE_LOCK(zone);
577 
578 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
579 		cache_drain(zone);
580 
581 	if (zone->uz_free < zone->uz_wssize)
582 		goto finished;
583 #ifdef UMA_DEBUG
584 	printf("%s working set size: %llu free items: %u\n",
585 	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
586 #endif
587 	extra = zone->uz_free - zone->uz_wssize;
588 	extra /= zone->uz_ipers;
589 
590 	/* extra is now the number of extra slabs that we can free */
591 
592 	if (extra == 0)
593 		goto finished;
594 
595 	slab = LIST_FIRST(&zone->uz_free_slab);
596 	while (slab && extra) {
597 		n = LIST_NEXT(slab, us_link);
598 
599 		/* We have no where to free these to */
600 		if (slab->us_flags & UMA_SLAB_BOOT) {
601 			slab = n;
602 			continue;
603 		}
604 
605 		LIST_REMOVE(slab, us_link);
606 		zone->uz_pages -= zone->uz_ppera;
607 		zone->uz_free -= zone->uz_ipers;
608 
609 		if (zone->uz_flags & UMA_ZFLAG_HASH)
610 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
611 
612 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
613 
614 		slab = n;
615 		extra--;
616 	}
617 finished:
618 	ZONE_UNLOCK(zone);
619 
620 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
621 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
622 		if (zone->uz_fini)
623 			for (i = 0; i < zone->uz_ipers; i++)
624 				zone->uz_fini(
625 				    slab->us_data + (zone->uz_rsize * i),
626 				    zone->uz_size);
627 		flags = slab->us_flags;
628 		mem = slab->us_data;
629 
630 		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
631 			uma_zfree_internal(slabzone, slab, NULL, 0);
632 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
633 			for (i = 0; i < zone->uz_ppera; i++)
634 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
635 				    kmem_object);
636 #ifdef UMA_DEBUG
637 		printf("%s: Returning %d bytes.\n",
638 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
639 #endif
640 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
641 	}
642 
643 }
644 
645 /*
646  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
647  *
648  * Arguments:
649  *	zone  The zone to allocate slabs for
650  *	wait  Shall we wait?
651  *
652  * Returns:
653  *	The slab that was allocated or NULL if there is no memory and the
654  *	caller specified M_NOWAIT.
655  *
656  */
657 static uma_slab_t
658 slab_zalloc(uma_zone_t zone, int wait)
659 {
660 	uma_slab_t slab;	/* Starting slab */
661 	u_int8_t *mem;
662 	u_int8_t flags;
663 	int i;
664 
665 	slab = NULL;
666 
667 #ifdef UMA_DEBUG
668 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
669 #endif
670 	ZONE_UNLOCK(zone);
671 
672 	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
673 		slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
674 		if (slab == NULL) {
675 			ZONE_LOCK(zone);
676 			return NULL;
677 		}
678 	}
679 
680 	/*
681 	 * This reproduces the old vm_zone behavior of zero filling pages the
682 	 * first time they are added to a zone.
683 	 *
684 	 * Malloced items are zeroed in uma_zalloc.
685 	 */
686 
687 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
688 		wait |= M_ZERO;
689 	else
690 		wait &= ~M_ZERO;
691 
692 	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
693 		mtx_lock(&Giant);
694 		mem = zone->uz_allocf(zone,
695 		    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
696 		mtx_unlock(&Giant);
697 		if (mem == NULL) {
698 			ZONE_LOCK(zone);
699 			return (NULL);
700 		}
701 	} else {
702 		uma_slab_t tmps;
703 
704 		if (zone->uz_ppera > 1)
705 			panic("UMA: Attemping to allocate multiple pages before vm has started.\n");
706 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
707 			panic("Mallocing before uma_startup2 has been called.\n");
708 		if (uma_boot_free == 0)
709 			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
710 		tmps = LIST_FIRST(&uma_boot_pages);
711 		LIST_REMOVE(tmps, us_link);
712 		uma_boot_free--;
713 		mem = tmps->us_data;
714 	}
715 
716 	/* Point the slab into the allocated memory */
717 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
718 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
719 
720 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
721 		for (i = 0; i < zone->uz_ppera; i++)
722 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
723 
724 	slab->us_zone = zone;
725 	slab->us_data = mem;
726 
727 	/*
728 	 * This is intended to spread data out across cache lines.
729 	 *
730 	 * This code doesn't seem to work properly on x86, and on alpha
731 	 * it makes absolutely no performance difference. I'm sure it could
732 	 * use some tuning, but sun makes outrageous claims about it's
733 	 * performance.
734 	 */
735 #if 0
736 	if (zone->uz_cachemax) {
737 		slab->us_data += zone->uz_cacheoff;
738 		zone->uz_cacheoff += UMA_CACHE_INC;
739 		if (zone->uz_cacheoff > zone->uz_cachemax)
740 			zone->uz_cacheoff = 0;
741 	}
742 #endif
743 
744 	slab->us_freecount = zone->uz_ipers;
745 	slab->us_firstfree = 0;
746 	slab->us_flags = flags;
747 	for (i = 0; i < zone->uz_ipers; i++)
748 		slab->us_freelist[i] = i+1;
749 
750 	if (zone->uz_init)
751 		for (i = 0; i < zone->uz_ipers; i++)
752 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
753 			    zone->uz_size);
754 	ZONE_LOCK(zone);
755 
756 	if (zone->uz_flags & UMA_ZFLAG_HASH)
757 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
758 
759 	zone->uz_pages += zone->uz_ppera;
760 	zone->uz_free += zone->uz_ipers;
761 
762 
763 	return (slab);
764 }
765 
766 /*
767  * Allocates a number of pages from the system
768  *
769  * Arguments:
770  *	zone  Unused
771  *	bytes  The number of bytes requested
772  *	wait  Shall we wait?
773  *
774  * Returns:
775  *	A pointer to the alloced memory or possibly
776  *	NULL if M_NOWAIT is set.
777  */
778 static void *
779 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
780 {
781 	void *p;	/* Returned page */
782 
783 	*pflag = UMA_SLAB_KMEM;
784 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
785 
786 	return (p);
787 }
788 
789 /*
790  * Allocates a number of pages from within an object
791  *
792  * Arguments:
793  *	zone   Unused
794  *	bytes  The number of bytes requested
795  *	wait   Shall we wait?
796  *
797  * Returns:
798  *	A pointer to the alloced memory or possibly
799  *	NULL if M_NOWAIT is set.
800  *
801  * TODO: If we fail during a multi-page allocation release the pages that have
802  *	 already been allocated.
803  */
804 static void *
805 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
806 {
807 	vm_offset_t zkva;
808 	vm_offset_t retkva;
809 	vm_page_t p;
810 	int pages;
811 
812 	retkva = 0;
813 	pages = zone->uz_pages;
814 
815 	/*
816 	 * This looks a little weird since we're getting one page at a time
817 	 */
818 	while (bytes > 0) {
819 		p = vm_page_alloc(zone->uz_obj, pages,
820 		    VM_ALLOC_INTERRUPT);
821 		if (p == NULL)
822 			return (NULL);
823 
824 		zkva = zone->uz_kva + pages * PAGE_SIZE;
825 		if (retkva == 0)
826 			retkva = zkva;
827 		pmap_qenter(zkva, &p, 1);
828 		bytes -= PAGE_SIZE;
829 		pages += 1;
830 	}
831 
832 	*flags = UMA_SLAB_PRIV;
833 
834 	return ((void *)retkva);
835 }
836 
837 /*
838  * Frees a number of pages to the system
839  *
840  * Arguments:
841  *	mem   A pointer to the memory to be freed
842  *	size  The size of the memory being freed
843  *	flags The original p->us_flags field
844  *
845  * Returns:
846  *	Nothing
847  *
848  */
849 static void
850 page_free(void *mem, int size, u_int8_t flags)
851 {
852 	vm_map_t map;
853 
854 	if (flags & UMA_SLAB_KMEM)
855 		map = kmem_map;
856 	else
857 		panic("UMA: page_free used with invalid flags %d\n", flags);
858 
859 	kmem_free(map, (vm_offset_t)mem, size);
860 }
861 
862 /*
863  * Zero fill initializer
864  *
865  * Arguments/Returns follow uma_init specifications
866  *
867  */
868 static void
869 zero_init(void *mem, int size)
870 {
871 	bzero(mem, size);
872 }
873 
874 /*
875  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
876  *
877  * Arguments
878  *	zone  The zone we should initialize
879  *
880  * Returns
881  *	Nothing
882  */
883 static void
884 zone_small_init(uma_zone_t zone)
885 {
886 	int rsize;
887 	int memused;
888 	int ipers;
889 
890 	rsize = zone->uz_size;
891 
892 	if (rsize < UMA_SMALLEST_UNIT)
893 		rsize = UMA_SMALLEST_UNIT;
894 
895 	if (rsize & zone->uz_align)
896 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
897 
898 	zone->uz_rsize = rsize;
899 
900 	rsize += 1;	/* Account for the byte of linkage */
901 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
902 	zone->uz_ppera = 1;
903 
904 	memused = zone->uz_ipers * zone->uz_rsize;
905 
906 	/* Can we do any better? */
907 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
908 		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
909 			return;
910 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
911 		if (ipers > zone->uz_ipers) {
912 			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
913 			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
914 				zone->uz_flags |= UMA_ZFLAG_HASH;
915 			zone->uz_ipers = ipers;
916 		}
917 	}
918 
919 }
920 
921 /*
922  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
923  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
924  * more complicated.
925  *
926  * Arguments
927  *	zone  The zone we should initialize
928  *
929  * Returns
930  *	Nothing
931  */
932 static void
933 zone_large_init(uma_zone_t zone)
934 {
935 	int pages;
936 
937 	pages = zone->uz_size / UMA_SLAB_SIZE;
938 
939 	/* Account for remainder */
940 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
941 		pages++;
942 
943 	zone->uz_ppera = pages;
944 	zone->uz_ipers = 1;
945 
946 	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
947 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
948 		zone->uz_flags |= UMA_ZFLAG_HASH;
949 
950 	zone->uz_rsize = zone->uz_size;
951 }
952 
953 /*
954  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
955  * the zone onto the global zone list.
956  *
957  * Arguments/Returns follow uma_ctor specifications
958  *	udata  Actually uma_zcreat_args
959  *
960  */
961 
962 static void
963 zone_ctor(void *mem, int size, void *udata)
964 {
965 	struct uma_zctor_args *arg = udata;
966 	uma_zone_t zone = mem;
967 	int privlc;
968 	int cplen;
969 	int cpu;
970 
971 	bzero(zone, size);
972 	zone->uz_name = arg->name;
973 	zone->uz_size = arg->size;
974 	zone->uz_ctor = arg->ctor;
975 	zone->uz_dtor = arg->dtor;
976 	zone->uz_init = arg->uminit;
977 	zone->uz_fini = arg->fini;
978 	zone->uz_align = arg->align;
979 	zone->uz_free = 0;
980 	zone->uz_pages = 0;
981 	zone->uz_flags = 0;
982 	zone->uz_allocf = page_alloc;
983 	zone->uz_freef = page_free;
984 
985 	if (arg->flags & UMA_ZONE_ZINIT)
986 		zone->uz_init = zero_init;
987 
988 	if (arg->flags & UMA_ZONE_INTERNAL)
989 		zone->uz_flags |= UMA_ZFLAG_INTERNAL;
990 
991 	if (arg->flags & UMA_ZONE_MALLOC)
992 		zone->uz_flags |= UMA_ZFLAG_MALLOC;
993 
994 	if (arg->flags & UMA_ZONE_NOFREE)
995 		zone->uz_flags |= UMA_ZFLAG_NOFREE;
996 
997 	if (arg->flags & UMA_ZONE_VM)
998 		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
999 
1000 	if (zone->uz_size > UMA_SLAB_SIZE)
1001 		zone_large_init(zone);
1002 	else
1003 		zone_small_init(zone);
1004 
1005 	if (arg->flags & UMA_ZONE_MTXCLASS)
1006 		privlc = 1;
1007 	else
1008 		privlc = 0;
1009 
1010 	/* We do this so that the per cpu lock name is unique for each zone */
1011 	memcpy(zone->uz_lname, "PCPU ", 5);
1012 	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1013 	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1014 	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1015 
1016 	/*
1017 	 * If we're putting the slab header in the actual page we need to
1018 	 * figure out where in each page it goes.  This calculates a right
1019 	 * justified offset into the memory on a ALIGN_PTR boundary.
1020 	 */
1021 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1022 		int totsize;
1023 		int waste;
1024 
1025 		/* Size of the slab struct and free list */
1026 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1027 		if (totsize & UMA_ALIGN_PTR)
1028 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1029 			    (UMA_ALIGN_PTR + 1);
1030 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1031 
1032 		waste = zone->uz_pgoff;
1033 		waste -= (zone->uz_ipers * zone->uz_rsize);
1034 
1035 		/*
1036 		 * This calculates how much space we have for cache line size
1037 		 * optimizations.  It works by offseting each slab slightly.
1038 		 * Currently it breaks on x86, and so it is disabled.
1039 		 */
1040 
1041 		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1042 			zone->uz_cachemax = waste - UMA_CACHE_INC;
1043 			zone->uz_cacheoff = 0;
1044 		}
1045 
1046 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1047 		    + zone->uz_ipers;
1048 		/* I don't think it's possible, but I'll make sure anyway */
1049 		if (totsize > UMA_SLAB_SIZE) {
1050 			printf("zone %s ipers %d rsize %d size %d\n",
1051 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1052 			    zone->uz_size);
1053 			panic("UMA slab won't fit.\n");
1054 		}
1055 	}
1056 
1057 	if (zone->uz_flags & UMA_ZFLAG_HASH)
1058 		hash_alloc(&zone->uz_hash);
1059 
1060 #ifdef UMA_DEBUG
1061 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1062 	    zone->uz_name, zone,
1063 	    zone->uz_size, zone->uz_ipers,
1064 	    zone->uz_ppera, zone->uz_pgoff);
1065 #endif
1066 	ZONE_LOCK_INIT(zone, privlc);
1067 
1068 	mtx_lock(&uma_mtx);
1069 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1070 	mtx_unlock(&uma_mtx);
1071 
1072 	/*
1073 	 * Some internal zones don't have room allocated for the per cpu
1074 	 * caches.  If we're internal, bail out here.
1075 	 */
1076 
1077 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1078 		return;
1079 
1080 	if (zone->uz_ipers < UMA_BUCKET_SIZE)
1081 		zone->uz_count = zone->uz_ipers - 1;
1082 	else
1083 		zone->uz_count = UMA_BUCKET_SIZE - 1;
1084 
1085 	for (cpu = 0; cpu < maxcpu; cpu++)
1086 		CPU_LOCK_INIT(zone, cpu, privlc);
1087 }
1088 
1089 /*
1090  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1091  * and removes the zone from the global list.
1092  *
1093  * Arguments/Returns follow uma_dtor specifications
1094  *	udata  unused
1095  */
1096 
1097 static void
1098 zone_dtor(void *arg, int size, void *udata)
1099 {
1100 	uma_zone_t zone;
1101 	int cpu;
1102 
1103 	zone = (uma_zone_t)arg;
1104 
1105 	ZONE_LOCK(zone);
1106 	zone->uz_wssize = 0;
1107 	ZONE_UNLOCK(zone);
1108 
1109 	mtx_lock(&uma_mtx);
1110 	LIST_REMOVE(zone, uz_link);
1111 	zone_drain(zone);
1112 	mtx_unlock(&uma_mtx);
1113 
1114 	ZONE_LOCK(zone);
1115 	if (zone->uz_free != 0)
1116 		printf("Zone %s was not empty.  Lost %d pages of memory.\n",
1117 		    zone->uz_name, zone->uz_pages);
1118 
1119 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1120 		for (cpu = 0; cpu < maxcpu; cpu++)
1121 			CPU_LOCK_FINI(zone, cpu);
1122 
1123 	ZONE_UNLOCK(zone);
1124 	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1125 		hash_free(&zone->uz_hash);
1126 
1127 	ZONE_LOCK_FINI(zone);
1128 }
1129 /*
1130  * Traverses every zone in the system and calls a callback
1131  *
1132  * Arguments:
1133  *	zfunc  A pointer to a function which accepts a zone
1134  *		as an argument.
1135  *
1136  * Returns:
1137  *	Nothing
1138  */
1139 static void
1140 zone_foreach(void (*zfunc)(uma_zone_t))
1141 {
1142 	uma_zone_t zone;
1143 
1144 	mtx_lock(&uma_mtx);
1145 	LIST_FOREACH(zone, &uma_zones, uz_link) {
1146 		zfunc(zone);
1147 	}
1148 	mtx_unlock(&uma_mtx);
1149 }
1150 
1151 /* Public functions */
1152 /* See uma.h */
1153 void
1154 uma_startup(void *bootmem)
1155 {
1156 	struct uma_zctor_args args;
1157 	uma_slab_t slab;
1158 	int slabsize;
1159 	int i;
1160 
1161 #ifdef UMA_DEBUG
1162 	printf("Creating uma zone headers zone.\n");
1163 #endif
1164 #ifdef SMP
1165 	maxcpu = mp_maxid + 1;
1166 #else
1167 	maxcpu = 1;
1168 #endif
1169 #ifdef UMA_DEBUG
1170 	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1171 	Debugger("stop");
1172 #endif
1173 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1174 	/* "manually" Create the initial zone */
1175 	args.name = "UMA Zones";
1176 	args.size = sizeof(struct uma_zone) +
1177 	    (sizeof(struct uma_cache) * (maxcpu - 1));
1178 	args.ctor = zone_ctor;
1179 	args.dtor = zone_dtor;
1180 	args.uminit = zero_init;
1181 	args.fini = NULL;
1182 	args.align = 32 - 1;
1183 	args.flags = UMA_ZONE_INTERNAL;
1184 	/* The initial zone has no Per cpu queues so it's smaller */
1185 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1186 
1187 #ifdef UMA_DEBUG
1188 	printf("Filling boot free list.\n");
1189 #endif
1190 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1191 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1192 		slab->us_data = (u_int8_t *)slab;
1193 		slab->us_flags = UMA_SLAB_BOOT;
1194 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1195 		uma_boot_free++;
1196 	}
1197 
1198 #ifdef UMA_DEBUG
1199 	printf("Creating slab zone.\n");
1200 #endif
1201 
1202 	/*
1203 	 * This is the max number of free list items we'll have with
1204 	 * offpage slabs.
1205 	 */
1206 
1207 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1208 	slabsize /= UMA_MAX_WASTE;
1209 	slabsize++;			/* In case there it's rounded */
1210 	slabsize += sizeof(struct uma_slab);
1211 
1212 	/* Now make a zone for slab headers */
1213 	slabzone = uma_zcreate("UMA Slabs",
1214 				slabsize,
1215 				NULL, NULL, NULL, NULL,
1216 				UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1217 
1218 	hashzone = uma_zcreate("UMA Hash",
1219 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1220 	    NULL, NULL, NULL, NULL,
1221 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1222 
1223 	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1224 	    NULL, NULL, NULL, NULL,
1225 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1226 
1227 
1228 #ifdef UMA_DEBUG
1229 	printf("UMA startup complete.\n");
1230 #endif
1231 }
1232 
1233 /* see uma.h */
1234 void
1235 uma_startup2(void)
1236 {
1237 	booted = 1;
1238 	bucket_enable();
1239 #ifdef UMA_DEBUG
1240 	printf("UMA startup2 complete.\n");
1241 #endif
1242 }
1243 
1244 /*
1245  * Initialize our callout handle
1246  *
1247  */
1248 
1249 static void
1250 uma_startup3(void)
1251 {
1252 #ifdef UMA_DEBUG
1253 	printf("Starting callout.\n");
1254 #endif
1255 	callout_init(&uma_callout, 0);
1256 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1257 #ifdef UMA_DEBUG
1258 	printf("UMA startup3 complete.\n");
1259 #endif
1260 }
1261 
1262 /* See uma.h */
1263 uma_zone_t
1264 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1265 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1266 
1267 {
1268 	struct uma_zctor_args args;
1269 
1270 	/* This stuff is essential for the zone ctor */
1271 	args.name = name;
1272 	args.size = size;
1273 	args.ctor = ctor;
1274 	args.dtor = dtor;
1275 	args.uminit = uminit;
1276 	args.fini = fini;
1277 	args.align = align;
1278 	args.flags = flags;
1279 
1280 	return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL));
1281 }
1282 
1283 /* See uma.h */
1284 void
1285 uma_zdestroy(uma_zone_t zone)
1286 {
1287 	uma_zfree_internal(zones, zone, NULL, 0);
1288 }
1289 
1290 /* See uma.h */
1291 void *
1292 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1293 {
1294 	void *item;
1295 	uma_cache_t cache;
1296 	uma_bucket_t bucket;
1297 	int cpu;
1298 
1299 	/* This is the fast path allocation */
1300 #ifdef UMA_DEBUG_ALLOC_1
1301 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1302 #endif
1303 
1304 	if (!(flags & M_NOWAIT)) {
1305 		KASSERT(curthread->td_intr_nesting_level == 0,
1306 		   ("malloc(M_WAITOK) in interrupt context"));
1307 		WITNESS_SLEEP(1, NULL);
1308 	}
1309 
1310 zalloc_restart:
1311 	cpu = PCPU_GET(cpuid);
1312 	CPU_LOCK(zone, cpu);
1313 	cache = &zone->uz_cpu[cpu];
1314 
1315 zalloc_start:
1316 	bucket = cache->uc_allocbucket;
1317 
1318 	if (bucket) {
1319 		if (bucket->ub_ptr > -1) {
1320 			item = bucket->ub_bucket[bucket->ub_ptr];
1321 #ifdef INVARIANTS
1322 			bucket->ub_bucket[bucket->ub_ptr] = NULL;
1323 #endif
1324 			bucket->ub_ptr--;
1325 			KASSERT(item != NULL,
1326 			    ("uma_zalloc: Bucket pointer mangled."));
1327 			cache->uc_allocs++;
1328 #ifdef INVARIANTS
1329 			uma_dbg_alloc(zone, NULL, item);
1330 #endif
1331 			CPU_UNLOCK(zone, cpu);
1332 			if (zone->uz_ctor)
1333 				zone->uz_ctor(item, zone->uz_size, udata);
1334 			if (flags & M_ZERO)
1335 				bzero(item, zone->uz_size);
1336 			return (item);
1337 		} else if (cache->uc_freebucket) {
1338 			/*
1339 			 * We have run out of items in our allocbucket.
1340 			 * See if we can switch with our free bucket.
1341 			 */
1342 			if (cache->uc_freebucket->ub_ptr > -1) {
1343 				uma_bucket_t swap;
1344 
1345 #ifdef UMA_DEBUG_ALLOC
1346 				printf("uma_zalloc: Swapping empty with alloc.\n");
1347 #endif
1348 				swap = cache->uc_freebucket;
1349 				cache->uc_freebucket = cache->uc_allocbucket;
1350 				cache->uc_allocbucket = swap;
1351 
1352 				goto zalloc_start;
1353 			}
1354 		}
1355 	}
1356 	ZONE_LOCK(zone);
1357 	/* Since we have locked the zone we may as well send back our stats */
1358 	zone->uz_allocs += cache->uc_allocs;
1359 	cache->uc_allocs = 0;
1360 
1361 	/* Our old one is now a free bucket */
1362 	if (cache->uc_allocbucket) {
1363 		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1364 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1365 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1366 		    cache->uc_allocbucket, ub_link);
1367 		cache->uc_allocbucket = NULL;
1368 	}
1369 
1370 	/* Check the free list for a new alloc bucket */
1371 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1372 		KASSERT(bucket->ub_ptr != -1,
1373 		    ("uma_zalloc_arg: Returning an empty bucket."));
1374 
1375 		LIST_REMOVE(bucket, ub_link);
1376 		cache->uc_allocbucket = bucket;
1377 		ZONE_UNLOCK(zone);
1378 		goto zalloc_start;
1379 	}
1380 	/* Bump up our uz_count so we get here less */
1381 	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1382 		zone->uz_count++;
1383 
1384 	/* We are no longer associated with this cpu!!! */
1385 	CPU_UNLOCK(zone, cpu);
1386 
1387 	/*
1388 	 * Now lets just fill a bucket and put it on the free list.  If that
1389 	 * works we'll restart the allocation from the begining.
1390 	 *
1391 	 * Try this zone's free list first so we don't allocate extra buckets.
1392 	 */
1393 
1394 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL)
1395 		LIST_REMOVE(bucket, ub_link);
1396 
1397 	/* Now we no longer need the zone lock. */
1398 	ZONE_UNLOCK(zone);
1399 
1400 	if (bucket == NULL) {
1401 		int bflags;
1402 
1403 		bflags = flags;
1404 		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1405 			bflags |= M_NOVM;
1406 
1407 		bucket = uma_zalloc_internal(bucketzone,
1408 		    NULL, bflags, NULL);
1409 	}
1410 
1411 	if (bucket != NULL) {
1412 #ifdef INVARIANTS
1413 		bzero(bucket, bucketzone->uz_size);
1414 #endif
1415 		bucket->ub_ptr = -1;
1416 
1417 		if (uma_zalloc_internal(zone, udata, flags, bucket))
1418 			goto zalloc_restart;
1419 		else
1420 			uma_zfree_internal(bucketzone, bucket, NULL, 0);
1421 	}
1422 	/*
1423 	 * We may not get a bucket if we recurse, so
1424 	 * return an actual item.
1425 	 */
1426 #ifdef UMA_DEBUG
1427 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1428 #endif
1429 
1430 	return (uma_zalloc_internal(zone, udata, flags, NULL));
1431 }
1432 
1433 /*
1434  * Allocates an item for an internal zone OR fills a bucket
1435  *
1436  * Arguments
1437  *	zone   The zone to alloc for.
1438  *	udata  The data to be passed to the constructor.
1439  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1440  *	bucket The bucket to fill or NULL
1441  *
1442  * Returns
1443  *	NULL if there is no memory and M_NOWAIT is set
1444  *	An item if called on an interal zone
1445  *	Non NULL if called to fill a bucket and it was successful.
1446  *
1447  * Discussion:
1448  *	This was much cleaner before it had to do per cpu caches.  It is
1449  *	complicated now because it has to handle the simple internal case, and
1450  *	the more involved bucket filling and allocation.
1451  */
1452 
1453 static void *
1454 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags, uma_bucket_t bucket)
1455 {
1456 	uma_slab_t slab;
1457 	u_int8_t freei;
1458 	void *item;
1459 
1460 	item = NULL;
1461 
1462 	/*
1463 	 * This is to stop us from allocating per cpu buckets while we're
1464 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
1465 	 * boot pages.
1466 	 */
1467 
1468 	if (bucketdisable && zone == bucketzone)
1469 		return (NULL);
1470 
1471 #ifdef UMA_DEBUG_ALLOC
1472 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1473 #endif
1474 	ZONE_LOCK(zone);
1475 
1476 	/*
1477 	 * This code is here to limit the number of simultaneous bucket fills
1478 	 * for any given zone to the number of per cpu caches in this zone. This
1479 	 * is done so that we don't allocate more memory than we really need.
1480 	 */
1481 
1482 	if (bucket) {
1483 #ifdef SMP
1484 		if (zone->uz_fills >= mp_ncpus) {
1485 #else
1486 		if (zone->uz_fills > 1) {
1487 #endif
1488 			ZONE_UNLOCK(zone);
1489 			return (NULL);
1490 		}
1491 
1492 		zone->uz_fills++;
1493 	}
1494 
1495 new_slab:
1496 
1497 	/* Find a slab with some space */
1498 	if (zone->uz_free) {
1499 		if (!LIST_EMPTY(&zone->uz_part_slab)) {
1500 			slab = LIST_FIRST(&zone->uz_part_slab);
1501 		} else {
1502 			slab = LIST_FIRST(&zone->uz_free_slab);
1503 			LIST_REMOVE(slab, us_link);
1504 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1505 		}
1506 	} else {
1507 		/*
1508 		 * This is to prevent us from recursively trying to allocate
1509 		 * buckets.  The problem is that if an allocation forces us to
1510 		 * grab a new bucket we will call page_alloc, which will go off
1511 		 * and cause the vm to allocate vm_map_entries.  If we need new
1512 		 * buckets there too we will recurse in kmem_alloc and bad
1513 		 * things happen.  So instead we return a NULL bucket, and make
1514 		 * the code that allocates buckets smart enough to deal with it
1515 		 */
1516 		if (zone == bucketzone && zone->uz_recurse != 0) {
1517 			ZONE_UNLOCK(zone);
1518 			return (NULL);
1519 		}
1520 		while (zone->uz_maxpages &&
1521 		    zone->uz_pages >= zone->uz_maxpages) {
1522 			zone->uz_flags |= UMA_ZFLAG_FULL;
1523 
1524 			if (flags & M_WAITOK)
1525 				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1526 			else
1527 				goto alloc_fail;
1528 
1529 			goto new_slab;
1530 		}
1531 
1532 		if (flags & M_NOVM)
1533 			goto alloc_fail;
1534 
1535 		zone->uz_recurse++;
1536 		slab = slab_zalloc(zone, flags);
1537 		zone->uz_recurse--;
1538 		/*
1539 		 * We might not have been able to get a slab but another cpu
1540 		 * could have while we were unlocked.  If we did get a slab put
1541 		 * it on the partially used slab list.  If not check the free
1542 		 * count and restart or fail accordingly.
1543 		 */
1544 		if (slab)
1545 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1546 		else if (zone->uz_free == 0)
1547 			goto alloc_fail;
1548 		else
1549 			goto new_slab;
1550 	}
1551 	/*
1552 	 * If this is our first time though put this guy on the list.
1553 	 */
1554 	if (bucket != NULL && bucket->ub_ptr == -1)
1555 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1556 		    bucket, ub_link);
1557 
1558 
1559 	while (slab->us_freecount) {
1560 		freei = slab->us_firstfree;
1561 		slab->us_firstfree = slab->us_freelist[freei];
1562 
1563 		item = slab->us_data + (zone->uz_rsize * freei);
1564 
1565 		slab->us_freecount--;
1566 		zone->uz_free--;
1567 #ifdef INVARIANTS
1568 		uma_dbg_alloc(zone, slab, item);
1569 #endif
1570 		if (bucket == NULL) {
1571 			zone->uz_allocs++;
1572 			break;
1573 		}
1574 		bucket->ub_bucket[++bucket->ub_ptr] = item;
1575 
1576 		/* Don't overfill the bucket! */
1577 		if (bucket->ub_ptr == zone->uz_count)
1578 			break;
1579 	}
1580 
1581 	/* Move this slab to the full list */
1582 	if (slab->us_freecount == 0) {
1583 		LIST_REMOVE(slab, us_link);
1584 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1585 	}
1586 
1587 	if (bucket != NULL) {
1588 		/* Try to keep the buckets totally full, but don't block */
1589 		if (bucket->ub_ptr < zone->uz_count) {
1590 			flags |= M_NOWAIT;
1591 			flags &= ~M_WAITOK;
1592 			goto new_slab;
1593 		} else
1594 			zone->uz_fills--;
1595 	}
1596 
1597 	ZONE_UNLOCK(zone);
1598 
1599 	/* Only construct at this time if we're not filling a bucket */
1600 	if (bucket == NULL) {
1601 		if (zone->uz_ctor != NULL)
1602 			zone->uz_ctor(item, zone->uz_size, udata);
1603 		if (flags & M_ZERO)
1604 			bzero(item, zone->uz_size);
1605 	}
1606 
1607 	return (item);
1608 
1609 alloc_fail:
1610 	if (bucket != NULL)
1611 		zone->uz_fills--;
1612 	ZONE_UNLOCK(zone);
1613 
1614 	if (bucket != NULL && bucket->ub_ptr != -1)
1615 		return (bucket);
1616 
1617 	return (NULL);
1618 }
1619 
1620 /* See uma.h */
1621 void
1622 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1623 {
1624 	uma_cache_t cache;
1625 	uma_bucket_t bucket;
1626 	int bflags;
1627 	int cpu;
1628 
1629 	/* This is the fast path free */
1630 #ifdef UMA_DEBUG_ALLOC_1
1631 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1632 #endif
1633 	/*
1634 	 * The race here is acceptable.  If we miss it we'll just have to wait
1635 	 * a little longer for the limits to be reset.
1636 	 */
1637 
1638 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1639 		goto zfree_internal;
1640 
1641 zfree_restart:
1642 	cpu = PCPU_GET(cpuid);
1643 	CPU_LOCK(zone, cpu);
1644 	cache = &zone->uz_cpu[cpu];
1645 
1646 zfree_start:
1647 	bucket = cache->uc_freebucket;
1648 
1649 	if (bucket) {
1650 		/*
1651 		 * Do we have room in our bucket? It is OK for this uz count
1652 		 * check to be slightly out of sync.
1653 		 */
1654 
1655 		if (bucket->ub_ptr < zone->uz_count) {
1656 			bucket->ub_ptr++;
1657 			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1658 			    ("uma_zfree: Freeing to non free bucket index."));
1659 			bucket->ub_bucket[bucket->ub_ptr] = item;
1660 			if (zone->uz_dtor)
1661 				zone->uz_dtor(item, zone->uz_size, udata);
1662 #ifdef INVARIANTS
1663 			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1664 				uma_dbg_free(zone, udata, item);
1665 			else
1666 				uma_dbg_free(zone, NULL, item);
1667 #endif
1668 			CPU_UNLOCK(zone, cpu);
1669 			return;
1670 		} else if (cache->uc_allocbucket) {
1671 #ifdef UMA_DEBUG_ALLOC
1672 			printf("uma_zfree: Swapping buckets.\n");
1673 #endif
1674 			/*
1675 			 * We have run out of space in our freebucket.
1676 			 * See if we can switch with our alloc bucket.
1677 			 */
1678 			if (cache->uc_allocbucket->ub_ptr <
1679 			    cache->uc_freebucket->ub_ptr) {
1680 				uma_bucket_t swap;
1681 
1682 				swap = cache->uc_freebucket;
1683 				cache->uc_freebucket = cache->uc_allocbucket;
1684 				cache->uc_allocbucket = swap;
1685 
1686 				goto zfree_start;
1687 			}
1688 		}
1689 	}
1690 
1691 	/*
1692 	 * We can get here for two reasons:
1693 	 *
1694 	 * 1) The buckets are NULL
1695 	 * 2) The alloc and free buckets are both somewhat full.
1696 	 *
1697 	 */
1698 
1699 	ZONE_LOCK(zone);
1700 
1701 	bucket = cache->uc_freebucket;
1702 	cache->uc_freebucket = NULL;
1703 
1704 	/* Can we throw this on the zone full list? */
1705 	if (bucket != NULL) {
1706 #ifdef UMA_DEBUG_ALLOC
1707 		printf("uma_zfree: Putting old bucket on the free list.\n");
1708 #endif
1709 		/* ub_ptr is pointing to the last free item */
1710 		KASSERT(bucket->ub_ptr != -1,
1711 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1712 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1713 		    bucket, ub_link);
1714 	}
1715 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1716 		LIST_REMOVE(bucket, ub_link);
1717 		ZONE_UNLOCK(zone);
1718 		cache->uc_freebucket = bucket;
1719 		goto zfree_start;
1720 	}
1721 	/* We're done with this CPU now */
1722 	CPU_UNLOCK(zone, cpu);
1723 
1724 	/* And the zone.. */
1725 	ZONE_UNLOCK(zone);
1726 
1727 #ifdef UMA_DEBUG_ALLOC
1728 	printf("uma_zfree: Allocating new free bucket.\n");
1729 #endif
1730 	bflags = M_NOWAIT;
1731 
1732 	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1733 		bflags |= M_NOVM;
1734 #ifdef INVARIANTS
1735 	bflags |= M_ZERO;
1736 #endif
1737 	bucket = uma_zalloc_internal(bucketzone,
1738 	    NULL, bflags, NULL);
1739 	if (bucket) {
1740 		bucket->ub_ptr = -1;
1741 		ZONE_LOCK(zone);
1742 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1743 		    bucket, ub_link);
1744 		ZONE_UNLOCK(zone);
1745 		goto zfree_restart;
1746 	}
1747 
1748 	/*
1749 	 * If nothing else caught this, we'll just do an internal free.
1750 	 */
1751 
1752 zfree_internal:
1753 
1754 	uma_zfree_internal(zone, item, udata, 0);
1755 
1756 	return;
1757 
1758 }
1759 
1760 /*
1761  * Frees an item to an INTERNAL zone or allocates a free bucket
1762  *
1763  * Arguments:
1764  *	zone   The zone to free to
1765  *	item   The item we're freeing
1766  *	udata  User supplied data for the dtor
1767  *	skip   Skip the dtor, it was done in uma_zfree_arg
1768  */
1769 
1770 static void
1771 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1772 {
1773 	uma_slab_t slab;
1774 	u_int8_t *mem;
1775 	u_int8_t freei;
1776 
1777 	ZONE_LOCK(zone);
1778 
1779 	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1780 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1781 		if (zone->uz_flags & UMA_ZFLAG_HASH)
1782 			slab = hash_sfind(&zone->uz_hash, mem);
1783 		else {
1784 			mem += zone->uz_pgoff;
1785 			slab = (uma_slab_t)mem;
1786 		}
1787 	} else {
1788 		slab = (uma_slab_t)udata;
1789 	}
1790 
1791 	/* Do we need to remove from any lists? */
1792 	if (slab->us_freecount+1 == zone->uz_ipers) {
1793 		LIST_REMOVE(slab, us_link);
1794 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1795 	} else if (slab->us_freecount == 0) {
1796 		LIST_REMOVE(slab, us_link);
1797 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1798 	}
1799 
1800 	/* Slab management stuff */
1801 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1802 		/ zone->uz_rsize;
1803 
1804 #ifdef INVARIANTS
1805 	if (!skip)
1806 		uma_dbg_free(zone, slab, item);
1807 #endif
1808 
1809 	slab->us_freelist[freei] = slab->us_firstfree;
1810 	slab->us_firstfree = freei;
1811 	slab->us_freecount++;
1812 
1813 	/* Zone statistics */
1814 	zone->uz_free++;
1815 
1816 	if (!skip && zone->uz_dtor)
1817 		zone->uz_dtor(item, zone->uz_size, udata);
1818 
1819 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1820 		if (zone->uz_pages < zone->uz_maxpages)
1821 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1822 
1823 		/* We can handle one more allocation */
1824 		wakeup_one(&zone);
1825 	}
1826 
1827 	ZONE_UNLOCK(zone);
1828 }
1829 
1830 /* See uma.h */
1831 void
1832 uma_zone_set_max(uma_zone_t zone, int nitems)
1833 {
1834 	ZONE_LOCK(zone);
1835 	if (zone->uz_ppera > 1)
1836 		zone->uz_maxpages = nitems * zone->uz_ppera;
1837 	else
1838 		zone->uz_maxpages = nitems / zone->uz_ipers;
1839 
1840 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1841 		zone->uz_maxpages++;
1842 
1843 	ZONE_UNLOCK(zone);
1844 }
1845 
1846 /* See uma.h */
1847 void
1848 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1849 {
1850 	ZONE_LOCK(zone);
1851 
1852 	zone->uz_freef = freef;
1853 
1854 	ZONE_UNLOCK(zone);
1855 }
1856 
1857 /* See uma.h */
1858 void
1859 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1860 {
1861 	ZONE_LOCK(zone);
1862 
1863 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1864 	zone->uz_allocf = allocf;
1865 
1866 	ZONE_UNLOCK(zone);
1867 }
1868 
1869 /* See uma.h */
1870 int
1871 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1872 {
1873 	int pages;
1874 	vm_offset_t kva;
1875 
1876 	mtx_lock(&Giant);
1877 
1878 	pages = count / zone->uz_ipers;
1879 
1880 	if (pages * zone->uz_ipers < count)
1881 		pages++;
1882 
1883 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1884 
1885 	if (kva == 0) {
1886 		mtx_unlock(&Giant);
1887 		return (0);
1888 	}
1889 
1890 
1891 	if (obj == NULL)
1892 		obj = vm_object_allocate(OBJT_DEFAULT,
1893 		    pages);
1894 	else
1895 		_vm_object_allocate(OBJT_DEFAULT,
1896 		    pages, obj);
1897 
1898 	ZONE_LOCK(zone);
1899 	zone->uz_kva = kva;
1900 	zone->uz_obj = obj;
1901 	zone->uz_maxpages = pages;
1902 
1903 	zone->uz_allocf = obj_alloc;
1904 	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1905 
1906 	ZONE_UNLOCK(zone);
1907 	mtx_unlock(&Giant);
1908 
1909 	return (1);
1910 }
1911 
1912 /* See uma.h */
1913 void
1914 uma_prealloc(uma_zone_t zone, int items)
1915 {
1916 	int slabs;
1917 	uma_slab_t slab;
1918 
1919 	ZONE_LOCK(zone);
1920 	slabs = items / zone->uz_ipers;
1921 	if (slabs * zone->uz_ipers < items)
1922 		slabs++;
1923 
1924 	while (slabs > 0) {
1925 		slab = slab_zalloc(zone, M_WAITOK);
1926 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1927 		slabs--;
1928 	}
1929 	ZONE_UNLOCK(zone);
1930 }
1931 
1932 /* See uma.h */
1933 void
1934 uma_reclaim(void)
1935 {
1936 	/*
1937 	 * You might think that the delay below would improve performance since
1938 	 * the allocator will give away memory that it may ask for immediately.
1939 	 * Really, it makes things worse, since cpu cycles are so much cheaper
1940 	 * than disk activity.
1941 	 */
1942 #if 0
1943 	static struct timeval tv = {0};
1944 	struct timeval now;
1945 	getmicrouptime(&now);
1946 	if (now.tv_sec > tv.tv_sec + 30)
1947 		tv = now;
1948 	else
1949 		return;
1950 #endif
1951 #ifdef UMA_DEBUG
1952 	printf("UMA: vm asked us to release pages!\n");
1953 #endif
1954 	bucket_enable();
1955 	zone_foreach(zone_drain);
1956 
1957 	/*
1958 	 * Some slabs may have been freed but this zone will be visited early
1959 	 * we visit again so that we can free pages that are empty once other
1960 	 * zones are drained.  We have to do the same for buckets.
1961 	 */
1962 	zone_drain(slabzone);
1963 	zone_drain(bucketzone);
1964 }
1965 
1966 void *
1967 uma_large_malloc(int size, int wait)
1968 {
1969 	void *mem;
1970 	uma_slab_t slab;
1971 	u_int8_t flags;
1972 
1973 	slab = uma_zalloc_internal(slabzone, NULL, wait, NULL);
1974 	if (slab == NULL)
1975 		return (NULL);
1976 
1977 	mem = page_alloc(NULL, size, &flags, wait);
1978 	if (mem) {
1979 		vsetslab((vm_offset_t)mem, slab);
1980 		slab->us_data = mem;
1981 		slab->us_flags = flags | UMA_SLAB_MALLOC;
1982 		slab->us_size = size;
1983 	} else {
1984 		uma_zfree_internal(slabzone, slab, NULL, 0);
1985 	}
1986 
1987 
1988 	return (mem);
1989 }
1990 
1991 void
1992 uma_large_free(uma_slab_t slab)
1993 {
1994 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
1995 	page_free(slab->us_data, slab->us_size, slab->us_flags);
1996 	uma_zfree_internal(slabzone, slab, NULL, 0);
1997 }
1998 
1999 void
2000 uma_print_stats(void)
2001 {
2002 	zone_foreach(uma_print_zone);
2003 }
2004 
2005 void
2006 uma_print_zone(uma_zone_t zone)
2007 {
2008 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2009 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2010 	    zone->uz_ipers, zone->uz_ppera,
2011 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2012 }
2013 
2014 /*
2015  * Sysctl handler for vm.zone
2016  *
2017  * stolen from vm_zone.c
2018  */
2019 static int
2020 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2021 {
2022 	int error, len, cnt;
2023 	const int linesize = 128;	/* conservative */
2024 	int totalfree;
2025 	char *tmpbuf, *offset;
2026 	uma_zone_t z;
2027 	char *p;
2028 
2029 	cnt = 0;
2030 	mtx_lock(&uma_mtx);
2031 	LIST_FOREACH(z, &uma_zones, uz_link)
2032 		cnt++;
2033 	mtx_unlock(&uma_mtx);
2034 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2035 			M_TEMP, M_WAITOK);
2036 	len = snprintf(tmpbuf, linesize,
2037 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2038 	if (cnt == 0)
2039 		tmpbuf[len - 1] = '\0';
2040 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2041 	if (error || cnt == 0)
2042 		goto out;
2043 	offset = tmpbuf;
2044 	mtx_lock(&uma_mtx);
2045 	LIST_FOREACH(z, &uma_zones, uz_link) {
2046 		if (cnt == 0)	/* list may have changed size */
2047 			break;
2048 		ZONE_LOCK(z);
2049 		totalfree = z->uz_free + z->uz_cachefree;
2050 		len = snprintf(offset, linesize,
2051 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2052 		    z->uz_name, z->uz_size,
2053 		    z->uz_maxpages * z->uz_ipers,
2054 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2055 		    totalfree,
2056 		    (unsigned long long)z->uz_allocs);
2057 		ZONE_UNLOCK(z);
2058 		for (p = offset + 12; p > offset && *p == ' '; --p)
2059 			/* nothing */ ;
2060 		p[1] = ':';
2061 		cnt--;
2062 		offset += len;
2063 	}
2064 	mtx_unlock(&uma_mtx);
2065 	*offset++ = '\0';
2066 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2067 out:
2068 	FREE(tmpbuf, M_TEMP);
2069 	return (error);
2070 }
2071