xref: /freebsd/sys/vm/uma_core.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  *
28  */
29 
30 /*
31  * uma_core.c  Implementation of the Universal Memory allocator
32  *
33  * This allocator is intended to replace the multitude of similar object caches
34  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
35  * efficient.  A primary design goal is to return unused memory to the rest of
36  * the system.  This will make the system as a whole more flexible due to the
37  * ability to move memory to subsystems which most need it instead of leaving
38  * pools of reserved memory unused.
39  *
40  * The basic ideas stem from similar slab/zone based allocators whose algorithms
41  * are well known.
42  *
43  */
44 
45 /*
46  * TODO:
47  *	- Improve memory usage for large allocations
48  *	- Investigate cache size adjustments
49  */
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 
59 #include "opt_param.h"
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/kernel.h>
63 #include <sys/types.h>
64 #include <sys/queue.h>
65 #include <sys/malloc.h>
66 #include <sys/lock.h>
67 #include <sys/sysctl.h>
68 #include <sys/mutex.h>
69 #include <sys/proc.h>
70 #include <sys/smp.h>
71 #include <sys/vmmeter.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83 
84 /*
85  * This is the zone from which all zones are spawned.  The idea is that even
86  * the zone heads are allocated from the allocator, so we use the bss section
87  * to bootstrap us.
88  */
89 static struct uma_zone masterzone;
90 static uma_zone_t zones = &masterzone;
91 
92 /* This is the zone from which all of uma_slab_t's are allocated. */
93 static uma_zone_t slabzone;
94 
95 /*
96  * The initial hash tables come out of this zone so they can be allocated
97  * prior to malloc coming up.
98  */
99 static uma_zone_t hashzone;
100 
101 /*
102  * Zone that buckets come from.
103  */
104 static uma_zone_t bucketzone;
105 
106 /*
107  * Are we allowed to allocate buckets?
108  */
109 static int bucketdisable = 1;
110 
111 /* Linked list of all zones in the system */
112 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
113 
114 /* This mutex protects the zone list */
115 static struct mtx uma_mtx;
116 
117 /* Linked list of boot time pages */
118 static LIST_HEAD(,uma_slab) uma_boot_pages =
119     LIST_HEAD_INITIALIZER(&uma_boot_pages);
120 
121 /* Count of free boottime pages */
122 static int uma_boot_free = 0;
123 
124 /* Is the VM done starting up? */
125 static int booted = 0;
126 
127 /* This is the handle used to schedule our working set calculator */
128 static struct callout uma_callout;
129 
130 /* This is mp_maxid + 1, for use while looping over each cpu */
131 static int maxcpu;
132 
133 /*
134  * This structure is passed as the zone ctor arg so that I don't have to create
135  * a special allocation function just for zones.
136  */
137 struct uma_zctor_args {
138 	char *name;
139 	size_t size;
140 	uma_ctor ctor;
141 	uma_dtor dtor;
142 	uma_init uminit;
143 	uma_fini fini;
144 	int align;
145 	u_int16_t flags;
146 };
147 
148 /* Prototypes.. */
149 
150 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
151 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
152 static void page_free(void *, int, u_int8_t);
153 static uma_slab_t slab_zalloc(uma_zone_t, int);
154 static void cache_drain(uma_zone_t);
155 static void bucket_drain(uma_zone_t, uma_bucket_t);
156 static void zone_drain(uma_zone_t);
157 static void zone_ctor(void *, int, void *);
158 static void zone_dtor(void *, int, void *);
159 static void zero_init(void *, int);
160 static void zone_small_init(uma_zone_t zone);
161 static void zone_large_init(uma_zone_t zone);
162 static void zone_foreach(void (*zfunc)(uma_zone_t));
163 static void zone_timeout(uma_zone_t zone);
164 static int hash_alloc(struct uma_hash *);
165 static int hash_expand(struct uma_hash *, struct uma_hash *);
166 static void hash_free(struct uma_hash *hash);
167 static void uma_timeout(void *);
168 static void uma_startup3(void);
169 static void *uma_zalloc_internal(uma_zone_t, void *, int);
170 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
171 static void bucket_enable(void);
172 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
173 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
174 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
175 
176 void uma_print_zone(uma_zone_t);
177 void uma_print_stats(void);
178 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
179 
180 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
181     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
182 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
183 
184 /*
185  * This routine checks to see whether or not it's safe to enable buckets.
186  */
187 
188 static void
189 bucket_enable(void)
190 {
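	/* Keep per cpu buckets disabled while system free pages are scarce. */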
191 	if (cnt.v_free_count < cnt.v_free_min)
192 		bucketdisable = 1;
193 	else
194 		bucketdisable = 0;
195 }
196 
197 
198 /*
199  * Routine called by timeout which is used to fire off some time interval
200  * based calculations.  (working set, stats, etc.)
201  *
202  * Arguments:
203  *	arg   Unused
204  *
205  * Returns:
206  *	Nothing
207  */
208 static void
209 uma_timeout(void *unused)
210 {
211 	bucket_enable();
212 	zone_foreach(zone_timeout);
213 
214 	/* Reschedule this event */
215 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
216 }
217 
218 /*
219  * Routine to perform timeout driven calculations.  This does the working set
220  * calculation as well as hash expansion and per cpu statistics aggregation.
221  *
222  *  Arguments:
223  *	zone  The zone to operate on
224  *
225  *  Returns:
226  *	Nothing
227  */
228 static void
229 zone_timeout(uma_zone_t zone)
230 {
231 	uma_cache_t cache;
232 	u_int64_t alloc;
233 	int free;
234 	int cpu;
235 
236 	alloc = 0;
237 	free = 0;
238 
239 	/*
240 	 * Aggregate per cpu cache statistics back to the zone.
241 	 *
242 	 * I may rewrite this to set a flag in the per cpu cache instead of
243 	 * locking.  If the flag is not cleared on the next round I will have
244 	 * to lock and do it here instead so that the statistics don't get too
245 	 * far out of sync.
246 	 */
247 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
248 		for (cpu = 0; cpu < maxcpu; cpu++) {
249 			if (CPU_ABSENT(cpu))
250 				continue;
251 			CPU_LOCK(zone, cpu);
252 			cache = &zone->uz_cpu[cpu];
253 			/* Add them up, and reset */
254 			alloc += cache->uc_allocs;
255 			cache->uc_allocs = 0;
256 			if (cache->uc_allocbucket)
257 				free += cache->uc_allocbucket->ub_ptr + 1;
258 			if (cache->uc_freebucket)
259 				free += cache->uc_freebucket->ub_ptr + 1;
260 			CPU_UNLOCK(zone, cpu);
261 		}
262 	}
263 
264 	/* Now push these stats back into the zone.. */
265 	ZONE_LOCK(zone);
266 	zone->uz_allocs += alloc;
267 
268 	/*
269 	 * cachefree is an instantaneous snapshot of what is in the per cpu
270 	 * caches, not an accurate counter
271 	 */
272 	zone->uz_cachefree = free;
273 
274 	/*
275 	 * Expand the zone hash table.
276 	 *
277 	 * This is done if the number of slabs is larger than the hash size.
278 	 * What I'm trying to do here is completely eliminate collisions.  This
279 	 * may be a little aggressive.  Should I allow for two collisions max?
280 	 */
281 
282 	if (zone->uz_flags & UMA_ZFLAG_HASH &&
283 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
284 		struct uma_hash newhash;
285 		struct uma_hash oldhash;
286 		int ret;
287 
288 		/*
289 		 * This is so involved because allocating and freeing
290 		 * while the zone lock is held will lead to deadlock.
291 		 * I have to do everything in stages and check for
292 		 * races.
293 		 */
294 		newhash = zone->uz_hash;
295 		ZONE_UNLOCK(zone);
296 		ret = hash_alloc(&newhash);
297 		ZONE_LOCK(zone);
298 		if (ret) {
299 			if (hash_expand(&zone->uz_hash, &newhash)) {
300 				oldhash = zone->uz_hash;
301 				zone->uz_hash = newhash;
302 			} else
303 				oldhash = newhash;
304 
305 			ZONE_UNLOCK(zone);
306 			hash_free(&oldhash);
307 			ZONE_LOCK(zone);
308 		}
309 	}
310 
311 	/*
312 	 * Here we compute the working set size as the total number of items
313 	 * left outstanding since the last time interval.  This is slightly
314 	 * suboptimal. What we really want is the highest number of outstanding
315 	 * items during the last time quantum.  This should be close enough.
316 	 *
317 	 * The working set size is used to throttle the zone_drain function.
318 	 * We don't want to return memory that we may need again immediately.
319 	 */
320 	alloc = zone->uz_allocs - zone->uz_oallocs;
321 	zone->uz_oallocs = zone->uz_allocs;
322 	zone->uz_wssize = alloc;
323 
324 	ZONE_UNLOCK(zone);
325 }
326 
327 /*
328  * Allocate and zero fill the next-larger hash table from the appropriate
329  * backing store.
330  *
331  * Arguments:
332  *	hash  A new hash structure with the old hash size in uh_hashsize
333  *
334  * Returns:
335  *	1 on success and 0 on failure.
336  */
337 static int
338 hash_alloc(struct uma_hash *hash)
339 {
340 	int oldsize;
341 	int alloc;
342 
343 	oldsize = hash->uh_hashsize;
344 
345 	/* We're just going to go to a power of two greater */
346 	if (oldsize)  {
347 		hash->uh_hashsize = oldsize * 2;
348 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
349 		/* XXX Shouldn't be abusing DEVBUF here */
350 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
351 		    M_DEVBUF, M_NOWAIT);
352 	} else {
353 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
354 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
355 		    M_WAITOK);
356 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
357 	}
358 	if (hash->uh_slab_hash) {
359 		bzero(hash->uh_slab_hash, alloc);
360 		hash->uh_hashmask = hash->uh_hashsize - 1;
361 		return (1);
362 	}
363 
364 	return (0);
365 }
366 
367 /*
368  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
369  * to reduce collisions.  This must not be done in the regular allocation path,
370  * otherwise, we can recurse on the vm while allocating pages.
371  *
372  * Arguments:
373  *	oldhash  The hash you want to expand
374  *	newhash  The hash structure for the new table
375  *
376  * Returns:
377  * 	1 if the entries were moved into the new table and 0 otherwise.
380  */
381 static int
382 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
383 {
384 	uma_slab_t slab;
385 	int hval;
386 	int i;
387 
388 	if (!newhash->uh_slab_hash)
389 		return (0);
390 
391 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
392 		return (0);
393 
394 	/*
395 	 * I need to investigate hash algorithms for resizing without a
396 	 * full rehash.
397 	 */
398 
399 	for (i = 0; i < oldhash->uh_hashsize; i++)
400 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
401 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
402 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
403 			hval = UMA_HASH(newhash, slab->us_data);
404 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
405 			    slab, us_hlink);
406 		}
407 
408 	return (1);
409 }
410 
411 /*
412  * Free the hash table's bucket array to the appropriate backing store.
413  *
414  * Arguments:
415  *	hash  The hash table whose backing storage is being freed
417  *
418  * Returns:
419  *	Nothing
420  */
421 static void
422 hash_free(struct uma_hash *hash)
423 {
424 	if (hash->uh_slab_hash == NULL)
425 		return;
426 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
427 		uma_zfree_internal(hashzone,
428 		    hash->uh_slab_hash, NULL, 0);
429 	else
430 		free(hash->uh_slab_hash, M_DEVBUF);
431 }
432 
433 /*
434  * Frees all outstanding items in a bucket
435  *
436  * Arguments:
437  *	zone   The zone to free to, must be unlocked.
438  *	bucket The free/alloc bucket with items, cpu queue must be locked.
439  *
440  * Returns:
441  *	Nothing
442  */
443 
444 static void
445 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
446 {
447 	uma_slab_t slab;
448 	int mzone;
449 	void *item;
450 
451 	if (bucket == NULL)
452 		return;
453 
454 	slab = NULL;
455 	mzone = 0;
456 
457 	/* We have to lookup the slab again for malloc.. */
458 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
459 		mzone = 1;
460 
461 	while (bucket->ub_ptr > -1)  {
462 		item = bucket->ub_bucket[bucket->ub_ptr];
463 #ifdef INVARIANTS
464 		bucket->ub_bucket[bucket->ub_ptr] = NULL;
465 		KASSERT(item != NULL,
466 		    ("bucket_drain: botched ptr, item is NULL"));
467 #endif
468 		bucket->ub_ptr--;
469 		/*
470 		 * This is extremely inefficient.  The slab pointer was passed
471 		 * to uma_zfree_arg, but we lost it because the buckets don't
472 		 * hold them.  This will go away when free() gets a size passed
473 		 * to it.
474 		 */
475 		if (mzone)
476 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
477 		uma_zfree_internal(zone, item, slab, 1);
478 	}
479 }
480 
481 /*
482  * Drains the per cpu caches for a zone.
483  *
484  * Arguments:
485  *	zone  The zone to drain, must be locked.
486  *
487  * Returns:
488  *	Nothing
489  *
490  * This function returns with the zone locked so that the per cpu queues can
491  * not be filled until zone_drain is finished.
492  *
493  */
494 static void
495 cache_drain(uma_zone_t zone)
496 {
497 	uma_bucket_t bucket;
498 	uma_cache_t cache;
499 	int cpu;
500 
501 	/*
502 	 * Flush out the per cpu queues.
503 	 *
504 	 * XXX This causes unnecessary thrashing due to immediately having
505 	 * empty per cpu queues.  I need to improve this.
506 	 */
507 
508 	/*
509 	 * We have to lock each cpu cache before locking the zone
510 	 */
511 	ZONE_UNLOCK(zone);
512 
513 	for (cpu = 0; cpu < maxcpu; cpu++) {
514 		if (CPU_ABSENT(cpu))
515 			continue;
516 		CPU_LOCK(zone, cpu);
517 		cache = &zone->uz_cpu[cpu];
518 		bucket_drain(zone, cache->uc_allocbucket);
519 		bucket_drain(zone, cache->uc_freebucket);
520 	}
521 
522 	/*
523 	 * Drain the bucket queues and free the buckets; we just keep two per
524 	 * cpu (alloc/free).
525 	 */
526 	ZONE_LOCK(zone);
527 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
528 		LIST_REMOVE(bucket, ub_link);
529 		ZONE_UNLOCK(zone);
530 		bucket_drain(zone, bucket);
531 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
532 		ZONE_LOCK(zone);
533 	}
534 
535 	/* Now we do the free queue.. */
536 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
537 		LIST_REMOVE(bucket, ub_link);
538 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
539 	}
540 
541 	/* We unlock here, but they will all block until the zone is unlocked */
542 	for (cpu = 0; cpu < maxcpu; cpu++) {
543 		if (CPU_ABSENT(cpu))
544 			continue;
545 		CPU_UNLOCK(zone, cpu);
546 	}
547 
548 	zone->uz_cachefree = 0;
549 }
550 
551 /*
552  * Frees pages from a zone back to the system.  This is done on demand from
553  * the pageout daemon.
554  *
555  * Arguments:
556  *	zone  The zone to free pages from
558  *
559  * Returns:
560  *	Nothing.
561  */
562 static void
563 zone_drain(uma_zone_t zone)
564 {
565 	struct slabhead freeslabs = {};
566 	uma_slab_t slab;
567 	uma_slab_t n;
568 	u_int64_t extra;
569 	u_int8_t flags;
570 	u_int8_t *mem;
571 	int i;
572 
573 	/*
574 	 * We don't want to take pages from statically allocated zones at this
575 	 * time
576 	 */
577 	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
578 		return;
579 
580 	ZONE_LOCK(zone);
581 
582 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
583 		cache_drain(zone);
584 
585 	if (zone->uz_free < zone->uz_wssize)
586 		goto finished;
587 #ifdef UMA_DEBUG
588 	printf("%s working set size: %llu free items: %u\n",
589 	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
590 #endif
591 	extra = zone->uz_free - zone->uz_wssize;
592 	extra /= zone->uz_ipers;
593 
594 	/* extra is now the number of extra slabs that we can free */
595 
596 	if (extra == 0)
597 		goto finished;
598 
599 	slab = LIST_FIRST(&zone->uz_free_slab);
600 	while (slab && extra) {
601 		n = LIST_NEXT(slab, us_link);
602 
603 		/* We have nowhere to free these to */
604 		if (slab->us_flags & UMA_SLAB_BOOT) {
605 			slab = n;
606 			continue;
607 		}
608 
609 		LIST_REMOVE(slab, us_link);
610 		zone->uz_pages -= zone->uz_ppera;
611 		zone->uz_free -= zone->uz_ipers;
612 
613 		if (zone->uz_flags & UMA_ZFLAG_HASH)
614 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
615 
616 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
617 
618 		slab = n;
619 		extra--;
620 	}
621 finished:
622 	ZONE_UNLOCK(zone);
623 
624 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
625 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
626 		if (zone->uz_fini)
627 			for (i = 0; i < zone->uz_ipers; i++)
628 				zone->uz_fini(
629 				    slab->us_data + (zone->uz_rsize * i),
630 				    zone->uz_size);
631 		flags = slab->us_flags;
632 		mem = slab->us_data;
633 
634 		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
635 			uma_zfree_internal(slabzone, slab, NULL, 0);
636 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
637 			for (i = 0; i < zone->uz_ppera; i++)
638 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
639 				    kmem_object);
640 #ifdef UMA_DEBUG
641 		printf("%s: Returning %d bytes.\n",
642 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
643 #endif
644 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
645 	}
646 
647 }
648 
649 /*
650  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
651  *
652  * Arguments:
653  *	zone  The zone to allocate slabs for
654  *	wait  Shall we wait?
655  *
656  * Returns:
657  *	The slab that was allocated or NULL if there is no memory and the
658  *	caller specified M_NOWAIT.
659  *
660  */
661 static uma_slab_t
662 slab_zalloc(uma_zone_t zone, int wait)
663 {
664 	uma_slab_t slab;	/* Starting slab */
665 	u_int8_t *mem;
666 	u_int8_t flags;
667 	int i;
668 
669 	slab = NULL;
670 
671 #ifdef UMA_DEBUG
672 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
673 #endif
674 	ZONE_UNLOCK(zone);
675 
676 	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
677 		slab = uma_zalloc_internal(slabzone, NULL, wait);
678 		if (slab == NULL) {
679 			ZONE_LOCK(zone);
680 			return NULL;
681 		}
682 	}
683 
684 	/*
685 	 * This reproduces the old vm_zone behavior of zero filling pages the
686 	 * first time they are added to a zone.
687 	 *
688 	 * Malloced items are zeroed in uma_zalloc.
689 	 */
690 
691 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
692 		wait |= M_ZERO;
693 	else
694 		wait &= ~M_ZERO;
695 
696 	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
697 		mtx_lock(&Giant);
698 		mem = zone->uz_allocf(zone,
699 		    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
700 		mtx_unlock(&Giant);
701 		if (mem == NULL) {
702 			ZONE_LOCK(zone);
703 			return (NULL);
704 		}
705 	} else {
706 		uma_slab_t tmps;
707 
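		/*
		 * The VM isn't fully up yet, so take one of the pages that
		 * uma_startup() preallocated from bootmem instead of calling
		 * the zone's allocation function.
		 */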
708 		if (zone->uz_ppera > 1)
709 			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
710 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
711 			panic("Mallocing before uma_startup2 has been called.\n");
712 		if (uma_boot_free == 0)
713 			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
714 		tmps = LIST_FIRST(&uma_boot_pages);
715 		LIST_REMOVE(tmps, us_link);
716 		uma_boot_free--;
717 		mem = tmps->us_data;
718 	}
719 
720 	/* Point the slab into the allocated memory */
721 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
722 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
723 
724 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
725 		for (i = 0; i < zone->uz_ppera; i++)
726 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
727 
728 	slab->us_zone = zone;
729 	slab->us_data = mem;
730 
731 	/*
732 	 * This is intended to spread data out across cache lines.
733 	 *
734 	 * This code doesn't seem to work properly on x86, and on alpha
735 	 * it makes absolutely no performance difference. I'm sure it could
736 	 * use some tuning, but Sun makes outrageous claims about its
737 	 * performance.
738 	 */
739 #if 0
740 	if (zone->uz_cachemax) {
741 		slab->us_data += zone->uz_cacheoff;
742 		zone->uz_cacheoff += UMA_CACHE_INC;
743 		if (zone->uz_cacheoff > zone->uz_cachemax)
744 			zone->uz_cacheoff = 0;
745 	}
746 #endif
747 
748 	slab->us_freecount = zone->uz_ipers;
749 	slab->us_firstfree = 0;
750 	slab->us_flags = flags;
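	/* Chain the embedded free list: each entry points at the next item's index. */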
751 	for (i = 0; i < zone->uz_ipers; i++)
752 		slab->us_freelist[i] = i+1;
753 
754 	if (zone->uz_init)
755 		for (i = 0; i < zone->uz_ipers; i++)
756 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
757 			    zone->uz_size);
758 	ZONE_LOCK(zone);
759 
760 	if (zone->uz_flags & UMA_ZFLAG_HASH)
761 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
762 
763 	zone->uz_pages += zone->uz_ppera;
764 	zone->uz_free += zone->uz_ipers;
765 
766 
767 	return (slab);
768 }
769 
770 /*
771  * Allocates a number of pages from the system
772  *
773  * Arguments:
774  *	zone  Unused
775  *	bytes  The number of bytes requested
776  *	wait  Shall we wait?
777  *
778  * Returns:
779  *	A pointer to the allocated memory or possibly
780  *	NULL if M_NOWAIT is set.
781  */
782 static void *
783 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
784 {
785 	void *p;	/* Returned page */
786 
787 	*pflag = UMA_SLAB_KMEM;
788 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
789 
790 	return (p);
791 }
792 
793 /*
794  * Allocates a number of pages from within an object
795  *
796  * Arguments:
797  *	zone   Unused
798  *	bytes  The number of bytes requested
799  *	wait   Shall we wait?
800  *
801  * Returns:
802  *	A pointer to the allocated memory or possibly
803  *	NULL if M_NOWAIT is set.
804  *
805  * TODO: If we fail during a multi-page allocation release the pages that have
806  *	 already been allocated.
807  */
808 static void *
809 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
810 {
811 	vm_offset_t zkva;
812 	vm_offset_t retkva;
813 	vm_page_t p;
814 	int pages;
815 
816 	retkva = 0;
817 	pages = zone->uz_pages;
818 
819 	/*
820 	 * This looks a little weird since we're getting one page at a time
821 	 */
822 	while (bytes > 0) {
823 		p = vm_page_alloc(zone->uz_obj, pages,
824 		    VM_ALLOC_INTERRUPT);
825 		if (p == NULL)
826 			return (NULL);
827 
828 		zkva = zone->uz_kva + pages * PAGE_SIZE;
829 		if (retkva == 0)
830 			retkva = zkva;
831 		pmap_qenter(zkva, &p, 1);
832 		bytes -= PAGE_SIZE;
833 		pages += 1;
834 	}
835 
836 	*flags = UMA_SLAB_PRIV;
837 
838 	return ((void *)retkva);
839 }
840 
841 /*
842  * Frees a number of pages to the system
843  *
844  * Arguments:
845  *	mem   A pointer to the memory to be freed
846  *	size  The size of the memory being freed
847  *	flags The original p->us_flags field
848  *
849  * Returns:
850  *	Nothing
851  *
852  */
853 static void
854 page_free(void *mem, int size, u_int8_t flags)
855 {
856 	vm_map_t map;
857 
858 	if (flags & UMA_SLAB_KMEM)
859 		map = kmem_map;
860 	else
861 		panic("UMA: page_free used with invalid flags %d\n", flags);
862 
863 	kmem_free(map, (vm_offset_t)mem, size);
864 }
865 
866 /*
867  * Zero fill initializer
868  *
869  * Arguments/Returns follow uma_init specifications
870  *
871  */
872 static void
873 zero_init(void *mem, int size)
874 {
875 	bzero(mem, size);
876 }
877 
878 /*
879  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
880  *
881  * Arguments
882  *	zone  The zone we should initialize
883  *
884  * Returns
885  *	Nothing
886  */
887 static void
888 zone_small_init(uma_zone_t zone)
889 {
890 	int rsize;
891 	int memused;
892 	int ipers;
893 
894 	rsize = zone->uz_size;
895 
896 	if (rsize < UMA_SMALLEST_UNIT)
897 		rsize = UMA_SMALLEST_UNIT;
898 
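	/* uz_align is a mask (alignment - 1); round rsize up to the next aligned size. */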
899 	if (rsize & zone->uz_align)
900 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
901 
902 	zone->uz_rsize = rsize;
903 
904 	rsize += 1;	/* Account for the byte of linkage */
905 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
906 	zone->uz_ppera = 1;
907 
908 	memused = zone->uz_ipers * zone->uz_rsize;
909 
910 	/* Can we do any better? */
911 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
912 		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
913 			return;
914 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
915 		if (ipers > zone->uz_ipers) {
916 			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
917 			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
918 				zone->uz_flags |= UMA_ZFLAG_HASH;
919 			zone->uz_ipers = ipers;
920 		}
921 	}
922 
923 }
924 
925 /*
926  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
927  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
928  * more complicated.
929  *
930  * Arguments
931  *	zone  The zone we should initialize
932  *
933  * Returns
934  *	Nothing
935  */
936 static void
937 zone_large_init(uma_zone_t zone)
938 {
939 	int pages;
940 
941 	pages = zone->uz_size / UMA_SLAB_SIZE;
942 
943 	/* Account for remainder */
944 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
945 		pages++;
946 
947 	zone->uz_ppera = pages;
948 	zone->uz_ipers = 1;
949 
950 	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
951 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
952 		zone->uz_flags |= UMA_ZFLAG_HASH;
953 
954 	zone->uz_rsize = zone->uz_size;
955 }
956 
957 /*
958  * Zone header ctor.  This initializes all fields, locks, etc., and inserts
959  * the zone onto the global zone list.
960  *
961  * Arguments/Returns follow uma_ctor specifications
962  *	udata  Actually uma_zctor_args
963  *
964  */
965 
966 static void
967 zone_ctor(void *mem, int size, void *udata)
968 {
969 	struct uma_zctor_args *arg = udata;
970 	uma_zone_t zone = mem;
971 	int privlc;
972 	int cplen;
973 	int cpu;
974 
975 	bzero(zone, size);
976 	zone->uz_name = arg->name;
977 	zone->uz_size = arg->size;
978 	zone->uz_ctor = arg->ctor;
979 	zone->uz_dtor = arg->dtor;
980 	zone->uz_init = arg->uminit;
981 	zone->uz_fini = arg->fini;
982 	zone->uz_align = arg->align;
983 	zone->uz_free = 0;
984 	zone->uz_pages = 0;
985 	zone->uz_flags = 0;
986 	zone->uz_allocf = page_alloc;
987 	zone->uz_freef = page_free;
988 
989 	if (arg->flags & UMA_ZONE_ZINIT)
990 		zone->uz_init = zero_init;
991 
992 	if (arg->flags & UMA_ZONE_INTERNAL)
993 		zone->uz_flags |= UMA_ZFLAG_INTERNAL;
994 
995 	if (arg->flags & UMA_ZONE_MALLOC)
996 		zone->uz_flags |= UMA_ZFLAG_MALLOC;
997 
998 	if (arg->flags & UMA_ZONE_NOFREE)
999 		zone->uz_flags |= UMA_ZFLAG_NOFREE;
1000 
1001 	if (arg->flags & UMA_ZONE_VM)
1002 		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
1003 
1004 	if (zone->uz_size > UMA_SLAB_SIZE)
1005 		zone_large_init(zone);
1006 	else
1007 		zone_small_init(zone);
1008 
1009 	if (arg->flags & UMA_ZONE_MTXCLASS)
1010 		privlc = 1;
1011 	else
1012 		privlc = 0;
1013 
1014 	/* We do this so that the per cpu lock name is unique for each zone */
1015 	memcpy(zone->uz_lname, "PCPU ", 5);
1016 	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1017 	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1018 	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1019 
1020 	/*
1021 	 * If we're putting the slab header in the actual page we need to
1022 	 * figure out where in each page it goes.  This calculates a right
1023 	 * justified offset into the memory on a UMA_ALIGN_PTR boundary.
1024 	 */
1025 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1026 		int totsize;
1027 		int waste;
1028 
1029 		/* Size of the slab struct and free list */
1030 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1031 		if (totsize & UMA_ALIGN_PTR)
1032 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1033 			    (UMA_ALIGN_PTR + 1);
1034 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1035 
1036 		waste = zone->uz_pgoff;
1037 		waste -= (zone->uz_ipers * zone->uz_rsize);
1038 
1039 		/*
1040 		 * This calculates how much space we have for cache line size
1041 		 * optimizations.  It works by offsetting each slab slightly.
1042 		 * Currently it breaks on x86, and so it is disabled.
1043 		 */
1044 
1045 		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1046 			zone->uz_cachemax = waste - UMA_CACHE_INC;
1047 			zone->uz_cacheoff = 0;
1048 		}
1049 
1050 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1051 		    + zone->uz_ipers;
1052 		/* I don't think it's possible, but I'll make sure anyway */
1053 		if (totsize > UMA_SLAB_SIZE) {
1054 			printf("zone %s ipers %d rsize %d size %d\n",
1055 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1056 			    zone->uz_size);
1057 			panic("UMA slab won't fit.\n");
1058 		}
1059 	}
1060 
1061 	if (zone->uz_flags & UMA_ZFLAG_HASH)
1062 		hash_alloc(&zone->uz_hash);
1063 
1064 #ifdef UMA_DEBUG
1065 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1066 	    zone->uz_name, zone,
1067 	    zone->uz_size, zone->uz_ipers,
1068 	    zone->uz_ppera, zone->uz_pgoff);
1069 #endif
1070 	ZONE_LOCK_INIT(zone, privlc);
1071 
1072 	mtx_lock(&uma_mtx);
1073 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1074 	mtx_unlock(&uma_mtx);
1075 
1076 	/*
1077 	 * Some internal zones don't have room allocated for the per cpu
1078 	 * caches.  If we're internal, bail out here.
1079 	 */
1080 
1081 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1082 		return;
1083 
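	/*
	 * Size the per cpu buckets: at most one slab's worth of items,
	 * capped at UMA_BUCKET_SIZE.  uz_count is the highest bucket
	 * index we will fill, hence the - 1.
	 */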
1084 	if (zone->uz_ipers < UMA_BUCKET_SIZE)
1085 		zone->uz_count = zone->uz_ipers - 1;
1086 	else
1087 		zone->uz_count = UMA_BUCKET_SIZE - 1;
1088 
1089 	for (cpu = 0; cpu < maxcpu; cpu++)
1090 		CPU_LOCK_INIT(zone, cpu, privlc);
1091 }
1092 
1093 /*
1094  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1095  * and removes the zone from the global list.
1096  *
1097  * Arguments/Returns follow uma_dtor specifications
1098  *	udata  unused
1099  */
1100 
1101 static void
1102 zone_dtor(void *arg, int size, void *udata)
1103 {
1104 	uma_zone_t zone;
1105 	int cpu;
1106 
1107 	zone = (uma_zone_t)arg;
1108 
1109 	ZONE_LOCK(zone);
1110 	zone->uz_wssize = 0;
1111 	ZONE_UNLOCK(zone);
1112 
1113 	mtx_lock(&uma_mtx);
1114 	LIST_REMOVE(zone, uz_link);
1115 	zone_drain(zone);
1116 	mtx_unlock(&uma_mtx);
1117 
1118 	ZONE_LOCK(zone);
1119 	if (zone->uz_free != 0)
1120 		printf("Zone %s was not empty.  Lost %d pages of memory.\n",
1121 		    zone->uz_name, zone->uz_pages);
1122 
1123 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1124 		for (cpu = 0; cpu < maxcpu; cpu++)
1125 			CPU_LOCK_FINI(zone, cpu);
1126 
1127 	ZONE_UNLOCK(zone);
1128 	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1129 		hash_free(&zone->uz_hash);
1130 
1131 	ZONE_LOCK_FINI(zone);
1132 }
1133 /*
1134  * Traverses every zone in the system and calls a callback
1135  *
1136  * Arguments:
1137  *	zfunc  A pointer to a function which accepts a zone
1138  *		as an argument.
1139  *
1140  * Returns:
1141  *	Nothing
1142  */
1143 static void
1144 zone_foreach(void (*zfunc)(uma_zone_t))
1145 {
1146 	uma_zone_t zone;
1147 
1148 	mtx_lock(&uma_mtx);
1149 	LIST_FOREACH(zone, &uma_zones, uz_link) {
1150 		zfunc(zone);
1151 	}
1152 	mtx_unlock(&uma_mtx);
1153 }
1154 
1155 /* Public functions */
1156 /* See uma.h */
1157 void
1158 uma_startup(void *bootmem)
1159 {
1160 	struct uma_zctor_args args;
1161 	uma_slab_t slab;
1162 	int slabsize;
1163 	int i;
1164 
1165 #ifdef UMA_DEBUG
1166 	printf("Creating uma zone headers zone.\n");
1167 #endif
1168 #ifdef SMP
1169 	maxcpu = mp_maxid + 1;
1170 #else
1171 	maxcpu = 1;
1172 #endif
1173 #ifdef UMA_DEBUG
1174 	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1175 	Debugger("stop");
1176 #endif
1177 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1178 	/* "Manually" create the initial zone */
1179 	args.name = "UMA Zones";
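	/* struct uma_zone already embeds one uma_cache; add room for the other cpus. */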
1180 	args.size = sizeof(struct uma_zone) +
1181 	    (sizeof(struct uma_cache) * (maxcpu - 1));
1182 	args.ctor = zone_ctor;
1183 	args.dtor = zone_dtor;
1184 	args.uminit = zero_init;
1185 	args.fini = NULL;
1186 	args.align = 32 - 1;
1187 	args.flags = UMA_ZONE_INTERNAL;
1188 	/* The initial zone has no per cpu queues so it's smaller */
1189 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1190 
1191 #ifdef UMA_DEBUG
1192 	printf("Filling boot free list.\n");
1193 #endif
1194 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1195 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1196 		slab->us_data = (u_int8_t *)slab;
1197 		slab->us_flags = UMA_SLAB_BOOT;
1198 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1199 		uma_boot_free++;
1200 	}
1201 
1202 #ifdef UMA_DEBUG
1203 	printf("Creating slab zone.\n");
1204 #endif
1205 
1206 	/*
1207 	 * This is the max number of free list items we'll have with
1208 	 * offpage slabs.
1209 	 */
1210 
1211 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1212 	slabsize /= UMA_MAX_WASTE;
1213 	slabsize++;			/* In case the division truncated */
1214 	slabsize += sizeof(struct uma_slab);
1215 
1216 	/* Now make a zone for slab headers */
1217 	slabzone = uma_zcreate("UMA Slabs",
1218 				slabsize,
1219 				NULL, NULL, NULL, NULL,
1220 				UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1221 
1222 	hashzone = uma_zcreate("UMA Hash",
1223 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1224 	    NULL, NULL, NULL, NULL,
1225 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1226 
1227 	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1228 	    NULL, NULL, NULL, NULL,
1229 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1230 
1231 
1232 #ifdef UMA_DEBUG
1233 	printf("UMA startup complete.\n");
1234 #endif
1235 }
1236 
1237 /* see uma.h */
1238 void
1239 uma_startup2(void)
1240 {
1241 	booted = 1;
1242 	bucket_enable();
1243 #ifdef UMA_DEBUG
1244 	printf("UMA startup2 complete.\n");
1245 #endif
1246 }
1247 
1248 /*
1249  * Initialize our callout handle
1250  *
1251  */
1252 
1253 static void
1254 uma_startup3(void)
1255 {
1256 #ifdef UMA_DEBUG
1257 	printf("Starting callout.\n");
1258 #endif
1259 	callout_init(&uma_callout, 0);
1260 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1261 #ifdef UMA_DEBUG
1262 	printf("UMA startup3 complete.\n");
1263 #endif
1264 }
1265 
1266 /* See uma.h */
1267 uma_zone_t
1268 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1269 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1270 
1271 {
1272 	struct uma_zctor_args args;
1273 
1274 	/* This stuff is essential for the zone ctor */
1275 	args.name = name;
1276 	args.size = size;
1277 	args.ctor = ctor;
1278 	args.dtor = dtor;
1279 	args.uminit = uminit;
1280 	args.fini = fini;
1281 	args.align = align;
1282 	args.flags = flags;
1283 
1284 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1285 }
1286 
1287 /* See uma.h */
1288 void
1289 uma_zdestroy(uma_zone_t zone)
1290 {
1291 	uma_zfree_internal(zones, zone, NULL, 0);
1292 }
1293 
1294 /* See uma.h */
1295 void *
1296 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1297 {
1298 	void *item;
1299 	uma_cache_t cache;
1300 	uma_bucket_t bucket;
1301 	int cpu;
1302 
1303 	/* This is the fast path allocation */
1304 #ifdef UMA_DEBUG_ALLOC_1
1305 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1306 #endif
1307 
1308 	if (!(flags & M_NOWAIT)) {
1309 		KASSERT(curthread->td_intr_nesting_level == 0,
1310 		   ("malloc(M_WAITOK) in interrupt context"));
1311 		WITNESS_SLEEP(1, NULL);
1312 	}
1313 
1314 zalloc_restart:
1315 	cpu = PCPU_GET(cpuid);
1316 	CPU_LOCK(zone, cpu);
1317 	cache = &zone->uz_cpu[cpu];
1318 
1319 zalloc_start:
1320 	bucket = cache->uc_allocbucket;
1321 
1322 	if (bucket) {
1323 		if (bucket->ub_ptr > -1) {
1324 			item = bucket->ub_bucket[bucket->ub_ptr];
1325 #ifdef INVARIANTS
1326 			bucket->ub_bucket[bucket->ub_ptr] = NULL;
1327 #endif
1328 			bucket->ub_ptr--;
1329 			KASSERT(item != NULL,
1330 			    ("uma_zalloc: Bucket pointer mangled."));
1331 			cache->uc_allocs++;
1332 #ifdef INVARIANTS
1333 			uma_dbg_alloc(zone, NULL, item);
1334 #endif
1335 			CPU_UNLOCK(zone, cpu);
1336 			if (zone->uz_ctor)
1337 				zone->uz_ctor(item, zone->uz_size, udata);
1338 			if (flags & M_ZERO)
1339 				bzero(item, zone->uz_size);
1340 			return (item);
1341 		} else if (cache->uc_freebucket) {
1342 			/*
1343 			 * We have run out of items in our allocbucket.
1344 			 * See if we can switch with our free bucket.
1345 			 */
1346 			if (cache->uc_freebucket->ub_ptr > -1) {
1347 				uma_bucket_t swap;
1348 
1349 #ifdef UMA_DEBUG_ALLOC
1350 				printf("uma_zalloc: Swapping empty with alloc.\n");
1351 #endif
1352 				swap = cache->uc_freebucket;
1353 				cache->uc_freebucket = cache->uc_allocbucket;
1354 				cache->uc_allocbucket = swap;
1355 
1356 				goto zalloc_start;
1357 			}
1358 		}
1359 	}
1360 	ZONE_LOCK(zone);
1361 	/* Since we have locked the zone we may as well send back our stats */
1362 	zone->uz_allocs += cache->uc_allocs;
1363 	cache->uc_allocs = 0;
1364 
1365 	/* Our old one is now a free bucket */
1366 	if (cache->uc_allocbucket) {
1367 		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1368 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1369 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1370 		    cache->uc_allocbucket, ub_link);
1371 		cache->uc_allocbucket = NULL;
1372 	}
1373 
1374 	/* Check the free list for a new alloc bucket */
1375 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1376 		KASSERT(bucket->ub_ptr != -1,
1377 		    ("uma_zalloc_arg: Returning an empty bucket."));
1378 
1379 		LIST_REMOVE(bucket, ub_link);
1380 		cache->uc_allocbucket = bucket;
1381 		ZONE_UNLOCK(zone);
1382 		goto zalloc_start;
1383 	}
1384 	/* We are no longer associated with this cpu!!! */
1385 	CPU_UNLOCK(zone, cpu);
1386 
1387 	/* Bump up our uz_count so we get here less */
1388 	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1389 		zone->uz_count++;
1390 
1391 	/*
1392 	 * Now let's just fill a bucket and put it on the free list.  If that
1393 	 * works we'll restart the allocation from the beginning.
1394 	 */
1395 
1396 	if (uma_zalloc_bucket(zone, flags)) {
1397 		ZONE_UNLOCK(zone);
1398 		goto zalloc_restart;
1399 	}
1400 	ZONE_UNLOCK(zone);
1401 	/*
1402 	 * We may not be able to get a bucket so return an actual item.
1403 	 */
1404 #ifdef UMA_DEBUG
1405 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1406 #endif
1407 
1408 	return (uma_zalloc_internal(zone, udata, flags));
1409 }
1410 
1411 static uma_slab_t
1412 uma_zone_slab(uma_zone_t zone, int flags)
1413 {
1414 	uma_slab_t slab;
1415 
1416 	/*
1417 	 * This is to prevent us from recursively trying to allocate
1418 	 * buckets.  The problem is that if an allocation forces us to
1419 	 * grab a new bucket we will call page_alloc, which will go off
1420 	 * and cause the vm to allocate vm_map_entries.  If we need new
1421 	 * buckets there too, we will recurse into kmem_alloc and bad
1422 	 * things will happen.  So instead we return a NULL bucket, and make
1423 	 * the code that allocates buckets smart enough to deal with it.
1424 	 */
1425 	if (zone == bucketzone && zone->uz_recurse != 0)
1426 		return (NULL);
1427 
1428 	slab = NULL;
1429 
1430 	for (;;) {
1431 		/*
1432 		 * Find a slab with some space.  Prefer slabs that are partially
1433 		 * used over those that are totally full.  This helps to reduce
1434 		 * fragmentation.
1435 		 */
1436 		if (zone->uz_free != 0) {
1437 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1438 				slab = LIST_FIRST(&zone->uz_part_slab);
1439 			} else {
1440 				slab = LIST_FIRST(&zone->uz_free_slab);
1441 				LIST_REMOVE(slab, us_link);
1442 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1443 				us_link);
1444 			}
1445 			return (slab);
1446 		}
1447 
1448 		/*
1449 		 * M_NOVM means don't ask at all!
1450 		 */
1451 		if (flags & M_NOVM)
1452 			break;
1453 
1454 		if (zone->uz_maxpages &&
1455 		    zone->uz_pages >= zone->uz_maxpages) {
1456 			zone->uz_flags |= UMA_ZFLAG_FULL;
1457 
1458 			if (flags & M_WAITOK)
1459 				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1460 			else
1461 				break;
1462 			continue;
1463 		}
1464 		zone->uz_recurse++;
1465 		slab = slab_zalloc(zone, flags);
1466 		zone->uz_recurse--;
1467 		/*
1468 		 * If we got a slab here it's safe to mark it partially used
1469 		 * and return.  We assume that the caller is going to remove
1470 		 * at least one item.
1471 		 */
1472 		if (slab) {
1473 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1474 			return (slab);
1475 		}
1476 		/*
1477 		 * We might not have been able to get a slab but another cpu
1478 		 * could have while we were unlocked.  Check again before we
1479 		 * fail.
1480 		 */
1481 		if ((flags & M_WAITOK) == 0)
1482 			flags |= M_NOVM;
1483 	}
1484 	return (slab);
1485 }
1486 
1487 static __inline void *
1488 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1489 {
1490 	void *item;
1491 	u_int8_t freei;
1492 
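	/* Pop the first free item off the slab's embedded free list. */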
1493 	freei = slab->us_firstfree;
1494 	slab->us_firstfree = slab->us_freelist[freei];
1495 	item = slab->us_data + (zone->uz_rsize * freei);
1496 
1497 	slab->us_freecount--;
1498 	zone->uz_free--;
1499 #ifdef INVARIANTS
1500 	uma_dbg_alloc(zone, slab, item);
1501 #endif
1502 	/* Move this slab to the full list */
1503 	if (slab->us_freecount == 0) {
1504 		LIST_REMOVE(slab, us_link);
1505 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1506 	}
1507 
1508 	return (item);
1509 }
1510 
1511 static int
1512 uma_zalloc_bucket(uma_zone_t zone, int flags)
1513 {
1514 	uma_bucket_t bucket;
1515 	uma_slab_t slab;
1516 
1517 	/*
1518 	 * Try this zone's free list first so we don't allocate extra buckets.
1519 	 */
1520 
1521 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1522 		KASSERT(bucket->ub_ptr == -1,
1523 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1524 		LIST_REMOVE(bucket, ub_link);
1525 	} else {
1526 		int bflags;
1527 
1528 		bflags = flags;
1529 		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1530 			bflags |= M_NOVM;
1531 
1532 		ZONE_UNLOCK(zone);
1533 		bucket = uma_zalloc_internal(bucketzone,
1534 		    NULL, bflags);
1535 		ZONE_LOCK(zone);
1536 		if (bucket != NULL) {
1537 #ifdef INVARIANTS
1538 			bzero(bucket, bucketzone->uz_size);
1539 #endif
1540 			bucket->ub_ptr = -1;
1541 		}
1542 	}
1543 
1544 	if (bucket == NULL)
1545 		return (0);
1546 
1547 #ifdef SMP
1548 	/*
1549 	 * This code is here to limit the number of simultaneous bucket fills
1550 	 * for any given zone to the number of per cpu caches in this zone. This
1551 	 * is done so that we don't allocate more memory than we really need.
1552 	 */
1553 	if (zone->uz_fills >= mp_ncpus)
1554 		goto done;
1555 
1556 #endif
1557 	zone->uz_fills++;
1558 
1559 	/* Try to keep the buckets totally full */
1560 	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
1561 	    bucket->ub_ptr < zone->uz_count) {
1562 		while (slab->us_freecount &&
1563 		    bucket->ub_ptr < zone->uz_count) {
1564 			bucket->ub_bucket[++bucket->ub_ptr] =
1565 			    uma_slab_alloc(zone, slab);
1566 		}
1567 		/* Don't block on the next fill */
1568 		flags |= M_NOWAIT;
1569 		flags &= ~M_WAITOK;
1570 	}
1571 
1572 	zone->uz_fills--;
1573 
1574 	if (bucket->ub_ptr != -1) {
1575 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1576 		    bucket, ub_link);
1577 		return (1);
1578 	}
1579 #ifdef SMP
1580 done:
1581 #endif
1582 	uma_zfree_internal(bucketzone, bucket, NULL, 0);
1583 
1584 	return (0);
1585 }
1586 /*
1587  * Allocates an item for an internal zone
1588  *
1589  * Arguments
1590  *	zone   The zone to alloc for.
1591  *	udata  The data to be passed to the constructor.
1592  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1593  *
1594  * Returns
1595  *	NULL if there is no memory and M_NOWAIT is set
1596  *	An item if successful
1597  */
1598 
1599 static void *
1600 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1601 {
1602 	uma_slab_t slab;
1603 	void *item;
1604 
1605 	item = NULL;
1606 
1607 	/*
1608 	 * This is to stop us from allocating per cpu buckets while we're
1609 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
1610 	 * boot pages.
1611 	 */
1612 
1613 	if (bucketdisable && zone == bucketzone)
1614 		return (NULL);
1615 
1616 #ifdef UMA_DEBUG_ALLOC
1617 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1618 #endif
1619 	ZONE_LOCK(zone);
1620 
1621 	slab = uma_zone_slab(zone, flags);
1622 	if (slab == NULL) {
1623 		ZONE_UNLOCK(zone);
1624 		return (NULL);
1625 	}
1626 
1627 	item = uma_slab_alloc(zone, slab);
1628 
1629 	ZONE_UNLOCK(zone);
1630 
1631 	if (zone->uz_ctor != NULL)
1632 		zone->uz_ctor(item, zone->uz_size, udata);
1633 	if (flags & M_ZERO)
1634 		bzero(item, zone->uz_size);
1635 
1636 	return (item);
1637 }
1638 
1639 /* See uma.h */
1640 void
1641 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1642 {
1643 	uma_cache_t cache;
1644 	uma_bucket_t bucket;
1645 	int bflags;
1646 	int cpu;
1647 
1648 	/* This is the fast path free */
1649 #ifdef UMA_DEBUG_ALLOC_1
1650 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1651 #endif
1652 	/*
1653 	 * The race here is acceptable.  If we miss it we'll just have to wait
1654 	 * a little longer for the limits to be reset.
1655 	 */
1656 
1657 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1658 		goto zfree_internal;
1659 
1660 	if (zone->uz_dtor)
1661 		zone->uz_dtor(item, zone->uz_size, udata);
1662 
1663 zfree_restart:
1664 	cpu = PCPU_GET(cpuid);
1665 	CPU_LOCK(zone, cpu);
1666 	cache = &zone->uz_cpu[cpu];
1667 
1668 zfree_start:
1669 	bucket = cache->uc_freebucket;
1670 
1671 	if (bucket) {
1672 		/*
1673 		 * Do we have room in our bucket? It is OK for this uz count
1674 		 * check to be slightly out of sync.
1675 		 */
1676 
1677 		if (bucket->ub_ptr < zone->uz_count) {
1678 			bucket->ub_ptr++;
1679 			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1680 			    ("uma_zfree: Freeing to non free bucket index."));
1681 			bucket->ub_bucket[bucket->ub_ptr] = item;
1682 #ifdef INVARIANTS
1683 			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1684 				uma_dbg_free(zone, udata, item);
1685 			else
1686 				uma_dbg_free(zone, NULL, item);
1687 #endif
1688 			CPU_UNLOCK(zone, cpu);
1689 			return;
1690 		} else if (cache->uc_allocbucket) {
1691 #ifdef UMA_DEBUG_ALLOC
1692 			printf("uma_zfree: Swapping buckets.\n");
1693 #endif
1694 			/*
1695 			 * We have run out of space in our freebucket.
1696 			 * See if we can switch with our alloc bucket.
1697 			 */
1698 			if (cache->uc_allocbucket->ub_ptr <
1699 			    cache->uc_freebucket->ub_ptr) {
1700 				uma_bucket_t swap;
1701 
1702 				swap = cache->uc_freebucket;
1703 				cache->uc_freebucket = cache->uc_allocbucket;
1704 				cache->uc_allocbucket = swap;
1705 
1706 				goto zfree_start;
1707 			}
1708 		}
1709 	}
1710 
1711 	/*
1712 	 * We can get here for two reasons:
1713 	 *
1714 	 * 1) The buckets are NULL
1715 	 * 2) The alloc and free buckets are both somewhat full.
1716 	 *
1717 	 */
1718 
1719 	ZONE_LOCK(zone);
1720 
1721 	bucket = cache->uc_freebucket;
1722 	cache->uc_freebucket = NULL;
1723 
1724 	/* Can we throw this on the zone full list? */
1725 	if (bucket != NULL) {
1726 #ifdef UMA_DEBUG_ALLOC
1727 		printf("uma_zfree: Putting old bucket on the free list.\n");
1728 #endif
1729 		/* ub_ptr is pointing to the last free item */
1730 		KASSERT(bucket->ub_ptr != -1,
1731 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1732 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1733 		    bucket, ub_link);
1734 	}
1735 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1736 		LIST_REMOVE(bucket, ub_link);
1737 		ZONE_UNLOCK(zone);
1738 		cache->uc_freebucket = bucket;
1739 		goto zfree_start;
1740 	}
1741 	/* We're done with this CPU now */
1742 	CPU_UNLOCK(zone, cpu);
1743 
1744 	/* And the zone.. */
1745 	ZONE_UNLOCK(zone);
1746 
1747 #ifdef UMA_DEBUG_ALLOC
1748 	printf("uma_zfree: Allocating new free bucket.\n");
1749 #endif
1750 	bflags = M_NOWAIT;
1751 
1752 	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1753 		bflags |= M_NOVM;
1754 #ifdef INVARIANTS
1755 	bflags |= M_ZERO;
1756 #endif
1757 	bucket = uma_zalloc_internal(bucketzone,
1758 	    NULL, bflags);
1759 	if (bucket) {
1760 		bucket->ub_ptr = -1;
1761 		ZONE_LOCK(zone);
1762 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1763 		    bucket, ub_link);
1764 		ZONE_UNLOCK(zone);
1765 		goto zfree_restart;
1766 	}
1767 
1768 	/*
1769 	 * If nothing else caught this, we'll just do an internal free.
1770 	 */
1771 
1772 zfree_internal:
1773 
1774 	uma_zfree_internal(zone, item, udata, 0);
1775 
1776 	return;
1777 
1778 }
1779 
1780 /*
1781  * Frees an item to an INTERNAL zone or allocates a free bucket
1782  *
1783  * Arguments:
1784  *	zone   The zone to free to
1785  *	item   The item we're freeing
1786  *	udata  User supplied data for the dtor
1787  *	skip   Skip the dtor, it was done in uma_zfree_arg
1788  */
1789 
1790 static void
1791 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1792 {
1793 	uma_slab_t slab;
1794 	u_int8_t *mem;
1795 	u_int8_t freei;
1796 
1797 	if (!skip && zone->uz_dtor)
1798 		zone->uz_dtor(item, zone->uz_size, udata);
1799 
1800 	ZONE_LOCK(zone);
1801 
1802 	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1803 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1804 		if (zone->uz_flags & UMA_ZFLAG_HASH)
1805 			slab = hash_sfind(&zone->uz_hash, mem);
1806 		else {
1807 			mem += zone->uz_pgoff;
1808 			slab = (uma_slab_t)mem;
1809 		}
1810 	} else {
1811 		slab = (uma_slab_t)udata;
1812 	}
1813 
1814 	/* Do we need to remove from any lists? */
1815 	if (slab->us_freecount+1 == zone->uz_ipers) {
1816 		LIST_REMOVE(slab, us_link);
1817 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1818 	} else if (slab->us_freecount == 0) {
1819 		LIST_REMOVE(slab, us_link);
1820 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1821 	}
1822 
1823 	/* Slab management stuff */
1824 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1825 		/ zone->uz_rsize;
1826 
1827 #ifdef INVARIANTS
1828 	if (!skip)
1829 		uma_dbg_free(zone, slab, item);
1830 #endif
1831 
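	/* Push the item back onto the head of the slab's embedded free list. */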
1832 	slab->us_freelist[freei] = slab->us_firstfree;
1833 	slab->us_firstfree = freei;
1834 	slab->us_freecount++;
1835 
1836 	/* Zone statistics */
1837 	zone->uz_free++;
1838 
1839 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1840 		if (zone->uz_pages < zone->uz_maxpages)
1841 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1842 
1843 		/* We can handle one more allocation */
1844 		wakeup_one(zone);
1845 	}
1846 
1847 	ZONE_UNLOCK(zone);
1848 }
1849 
1850 /* See uma.h */
1851 void
1852 uma_zone_set_max(uma_zone_t zone, int nitems)
1853 {
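	/* Convert the item limit into a page limit, rounding up so at least nitems fit. */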
1854 	ZONE_LOCK(zone);
1855 	if (zone->uz_ppera > 1)
1856 		zone->uz_maxpages = nitems * zone->uz_ppera;
1857 	else
1858 		zone->uz_maxpages = nitems / zone->uz_ipers;
1859 
1860 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1861 		zone->uz_maxpages++;
1862 
1863 	ZONE_UNLOCK(zone);
1864 }
1865 
1866 /* See uma.h */
1867 void
1868 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1869 {
1870 	ZONE_LOCK(zone);
1871 
1872 	zone->uz_freef = freef;
1873 
1874 	ZONE_UNLOCK(zone);
1875 }
1876 
1877 /* See uma.h */
1878 void
1879 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1880 {
1881 	ZONE_LOCK(zone);
1882 
1883 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1884 	zone->uz_allocf = allocf;
1885 
1886 	ZONE_UNLOCK(zone);
1887 }
1888 
1889 /* See uma.h */
1890 int
1891 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1892 {
1893 	int pages;
1894 	vm_offset_t kva;
1895 
1896 	mtx_lock(&Giant);
1897 
1898 	pages = count / zone->uz_ipers;
1899 
1900 	if (pages * zone->uz_ipers < count)
1901 		pages++;
1902 
1903 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1904 
1905 	if (kva == 0) {
1906 		mtx_unlock(&Giant);
1907 		return (0);
1908 	}
1909 
1910 
1911 	if (obj == NULL)
1912 		obj = vm_object_allocate(OBJT_DEFAULT,
1913 		    pages);
1914 	else
1915 		_vm_object_allocate(OBJT_DEFAULT,
1916 		    pages, obj);
1917 
1918 	ZONE_LOCK(zone);
1919 	zone->uz_kva = kva;
1920 	zone->uz_obj = obj;
1921 	zone->uz_maxpages = pages;
1922 
1923 	zone->uz_allocf = obj_alloc;
1924 	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1925 
1926 	ZONE_UNLOCK(zone);
1927 	mtx_unlock(&Giant);
1928 
1929 	return (1);
1930 }
1931 
1932 /* See uma.h */
1933 void
1934 uma_prealloc(uma_zone_t zone, int items)
1935 {
1936 	int slabs;
1937 	uma_slab_t slab;
1938 
1939 	ZONE_LOCK(zone);
1940 	slabs = items / zone->uz_ipers;
1941 	if (slabs * zone->uz_ipers < items)
1942 		slabs++;
1943 
1944 	while (slabs > 0) {
1945 		slab = slab_zalloc(zone, M_WAITOK);
1946 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1947 		slabs--;
1948 	}
1949 	ZONE_UNLOCK(zone);
1950 }
1951 
1952 /* See uma.h */
1953 void
1954 uma_reclaim(void)
1955 {
1956 	/*
1957 	 * You might think that the delay below would improve performance since
1958 	 * the allocator will give away memory that it may ask for again immediately.
1959 	 * Really, it makes things worse, since cpu cycles are so much cheaper
1960 	 * than disk activity.
1961 	 */
1962 #if 0
1963 	static struct timeval tv = {0};
1964 	struct timeval now;
1965 	getmicrouptime(&now);
1966 	if (now.tv_sec > tv.tv_sec + 30)
1967 		tv = now;
1968 	else
1969 		return;
1970 #endif
1971 #ifdef UMA_DEBUG
1972 	printf("UMA: vm asked us to release pages!\n");
1973 #endif
1974 	bucket_enable();
1975 	zone_foreach(zone_drain);
1976 
1977 	/*
1978 	 * Some slabs may have been freed after this zone was visited, since it is
1979 	 * drained early.  Visit it again so we can free pages that became empty
1980 	 * while the other zones were drained.  We have to do the same for buckets.
1981 	 */
1982 	zone_drain(slabzone);
1983 	zone_drain(bucketzone);
1984 }
1985 
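/*
 * Allocate a large (multi-page) malloc region: a slab header from slabzone
 * plus fresh pages from page_alloc.  vsetslab() records the mapping so the
 * slab can be found again from the address later.
 */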
1986 void *
1987 uma_large_malloc(int size, int wait)
1988 {
1989 	void *mem;
1990 	uma_slab_t slab;
1991 	u_int8_t flags;
1992 
1993 	slab = uma_zalloc_internal(slabzone, NULL, wait);
1994 	if (slab == NULL)
1995 		return (NULL);
1996 
1997 	mem = page_alloc(NULL, size, &flags, wait);
1998 	if (mem) {
1999 		vsetslab((vm_offset_t)mem, slab);
2000 		slab->us_data = mem;
2001 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2002 		slab->us_size = size;
2003 	} else {
2004 		uma_zfree_internal(slabzone, slab, NULL, 0);
2005 	}
2006 
2007 
2008 	return (mem);
2009 }
2010 
2011 void
2012 uma_large_free(uma_slab_t slab)
2013 {
2014 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2015 	page_free(slab->us_data, slab->us_size, slab->us_flags);
2016 	uma_zfree_internal(slabzone, slab, NULL, 0);
2017 }
2018 
2019 void
2020 uma_print_stats(void)
2021 {
2022 	zone_foreach(uma_print_zone);
2023 }
2024 
2025 void
2026 uma_print_zone(uma_zone_t zone)
2027 {
2028 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2029 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2030 	    zone->uz_ipers, zone->uz_ppera,
2031 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2032 }
2033 
2034 /*
2035  * Sysctl handler for vm.zone
2036  *
2037  * stolen from vm_zone.c
2038  */
2039 static int
2040 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2041 {
2042 	int error, len, cnt;
2043 	const int linesize = 128;	/* conservative */
2044 	int totalfree;
2045 	char *tmpbuf, *offset;
2046 	uma_zone_t z;
2047 	char *p;
2048 
2049 	cnt = 0;
2050 	mtx_lock(&uma_mtx);
2051 	LIST_FOREACH(z, &uma_zones, uz_link)
2052 		cnt++;
2053 	mtx_unlock(&uma_mtx);
2054 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2055 			M_TEMP, M_WAITOK);
2056 	len = snprintf(tmpbuf, linesize,
2057 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2058 	if (cnt == 0)
2059 		tmpbuf[len - 1] = '\0';
2060 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2061 	if (error || cnt == 0)
2062 		goto out;
2063 	offset = tmpbuf;
2064 	mtx_lock(&uma_mtx);
2065 	LIST_FOREACH(z, &uma_zones, uz_link) {
2066 		if (cnt == 0)	/* list may have changed size */
2067 			break;
2068 		ZONE_LOCK(z);
2069 		totalfree = z->uz_free + z->uz_cachefree;
2070 		len = snprintf(offset, linesize,
2071 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2072 		    z->uz_name, z->uz_size,
2073 		    z->uz_maxpages * z->uz_ipers,
2074 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2075 		    totalfree,
2076 		    (unsigned long long)z->uz_allocs);
2077 		ZONE_UNLOCK(z);
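		/*
		 * Back up over the padding in the 12-character name field and
		 * place a ':' right after the name.
		 */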
2078 		for (p = offset + 12; p > offset && *p == ' '; --p)
2079 			/* nothing */ ;
2080 		p[1] = ':';
2081 		cnt--;
2082 		offset += len;
2083 	}
2084 	mtx_unlock(&uma_mtx);
2085 	*offset++ = '\0';
2086 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2087 out:
2088 	FREE(tmpbuf, M_TEMP);
2089 	return (error);
2090 }
2091