1 /*
2  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  *
28  */
29 
30 /*
31  * uma_core.c  Implementation of the Universal Memory allocator
32  *
33  * This allocator is intended to replace the multitude of similar object caches
34  * in the standard FreeBSD kernel.  The intent is to be flexible as well as
35  * efficient.  A primary design goal is to return unused memory to the rest of
36  * the system.  This will make the system as a whole more flexible due to the
37  * ability to move memory to subsystems which most need it instead of leaving
38  * pools of reserved memory unused.
39  *
40  * The basic ideas stem from similar slab/zone based allocators whose algorithms
41  * are well known.
42  *
43  */
44 
45 /*
46  * TODO:
47  *	- Improve memory usage for large allocations
48  *	- Investigate cache size adjustments
49  */
50 
51 /* I should really use ktr.. */
52 /*
53 #define UMA_DEBUG 1
54 #define UMA_DEBUG_ALLOC 1
55 #define UMA_DEBUG_ALLOC_1 1
56 */
57 
58 
59 #include "opt_param.h"
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/kernel.h>
63 #include <sys/types.h>
64 #include <sys/queue.h>
65 #include <sys/malloc.h>
66 #include <sys/lock.h>
67 #include <sys/sysctl.h>
68 #include <sys/mutex.h>
69 #include <sys/proc.h>
70 #include <sys/smp.h>
71 #include <sys/vmmeter.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_map.h>
78 #include <vm/vm_kern.h>
79 #include <vm/vm_extern.h>
80 #include <vm/uma.h>
81 #include <vm/uma_int.h>
82 #include <vm/uma_dbg.h>
83 
84 #include <machine/vmparam.h>
85 
86 /*
87  * This is the zone from which all zones are spawned.  The idea is that even
88  * the zone heads are allocated from the allocator, so we use the bss section
89  * to bootstrap us.
90  */
91 static struct uma_zone masterzone;
92 static uma_zone_t zones = &masterzone;
93 
94 /* This is the zone from which all of uma_slab_t's are allocated. */
95 static uma_zone_t slabzone;
96 
97 /*
98  * The initial hash tables come out of this zone so they can be allocated
99  * prior to malloc coming up.
100  */
101 static uma_zone_t hashzone;
102 
103 /*
104  * Zone that buckets come from.
105  */
106 static uma_zone_t bucketzone;
107 
108 /*
109  * Are we allowed to allocate buckets?
110  */
111 static int bucketdisable = 1;
112 
113 /* Linked list of all zones in the system */
114 static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones);
115 
116 /* This mutex protects the zone list */
117 static struct mtx uma_mtx;
118 
119 /* Linked list of boot time pages */
120 static LIST_HEAD(,uma_slab) uma_boot_pages =
121     LIST_HEAD_INITIALIZER(&uma_boot_pages);
122 
123 /* Count of free boottime pages */
124 static int uma_boot_free = 0;
125 
126 /* Is the VM done starting up? */
127 static int booted = 0;
128 
129 /* This is the handle used to schedule our working set calculator */
130 static struct callout uma_callout;
131 
132 /* This is mp_maxid + 1, for use while looping over each cpu */
133 static int maxcpu;
134 
135 /*
136  * This structure is passed as the zone ctor arg so that I don't have to create
137  * a special allocation function just for zones.
138  */
139 struct uma_zctor_args {
140 	char *name;
141 	size_t size;
142 	uma_ctor ctor;
143 	uma_dtor dtor;
144 	uma_init uminit;
145 	uma_fini fini;
146 	int align;
147 	u_int16_t flags;
148 };
149 
150 /* Prototypes.. */
151 
152 static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
153 static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
154 static void page_free(void *, int, u_int8_t);
155 static uma_slab_t slab_zalloc(uma_zone_t, int);
156 static void cache_drain(uma_zone_t);
157 static void bucket_drain(uma_zone_t, uma_bucket_t);
158 static void zone_drain(uma_zone_t);
159 static void zone_ctor(void *, int, void *);
160 static void zone_dtor(void *, int, void *);
161 static void zero_init(void *, int);
162 static void zone_small_init(uma_zone_t zone);
163 static void zone_large_init(uma_zone_t zone);
164 static void zone_foreach(void (*zfunc)(uma_zone_t));
165 static void zone_timeout(uma_zone_t zone);
166 static int hash_alloc(struct uma_hash *);
167 static int hash_expand(struct uma_hash *, struct uma_hash *);
168 static void hash_free(struct uma_hash *hash);
169 static void uma_timeout(void *);
170 static void uma_startup3(void);
171 static void *uma_zalloc_internal(uma_zone_t, void *, int);
172 static void uma_zfree_internal(uma_zone_t, void *, void *, int);
173 static void bucket_enable(void);
174 static int uma_zalloc_bucket(uma_zone_t zone, int flags);
175 static uma_slab_t uma_zone_slab(uma_zone_t zone, int flags);
176 static void *uma_slab_alloc(uma_zone_t zone, uma_slab_t slab);
177 
178 void uma_print_zone(uma_zone_t);
179 void uma_print_stats(void);
180 static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
181 
182 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
183     NULL, 0, sysctl_vm_zone, "A", "Zone Info");
184 SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
185 
186 /*
187  * This routine checks to see whether or not it's safe to enable buckets.
188  */
189 
190 static void
191 bucket_enable(void)
192 {
193 	if (cnt.v_free_count < cnt.v_free_min)
194 		bucketdisable = 1;
195 	else
196 		bucketdisable = 0;
197 }
198 
199 
200 /*
201  * Routine called by timeout which is used to fire off some time interval
202  * based calculations.  (working set, stats, etc.)
203  *
204  * Arguments:
205  *	arg   Unused
206  *
207  * Returns:
208  *	Nothing
209  */
210 static void
211 uma_timeout(void *unused)
212 {
213 	bucket_enable();
214 	zone_foreach(zone_timeout);
215 
216 	/* Reschedule this event */
217 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
218 }
219 
220 /*
221  * Routine to perform timeout driven calculations.  This computes the working
222  * set size, expands the hash if needed, and aggregates per cpu statistics.
223  *
224  *  Arguments:
225  *	zone  The zone to operate on
226  *
227  *  Returns:
228  *	Nothing
229  */
230 static void
231 zone_timeout(uma_zone_t zone)
232 {
233 	uma_cache_t cache;
234 	u_int64_t alloc;
235 	int free;
236 	int cpu;
237 
238 	alloc = 0;
239 	free = 0;
240 
241 	/*
242 	 * Aggregate per cpu cache statistics back to the zone.
243 	 *
244 	 * I may rewrite this to set a flag in the per cpu cache instead of
245 	 * locking.  If the flag is not cleared on the next round I will have
246 	 * to lock and do it here instead so that the statistics don't get too
247 	 * far out of sync.
248 	 */
249 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) {
250 		for (cpu = 0; cpu < maxcpu; cpu++) {
251 			if (CPU_ABSENT(cpu))
252 				continue;
253 			CPU_LOCK(zone, cpu);
254 			cache = &zone->uz_cpu[cpu];
255 			/* Add them up, and reset */
256 			alloc += cache->uc_allocs;
257 			cache->uc_allocs = 0;
258 			if (cache->uc_allocbucket)
259 				free += cache->uc_allocbucket->ub_ptr + 1;
260 			if (cache->uc_freebucket)
261 				free += cache->uc_freebucket->ub_ptr + 1;
262 			CPU_UNLOCK(zone, cpu);
263 		}
264 	}
265 
266 	/* Now push these stats back into the zone.. */
267 	ZONE_LOCK(zone);
268 	zone->uz_allocs += alloc;
269 
270 	/*
271 	 * cachefree is an instantaneous snapshot of what is in the per cpu
272 	 * caches, not an accurate counter
273 	 */
274 	zone->uz_cachefree = free;
275 
276 	/*
277 	 * Expand the zone hash table.
278 	 *
279 	 * This is done if the number of slabs is larger than the hash size.
280 	 * What I'm trying to do here is completely eliminate collisions.  This
281 	 * may be a little aggressive.  Should I allow for two collisions max?
282 	 */
283 
284 	if (zone->uz_flags & UMA_ZFLAG_HASH &&
285 	    zone->uz_pages / zone->uz_ppera >= zone->uz_hash.uh_hashsize) {
286 		struct uma_hash newhash;
287 		struct uma_hash oldhash;
288 		int ret;
289 
290 		/*
291 		 * This is so involved because allocating and freeing
292 		 * while the zone lock is held will lead to deadlock.
293 		 * I have to do everything in stages and check for
294 		 * races.
295 		 */
296 		newhash = zone->uz_hash;
297 		ZONE_UNLOCK(zone);
298 		ret = hash_alloc(&newhash);
299 		ZONE_LOCK(zone);
300 		if (ret) {
301 			if (hash_expand(&zone->uz_hash, &newhash)) {
302 				oldhash = zone->uz_hash;
303 				zone->uz_hash = newhash;
304 			} else
305 				oldhash = newhash;
306 
307 			ZONE_UNLOCK(zone);
308 			hash_free(&oldhash);
309 			ZONE_LOCK(zone);
310 		}
311 	}
312 
313 	/*
314 	 * Here we compute the working set size as the total number of items
315 	 * left outstanding since the last time interval.  This is slightly
316 	 * suboptimal. What we really want is the highest number of outstanding
317 	 * items during the last time quantum.  This should be close enough.
318 	 *
319 	 * The working set size is used to throttle the zone_drain function.
320 	 * We don't want to return memory that we may need again immediately.
321 	 */
322 	alloc = zone->uz_allocs - zone->uz_oallocs;
323 	zone->uz_oallocs = zone->uz_allocs;
324 	zone->uz_wssize = alloc;
325 
326 	ZONE_UNLOCK(zone);
327 }
328 
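/*
 * Illustrative note on the expansion check above: a zone whose slabs each
 * span one page (uz_ppera == 1) and whose table currently has 256 buckets
 * trips the test once it holds 256 slabs; hash_alloc() then builds a 512
 * bucket table and hash_expand() rehashes the existing slabs into it.
 */
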
329 /*
330  * Allocate and zero fill the next sized hash table from the appropriate
331  * backing store.
332  *
333  * Arguments:
334  *	hash  A new hash structure with the old hash size in uh_hashsize
335  *
336  * Returns:
337  *	1 on success and 0 on failure.
338  */
339 static int
340 hash_alloc(struct uma_hash *hash)
341 {
342 	int oldsize;
343 	int alloc;
344 
345 	oldsize = hash->uh_hashsize;
346 
347 	/* We're just going to go to the next greater power of two */
348 	if (oldsize)  {
349 		hash->uh_hashsize = oldsize * 2;
350 		alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
351 		/* XXX Shouldn't be abusing DEVBUF here */
352 		hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
353 		    M_DEVBUF, M_NOWAIT);
354 	} else {
355 		alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
356 		hash->uh_slab_hash = uma_zalloc_internal(hashzone, NULL,
357 		    M_WAITOK);
358 		hash->uh_hashsize = UMA_HASH_SIZE_INIT;
359 	}
360 	if (hash->uh_slab_hash) {
361 		bzero(hash->uh_slab_hash, alloc);
362 		hash->uh_hashmask = hash->uh_hashsize - 1;
363 		return (1);
364 	}
365 
366 	return (0);
367 }
368 
369 /*
370  * Expands the hash table for OFFPAGE zones.  This is done from zone_timeout
371  * to reduce collisions.  This must not be done in the regular allocation path,
372  * otherwise, we can recurse on the vm while allocating pages.
373  *
374  * Arguments:
375  *	oldhash  The hash you want to expand
376  *	newhash  The hash structure for the new table
377  *
378  * Returns:
379  * 	1 on success and 0 on failure.
382  */
383 static int
384 hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
385 {
386 	uma_slab_t slab;
387 	int hval;
388 	int i;
389 
390 	if (!newhash->uh_slab_hash)
391 		return (0);
392 
393 	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
394 		return (0);
395 
396 	/*
397 	 * I need to investigate hash algorithms for resizing without a
398 	 * full rehash.
399 	 */
400 
401 	for (i = 0; i < oldhash->uh_hashsize; i++)
402 		while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
403 			slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
404 			SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
405 			hval = UMA_HASH(newhash, slab->us_data);
406 			SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
407 			    slab, us_hlink);
408 		}
409 
410 	return (1);
411 }
412 
413 /*
414  * Free the hash bucket to the appropriate backing store.
415  *
416  * Arguments:
417  *	hash  The hash structure whose bucket array we're freeing
419  *
420  * Returns:
421  *	Nothing
422  */
423 static void
424 hash_free(struct uma_hash *hash)
425 {
426 	if (hash->uh_slab_hash == NULL)
427 		return;
428 	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
429 		uma_zfree_internal(hashzone,
430 		    hash->uh_slab_hash, NULL, 0);
431 	else
432 		free(hash->uh_slab_hash, M_DEVBUF);
433 }
434 
435 /*
436  * Frees all outstanding items in a bucket
437  *
438  * Arguments:
439  *	zone   The zone to free to, must be unlocked.
440  *	bucket The free/alloc bucket with items, cpu queue must be locked.
441  *
442  * Returns:
443  *	Nothing
444  */
445 
446 static void
447 bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
448 {
449 	uma_slab_t slab;
450 	int mzone;
451 	void *item;
452 
453 	if (bucket == NULL)
454 		return;
455 
456 	slab = NULL;
457 	mzone = 0;
458 
459 	/* We have to lookup the slab again for malloc.. */
460 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
461 		mzone = 1;
462 
463 	while (bucket->ub_ptr > -1)  {
464 		item = bucket->ub_bucket[bucket->ub_ptr];
465 #ifdef INVARIANTS
466 		bucket->ub_bucket[bucket->ub_ptr] = NULL;
467 		KASSERT(item != NULL,
468 		    ("bucket_drain: botched ptr, item is NULL"));
469 #endif
470 		bucket->ub_ptr--;
471 		/*
472 		 * This is extremely inefficient.  The slab pointer was passed
473 		 * to uma_zfree_arg, but we lost it because the buckets don't
474 		 * hold them.  This will go away when free() gets a size passed
475 		 * to it.
476 		 */
477 		if (mzone)
478 			slab = vtoslab((vm_offset_t)item & (~UMA_SLAB_MASK));
479 		uma_zfree_internal(zone, item, slab, 1);
480 	}
481 }
482 
483 /*
484  * Drains the per cpu caches for a zone.
485  *
486  * Arguments:
487  *	zone  The zone to drain, must be unlocked.
488  *
489  * Returns:
490  *	Nothing
491  *
492  * This function returns with the zone locked so that the per cpu queues can
493  * not be filled until zone_drain is finished.
494  *
495  */
496 static void
497 cache_drain(uma_zone_t zone)
498 {
499 	uma_bucket_t bucket;
500 	uma_cache_t cache;
501 	int cpu;
502 
503 	/*
504 	 * Flush out the per cpu queues.
505 	 *
506 	 * XXX This causes unnecessary thrashing due to immediately having
507 	 * empty per cpu queues.  I need to improve this.
508 	 */
509 
510 	/*
511 	 * We have to lock each cpu cache before locking the zone
512 	 */
513 	ZONE_UNLOCK(zone);
514 
515 	for (cpu = 0; cpu < maxcpu; cpu++) {
516 		if (CPU_ABSENT(cpu))
517 			continue;
518 		CPU_LOCK(zone, cpu);
519 		cache = &zone->uz_cpu[cpu];
520 		bucket_drain(zone, cache->uc_allocbucket);
521 		bucket_drain(zone, cache->uc_freebucket);
522 	}
523 
524 	/*
525 	 * Drain the bucket queues and free the buckets, we just keep two per
526 	 * cpu (alloc/free).
527 	 */
528 	ZONE_LOCK(zone);
529 	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
530 		LIST_REMOVE(bucket, ub_link);
531 		ZONE_UNLOCK(zone);
532 		bucket_drain(zone, bucket);
533 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
534 		ZONE_LOCK(zone);
535 	}
536 
537 	/* Now we do the free queue.. */
538 	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
539 		LIST_REMOVE(bucket, ub_link);
540 		uma_zfree_internal(bucketzone, bucket, NULL, 0);
541 	}
542 
543 	/* We unlock here, but they will all block until the zone is unlocked */
544 	for (cpu = 0; cpu < maxcpu; cpu++) {
545 		if (CPU_ABSENT(cpu))
546 			continue;
547 		CPU_UNLOCK(zone, cpu);
548 	}
549 
550 	zone->uz_cachefree = 0;
551 }
552 
553 /*
554  * Frees pages from a zone back to the system.  This is done on demand from
555  * the pageout daemon.
556  *
557  * Arguments:
558  *	zone  The zone to free pages from
560  *
561  * Returns:
562  *	Nothing.
563  */
564 static void
565 zone_drain(uma_zone_t zone)
566 {
567 	struct slabhead freeslabs = {};
568 	uma_slab_t slab;
569 	uma_slab_t n;
570 	u_int64_t extra;
571 	u_int8_t flags;
572 	u_int8_t *mem;
573 	int i;
574 
575 	/*
576 	 * We don't want to take pages from statically allocated zones at this
577 	 * time
578 	 */
579 	if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL)
580 		return;
581 
582 	ZONE_LOCK(zone);
583 
584 	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
585 		cache_drain(zone);
586 
587 	if (zone->uz_free < zone->uz_wssize)
588 		goto finished;
589 #ifdef UMA_DEBUG
590 	printf("%s working set size: %llu free items: %u\n",
591 	    zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free);
592 #endif
593 	extra = zone->uz_free - zone->uz_wssize;
594 	extra /= zone->uz_ipers;
595 
596 	/* extra is now the number of extra slabs that we can free */
597 
598 	if (extra == 0)
599 		goto finished;
600 
601 	slab = LIST_FIRST(&zone->uz_free_slab);
602 	while (slab && extra) {
603 		n = LIST_NEXT(slab, us_link);
604 
605 		/* We have nowhere to free these to */
606 		if (slab->us_flags & UMA_SLAB_BOOT) {
607 			slab = n;
608 			continue;
609 		}
610 
611 		LIST_REMOVE(slab, us_link);
612 		zone->uz_pages -= zone->uz_ppera;
613 		zone->uz_free -= zone->uz_ipers;
614 
615 		if (zone->uz_flags & UMA_ZFLAG_HASH)
616 			UMA_HASH_REMOVE(&zone->uz_hash, slab, slab->us_data);
617 
618 		SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
619 
620 		slab = n;
621 		extra--;
622 	}
623 finished:
624 	ZONE_UNLOCK(zone);
625 
626 	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
627 		SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
628 		if (zone->uz_fini)
629 			for (i = 0; i < zone->uz_ipers; i++)
630 				zone->uz_fini(
631 				    slab->us_data + (zone->uz_rsize * i),
632 				    zone->uz_size);
633 		flags = slab->us_flags;
634 		mem = slab->us_data;
635 
636 		if (zone->uz_flags & UMA_ZFLAG_OFFPAGE)
637 			uma_zfree_internal(slabzone, slab, NULL, 0);
638 		if (zone->uz_flags & UMA_ZFLAG_MALLOC) {
639 			vm_object_t obj;
640 
641 			if (flags & UMA_SLAB_KMEM)
642 				obj = kmem_object;
643 			else
644 				obj = NULL;
645 			for (i = 0; i < zone->uz_ppera; i++)
646 				vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
647 				    obj);
648 		}
649 #ifdef UMA_DEBUG
650 		printf("%s: Returning %d bytes.\n",
651 		    zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera);
652 #endif
653 		zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags);
654 	}
655 
656 }
657 
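/*
 * Worked example of the computation above (illustrative numbers only):
 * a zone with uz_free == 100, uz_wssize == 20 and uz_ipers == 16 gets
 * extra = (100 - 20) / 16 = 5, so at most five completely free slabs
 * are unlinked and handed back to uz_freef below.
 */
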
658 /*
659  * Allocate a new slab for a zone.  This does not insert the slab onto a list.
660  *
661  * Arguments:
662  *	zone  The zone to allocate slabs for
663  *	wait  Shall we wait?
664  *
665  * Returns:
666  *	The slab that was allocated or NULL if there is no memory and the
667  *	caller specified M_NOWAIT.
668  *
669  */
670 static uma_slab_t
671 slab_zalloc(uma_zone_t zone, int wait)
672 {
673 	uma_slab_t slab;	/* Starting slab */
674 	u_int8_t *mem;
675 	u_int8_t flags;
676 	int i;
677 
678 	slab = NULL;
679 
680 #ifdef UMA_DEBUG
681 	printf("slab_zalloc:  Allocating a new slab for %s\n", zone->uz_name);
682 #endif
683 	ZONE_UNLOCK(zone);
684 
685 	if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) {
686 		slab = uma_zalloc_internal(slabzone, NULL, wait);
687 		if (slab == NULL) {
688 			ZONE_LOCK(zone);
689 			return NULL;
690 		}
691 	}
692 
693 	/*
694 	 * This reproduces the old vm_zone behavior of zero filling pages the
695 	 * first time they are added to a zone.
696 	 *
697 	 * Malloced items are zeroed in uma_zalloc.
698 	 */
699 
700 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
701 		wait |= M_ZERO;
702 	else
703 		wait &= ~M_ZERO;
704 
705 	if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) {
706 		if ((wait & M_NOWAIT) == 0) {
707 			mtx_lock(&Giant);
708 			mem = zone->uz_allocf(zone,
709 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
710 			mtx_unlock(&Giant);
711 		} else {
712 			mem = zone->uz_allocf(zone,
713 			    zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait);
714 		}
715 		if (mem == NULL) {
716 			ZONE_LOCK(zone);
717 			return (NULL);
718 		}
719 	} else {
720 		uma_slab_t tmps;
721 
722 		if (zone->uz_ppera > 1)
723 			panic("UMA: Attempting to allocate multiple pages before vm has started.\n");
724 		if (zone->uz_flags & UMA_ZFLAG_MALLOC)
725 			panic("Mallocing before uma_startup2 has been called.\n");
726 		if (uma_boot_free == 0)
727 			panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n");
728 		tmps = LIST_FIRST(&uma_boot_pages);
729 		LIST_REMOVE(tmps, us_link);
730 		uma_boot_free--;
731 		mem = tmps->us_data;
732 		flags = tmps->us_flags;
733 	}
734 
735 	/* Point the slab into the allocated memory */
736 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE))
737 		slab = (uma_slab_t )(mem + zone->uz_pgoff);
738 
739 	if (zone->uz_flags & UMA_ZFLAG_MALLOC)
740 		for (i = 0; i < zone->uz_ppera; i++)
741 			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
742 
743 	slab->us_zone = zone;
744 	slab->us_data = mem;
745 
746 	/*
747 	 * This is intended to spread data out across cache lines.
748 	 *
749 	 * This code doesn't seem to work properly on x86, and on alpha
750 	 * it makes absolutely no performance difference. I'm sure it could
751 	 * use some tuning, but Sun makes outrageous claims about its
752 	 * performance.
753 	 */
754 #if 0
755 	if (zone->uz_cachemax) {
756 		slab->us_data += zone->uz_cacheoff;
757 		zone->uz_cacheoff += UMA_CACHE_INC;
758 		if (zone->uz_cacheoff > zone->uz_cachemax)
759 			zone->uz_cacheoff = 0;
760 	}
761 #endif
762 
763 	slab->us_freecount = zone->uz_ipers;
764 	slab->us_firstfree = 0;
765 	slab->us_flags = flags;
766 	for (i = 0; i < zone->uz_ipers; i++)
767 		slab->us_freelist[i] = i+1;
768 
769 	if (zone->uz_init)
770 		for (i = 0; i < zone->uz_ipers; i++)
771 			zone->uz_init(slab->us_data + (zone->uz_rsize * i),
772 			    zone->uz_size);
773 	ZONE_LOCK(zone);
774 
775 	if (zone->uz_flags & UMA_ZFLAG_HASH)
776 		UMA_HASH_INSERT(&zone->uz_hash, slab, mem);
777 
778 	zone->uz_pages += zone->uz_ppera;
779 	zone->uz_free += zone->uz_ipers;
780 
781 
782 	return (slab);
783 }
784 
785 /*
786  * Allocates a number of pages from the system
787  *
788  * Arguments:
789  *	zone  Unused
790  *	bytes  The number of bytes requested
791  *	wait  Shall we wait?
792  *
793  * Returns:
794  *	A pointer to the alloced memory or possibly
795  *	NULL if M_NOWAIT is set.
796  */
797 static void *
798 page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
799 {
800 	void *p;	/* Returned page */
801 
802 	*pflag = UMA_SLAB_KMEM;
803 	p = (void *) kmem_malloc(kmem_map, bytes, wait);
804 
805 	return (p);
806 }
807 
808 /*
809  * Allocates a number of pages from within an object
810  *
811  * Arguments:
812  *	zone   Unused
813  *	bytes  The number of bytes requested
814  *	wait   Shall we wait?
815  *
816  * Returns:
817  *	A pointer to the alloced memory or possibly
818  *	NULL if M_NOWAIT is set.
819  *
820  * TODO: If we fail during a multi-page allocation release the pages that have
821  *	 already been allocated.
822  */
823 static void *
824 obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
825 {
826 	vm_offset_t zkva;
827 	vm_offset_t retkva;
828 	vm_page_t p;
829 	int pages;
830 
831 	retkva = 0;
832 	pages = zone->uz_pages;
833 
834 	/*
835 	 * This looks a little weird since we're getting one page at a time
836 	 */
837 	while (bytes > 0) {
838 		VM_OBJECT_LOCK(zone->uz_obj);
839 		p = vm_page_alloc(zone->uz_obj, pages,
840 		    VM_ALLOC_INTERRUPT);
841 		VM_OBJECT_UNLOCK(zone->uz_obj);
842 		if (p == NULL)
843 			return (NULL);
844 
845 		zkva = zone->uz_kva + pages * PAGE_SIZE;
846 		if (retkva == 0)
847 			retkva = zkva;
848 		pmap_qenter(zkva, &p, 1);
849 		bytes -= PAGE_SIZE;
850 		pages += 1;
851 	}
852 
853 	*flags = UMA_SLAB_PRIV;
854 
855 	return ((void *)retkva);
856 }
857 
858 /*
859  * Frees a number of pages to the system
860  *
861  * Arguments:
862  *	mem   A pointer to the memory to be freed
863  *	size  The size of the memory being freed
864  *	flags The original p->us_flags field
865  *
866  * Returns:
867  *	Nothing
868  *
869  */
870 static void
871 page_free(void *mem, int size, u_int8_t flags)
872 {
873 	vm_map_t map;
874 
875 	if (flags & UMA_SLAB_KMEM)
876 		map = kmem_map;
877 	else
878 		panic("UMA: page_free used with invalid flags %d\n", flags);
879 
880 	kmem_free(map, (vm_offset_t)mem, size);
881 }
882 
883 /*
884  * Zero fill initializer
885  *
886  * Arguments/Returns follow uma_init specifications
887  *
888  */
889 static void
890 zero_init(void *mem, int size)
891 {
892 	bzero(mem, size);
893 }
894 
895 /*
896  * Finish creating a small uma zone.  This calculates ipers, and the zone size.
897  *
898  * Arguments
899  *	zone  The zone we should initialize
900  *
901  * Returns
902  *	Nothing
903  */
904 static void
905 zone_small_init(uma_zone_t zone)
906 {
907 	int rsize;
908 	int memused;
909 	int ipers;
910 
911 	rsize = zone->uz_size;
912 
913 	if (rsize < UMA_SMALLEST_UNIT)
914 		rsize = UMA_SMALLEST_UNIT;
915 
916 	if (rsize & zone->uz_align)
917 		rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1);
918 
919 	zone->uz_rsize = rsize;
920 
921 	rsize += 1;	/* Account for the byte of linkage */
922 	zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize;
923 	zone->uz_ppera = 1;
924 
925 	memused = zone->uz_ipers * zone->uz_rsize;
926 
927 	/* Can we do any better? */
928 	if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) {
929 		if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
930 			return;
931 		ipers = UMA_SLAB_SIZE / zone->uz_rsize;
932 		if (ipers > zone->uz_ipers) {
933 			zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
934 			if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
935 				zone->uz_flags |= UMA_ZFLAG_HASH;
936 			zone->uz_ipers = ipers;
937 		}
938 	}
939 
940 }
941 
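/*
 * Worked example of the sizing above (a sketch; the 4096 byte slab and
 * 64 byte header are illustrative assumptions, not the real constants):
 * a 100 byte item with an alignment mask of 7 is padded to rsize = 104.
 * With one linkage byte per item, ipers = (4096 - 64) / 105 = 38 and
 * memused = 38 * 104 = 3952, leaving 144 bytes.  If that waste exceeds
 * UMA_MAX_WASTE the zone goes OFFPAGE, where ipers = 4096 / 104 = 39.
 */
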
942 /*
943  * Finish creating a large (> UMA_SLAB_SIZE) uma zone.  Just give in and do
944  * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
945  * more complicated.
946  *
947  * Arguments
948  *	zone  The zone we should initialize
949  *
950  * Returns
951  *	Nothing
952  */
953 static void
954 zone_large_init(uma_zone_t zone)
955 {
956 	int pages;
957 
958 	pages = zone->uz_size / UMA_SLAB_SIZE;
959 
960 	/* Account for remainder */
961 	if ((pages * UMA_SLAB_SIZE) < zone->uz_size)
962 		pages++;
963 
964 	zone->uz_ppera = pages;
965 	zone->uz_ipers = 1;
966 
967 	zone->uz_flags |= UMA_ZFLAG_OFFPAGE;
968 	if ((zone->uz_flags & UMA_ZFLAG_MALLOC) == 0)
969 		zone->uz_flags |= UMA_ZFLAG_HASH;
970 
971 	zone->uz_rsize = zone->uz_size;
972 }
973 
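/*
 * Worked example of the above (assuming, for illustration, a 4096 byte
 * UMA_SLAB_SIZE): a 9000 byte item gives pages = 9000 / 4096 = 2 with a
 * remainder, so ppera becomes 3 and each slab holds exactly one item.
 */
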
974 /*
975  * Zone header ctor.  This initializes all fields, locks, etc.  And inserts
976  * the zone onto the global zone list.
977  *
978  * Arguments/Returns follow uma_ctor specifications
979  *	udata  Actually uma_zcreat_args
980  *
981  */
982 
983 static void
984 zone_ctor(void *mem, int size, void *udata)
985 {
986 	struct uma_zctor_args *arg = udata;
987 	uma_zone_t zone = mem;
988 	int privlc;
989 	int cplen;
990 	int cpu;
991 
992 	bzero(zone, size);
993 	zone->uz_name = arg->name;
994 	zone->uz_size = arg->size;
995 	zone->uz_ctor = arg->ctor;
996 	zone->uz_dtor = arg->dtor;
997 	zone->uz_init = arg->uminit;
998 	zone->uz_fini = arg->fini;
999 	zone->uz_align = arg->align;
1000 	zone->uz_free = 0;
1001 	zone->uz_pages = 0;
1002 	zone->uz_flags = 0;
1003 	zone->uz_allocf = page_alloc;
1004 	zone->uz_freef = page_free;
1005 
1006 	if (arg->flags & UMA_ZONE_ZINIT)
1007 		zone->uz_init = zero_init;
1008 
1009 	if (arg->flags & UMA_ZONE_INTERNAL)
1010 		zone->uz_flags |= UMA_ZFLAG_INTERNAL;
1011 
1012 	if (arg->flags & UMA_ZONE_MALLOC)
1013 		zone->uz_flags |= UMA_ZFLAG_MALLOC;
1014 
1015 	if (arg->flags & UMA_ZONE_NOFREE)
1016 		zone->uz_flags |= UMA_ZFLAG_NOFREE;
1017 
1018 	if (arg->flags & UMA_ZONE_VM)
1019 		zone->uz_flags |= UMA_ZFLAG_BUCKETCACHE;
1020 
1021 	if (zone->uz_size > UMA_SLAB_SIZE)
1022 		zone_large_init(zone);
1023 	else
1024 		zone_small_init(zone);
1025 #ifdef UMA_MD_SMALL_ALLOC
1026 	if (zone->uz_ppera == 1) {
1027 		zone->uz_allocf = uma_small_alloc;
1028 		zone->uz_freef = uma_small_free;
1029 	}
1030 #endif	/* UMA_MD_SMALL_ALLOC */
1031 
1032 	if (arg->flags & UMA_ZONE_MTXCLASS)
1033 		privlc = 1;
1034 	else
1035 		privlc = 0;
1036 
1037 	/* We do this so that the per cpu lock name is unique for each zone */
1038 	memcpy(zone->uz_lname, "PCPU ", 5);
1039 	cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6);
1040 	memcpy(zone->uz_lname+5, zone->uz_name, cplen);
1041 	zone->uz_lname[LOCKNAME_LEN - 1] = '\0';
1042 
1043 	/*
1044 	 * If we're putting the slab header in the actual page we need to
1045 	 * figure out where in each page it goes.  This calculates a right
1046 	 * justified offset into the memory on an ALIGN_PTR boundary.
1047 	 */
1048 	if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) {
1049 		int totsize;
1050 		int waste;
1051 
1052 		/* Size of the slab struct and free list */
1053 		totsize = sizeof(struct uma_slab) + zone->uz_ipers;
1054 		if (totsize & UMA_ALIGN_PTR)
1055 			totsize = (totsize & ~UMA_ALIGN_PTR) +
1056 			    (UMA_ALIGN_PTR + 1);
1057 		zone->uz_pgoff = UMA_SLAB_SIZE - totsize;
1058 
1059 		waste = zone->uz_pgoff;
1060 		waste -= (zone->uz_ipers * zone->uz_rsize);
1061 
1062 		/*
1063 		 * This calculates how much space we have for cache line size
1064 		 * optimizations.  It works by offsetting each slab slightly.
1065 		 * Currently it breaks on x86, and so it is disabled.
1066 		 */
1067 
1068 		if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) {
1069 			zone->uz_cachemax = waste - UMA_CACHE_INC;
1070 			zone->uz_cacheoff = 0;
1071 		}
1072 
1073 		totsize = zone->uz_pgoff + sizeof(struct uma_slab)
1074 		    + zone->uz_ipers;
1075 		/* I don't think it's possible, but I'll make sure anyway */
1076 		if (totsize > UMA_SLAB_SIZE) {
1077 			printf("zone %s ipers %d rsize %d size %d\n",
1078 			    zone->uz_name, zone->uz_ipers, zone->uz_rsize,
1079 			    zone->uz_size);
1080 			panic("UMA slab won't fit.\n");
1081 		}
1082 	}
1083 
1084 	if (zone->uz_flags & UMA_ZFLAG_HASH)
1085 		hash_alloc(&zone->uz_hash);
1086 
1087 #ifdef UMA_DEBUG
1088 	printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n",
1089 	    zone->uz_name, zone,
1090 	    zone->uz_size, zone->uz_ipers,
1091 	    zone->uz_ppera, zone->uz_pgoff);
1092 #endif
1093 	ZONE_LOCK_INIT(zone, privlc);
1094 
1095 	mtx_lock(&uma_mtx);
1096 	LIST_INSERT_HEAD(&uma_zones, zone, uz_link);
1097 	mtx_unlock(&uma_mtx);
1098 
1099 	/*
1100 	 * Some internal zones don't have room allocated for the per cpu
1101 	 * caches.  If we're internal, bail out here.
1102 	 */
1103 
1104 	if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
1105 		return;
1106 
1107 	if (zone->uz_ipers < UMA_BUCKET_SIZE)
1108 		zone->uz_count = zone->uz_ipers - 1;
1109 	else
1110 		zone->uz_count = UMA_BUCKET_SIZE - 1;
1111 
1112 	for (cpu = 0; cpu < maxcpu; cpu++)
1113 		CPU_LOCK_INIT(zone, cpu, privlc);
1114 }
1115 
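/*
 * Worked example of the pgoff math above (using the illustrative 64 byte
 * header and 4096 byte slab again): with ipers = 38 the slab struct plus
 * free list is 64 + 38 = 102 bytes, rounded up to the next pointer
 * boundary (104 with 8 byte pointers), so uz_pgoff = 4096 - 104 = 3992
 * and the header sits right justified at the end of each page.
 */
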
1116 /*
1117  * Zone header dtor.  This frees all data, destroys locks, frees the hash table
1118  * and removes the zone from the global list.
1119  *
1120  * Arguments/Returns follow uma_dtor specifications
1121  *	udata  unused
1122  */
1123 
1124 static void
1125 zone_dtor(void *arg, int size, void *udata)
1126 {
1127 	uma_zone_t zone;
1128 	int cpu;
1129 
1130 	zone = (uma_zone_t)arg;
1131 
1132 	ZONE_LOCK(zone);
1133 	zone->uz_wssize = 0;
1134 	ZONE_UNLOCK(zone);
1135 
1136 	mtx_lock(&uma_mtx);
1137 	LIST_REMOVE(zone, uz_link);
1138 	zone_drain(zone);
1139 	mtx_unlock(&uma_mtx);
1140 
1141 	ZONE_LOCK(zone);
1142 	if (zone->uz_free != 0)
1143 		printf("Zone %s was not empty (%d items).  Lost %d pages of memory.\n",
1144 		    zone->uz_name, zone->uz_free, zone->uz_pages);
1145 
1146 	if ((zone->uz_flags & UMA_ZFLAG_INTERNAL) == 0)
1147 		for (cpu = 0; cpu < maxcpu; cpu++)
1148 			CPU_LOCK_FINI(zone, cpu);
1149 
1150 	ZONE_UNLOCK(zone);
1151 	if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) != 0)
1152 		hash_free(&zone->uz_hash);
1153 
1154 	ZONE_LOCK_FINI(zone);
1155 }
1156 /*
1157  * Traverses every zone in the system and calls a callback
1158  *
1159  * Arguments:
1160  *	zfunc  A pointer to a function which accepts a zone
1161  *		as an argument.
1162  *
1163  * Returns:
1164  *	Nothing
1165  */
1166 static void
1167 zone_foreach(void (*zfunc)(uma_zone_t))
1168 {
1169 	uma_zone_t zone;
1170 
1171 	mtx_lock(&uma_mtx);
1172 	LIST_FOREACH(zone, &uma_zones, uz_link) {
1173 		zfunc(zone);
1174 	}
1175 	mtx_unlock(&uma_mtx);
1176 }
1177 
1178 /* Public functions */
1179 /* See uma.h */
1180 void
1181 uma_startup(void *bootmem)
1182 {
1183 	struct uma_zctor_args args;
1184 	uma_slab_t slab;
1185 	int slabsize;
1186 	int i;
1187 
1188 #ifdef UMA_DEBUG
1189 	printf("Creating uma zone headers zone.\n");
1190 #endif
1191 #ifdef SMP
1192 	maxcpu = mp_maxid + 1;
1193 #else
1194 	maxcpu = 1;
1195 #endif
1196 #ifdef UMA_DEBUG
1197 	printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid);
1198 	Debugger("stop");
1199 #endif
1200 	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1201 	/* "manually" Create the initial zone */
1202 	args.name = "UMA Zones";
1203 	args.size = sizeof(struct uma_zone) +
1204 	    (sizeof(struct uma_cache) * (maxcpu - 1));
1205 	args.ctor = zone_ctor;
1206 	args.dtor = zone_dtor;
1207 	args.uminit = zero_init;
1208 	args.fini = NULL;
1209 	args.align = 32 - 1;
1210 	args.flags = UMA_ZONE_INTERNAL;
1211 	/* The initial zone has no Per cpu queues so it's smaller */
1212 	zone_ctor(zones, sizeof(struct uma_zone), &args);
1213 
1214 #ifdef UMA_DEBUG
1215 	printf("Filling boot free list.\n");
1216 #endif
1217 	for (i = 0; i < UMA_BOOT_PAGES; i++) {
1218 		slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1219 		slab->us_data = (u_int8_t *)slab;
1220 		slab->us_flags = UMA_SLAB_BOOT;
1221 		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1222 		uma_boot_free++;
1223 	}
1224 
1225 #ifdef UMA_DEBUG
1226 	printf("Creating slab zone.\n");
1227 #endif
1228 
1229 	/*
1230 	 * This is the max number of free list items we'll have with
1231 	 * offpage slabs.
1232 	 */
1233 
1234 	slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab);
1235 	slabsize /= UMA_MAX_WASTE;
1236 	slabsize++;			/* In case the division rounded down */
1237 	slabsize += sizeof(struct uma_slab);
1238 
1239 	/* Now make a zone for slab headers */
1240 	slabzone = uma_zcreate("UMA Slabs",
1241 				slabsize,
1242 				NULL, NULL, NULL, NULL,
1243 				UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1244 
1245 	hashzone = uma_zcreate("UMA Hash",
1246 	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1247 	    NULL, NULL, NULL, NULL,
1248 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1249 
1250 	bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket),
1251 	    NULL, NULL, NULL, NULL,
1252 	    UMA_ALIGN_PTR, UMA_ZONE_INTERNAL);
1253 
1254 #ifdef UMA_MD_SMALL_ALLOC
1255 	booted = 1;
1256 #endif
1257 
1258 #ifdef UMA_DEBUG
1259 	printf("UMA startup complete.\n");
1260 #endif
1261 }
1262 
1263 /* see uma.h */
1264 void
1265 uma_startup2(void)
1266 {
1267 	booted = 1;
1268 	bucket_enable();
1269 #ifdef UMA_DEBUG
1270 	printf("UMA startup2 complete.\n");
1271 #endif
1272 }
1273 
1274 /*
1275  * Initialize our callout handle
1276  *
1277  */
1278 
1279 static void
1280 uma_startup3(void)
1281 {
1282 #ifdef UMA_DEBUG
1283 	printf("Starting callout.\n");
1284 #endif
1285 	callout_init(&uma_callout, 0);
1286 	callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL);
1287 #ifdef UMA_DEBUG
1288 	printf("UMA startup3 complete.\n");
1289 #endif
1290 }
1291 
1292 /* See uma.h */
1293 uma_zone_t
1294 uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1295 		uma_init uminit, uma_fini fini, int align, u_int16_t flags)
1296 
1297 {
1298 	struct uma_zctor_args args;
1299 
1300 	/* This stuff is essential for the zone ctor */
1301 	args.name = name;
1302 	args.size = size;
1303 	args.ctor = ctor;
1304 	args.dtor = dtor;
1305 	args.uminit = uminit;
1306 	args.fini = fini;
1307 	args.align = align;
1308 	args.flags = flags;
1309 
1310 	return (uma_zalloc_internal(zones, &args, M_WAITOK));
1311 }
1312 
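/*
 * Illustrative use of the public interface (a sketch, not part of this
 * file; "foo_zone" and struct foo are hypothetical):
 *
 *	static uma_zone_t foo_zone;
 *	struct foo *p;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	p = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, p);
 *	uma_zdestroy(foo_zone);
 */
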
1313 /* See uma.h */
1314 void
1315 uma_zdestroy(uma_zone_t zone)
1316 {
1317 	uma_zfree_internal(zones, zone, NULL, 0);
1318 }
1319 
1320 /* See uma.h */
1321 void *
1322 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1323 {
1324 	void *item;
1325 	uma_cache_t cache;
1326 	uma_bucket_t bucket;
1327 	int cpu;
1328 
1329 	/* This is the fast path allocation */
1330 #ifdef UMA_DEBUG_ALLOC_1
1331 	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1332 #endif
1333 
1334 	if (!(flags & M_NOWAIT)) {
1335 		KASSERT(curthread->td_intr_nesting_level == 0,
1336 		   ("malloc(M_WAITOK) in interrupt context"));
1337 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1338 		    "malloc() of \"%s\"", zone->uz_name);
1339 	}
1340 
1341 zalloc_restart:
1342 	cpu = PCPU_GET(cpuid);
1343 	CPU_LOCK(zone, cpu);
1344 	cache = &zone->uz_cpu[cpu];
1345 
1346 zalloc_start:
1347 	bucket = cache->uc_allocbucket;
1348 
1349 	if (bucket) {
1350 		if (bucket->ub_ptr > -1) {
1351 			item = bucket->ub_bucket[bucket->ub_ptr];
1352 #ifdef INVARIANTS
1353 			bucket->ub_bucket[bucket->ub_ptr] = NULL;
1354 #endif
1355 			bucket->ub_ptr--;
1356 			KASSERT(item != NULL,
1357 			    ("uma_zalloc: Bucket pointer mangled."));
1358 			cache->uc_allocs++;
1359 #ifdef INVARIANTS
1360 			ZONE_LOCK(zone);
1361 			uma_dbg_alloc(zone, NULL, item);
1362 			ZONE_UNLOCK(zone);
1363 #endif
1364 			CPU_UNLOCK(zone, cpu);
1365 			if (zone->uz_ctor)
1366 				zone->uz_ctor(item, zone->uz_size, udata);
1367 			if (flags & M_ZERO)
1368 				bzero(item, zone->uz_size);
1369 			return (item);
1370 		} else if (cache->uc_freebucket) {
1371 			/*
1372 			 * We have run out of items in our allocbucket.
1373 			 * See if we can switch with our free bucket.
1374 			 */
1375 			if (cache->uc_freebucket->ub_ptr > -1) {
1376 				uma_bucket_t swap;
1377 
1378 #ifdef UMA_DEBUG_ALLOC
1379 				printf("uma_zalloc: Swapping empty with alloc.\n");
1380 #endif
1381 				swap = cache->uc_freebucket;
1382 				cache->uc_freebucket = cache->uc_allocbucket;
1383 				cache->uc_allocbucket = swap;
1384 
1385 				goto zalloc_start;
1386 			}
1387 		}
1388 	}
1389 	ZONE_LOCK(zone);
1390 	/* Since we have locked the zone we may as well send back our stats */
1391 	zone->uz_allocs += cache->uc_allocs;
1392 	cache->uc_allocs = 0;
1393 
1394 	/* Our old one is now a free bucket */
1395 	if (cache->uc_allocbucket) {
1396 		KASSERT(cache->uc_allocbucket->ub_ptr == -1,
1397 		    ("uma_zalloc_arg: Freeing a non free bucket."));
1398 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1399 		    cache->uc_allocbucket, ub_link);
1400 		cache->uc_allocbucket = NULL;
1401 	}
1402 
1403 	/* Check the free list for a new alloc bucket */
1404 	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1405 		KASSERT(bucket->ub_ptr != -1,
1406 		    ("uma_zalloc_arg: Returning an empty bucket."));
1407 
1408 		LIST_REMOVE(bucket, ub_link);
1409 		cache->uc_allocbucket = bucket;
1410 		ZONE_UNLOCK(zone);
1411 		goto zalloc_start;
1412 	}
1413 	/* We are no longer associated with this cpu!!! */
1414 	CPU_UNLOCK(zone, cpu);
1415 
1416 	/* Bump up our uz_count so we get here less */
1417 	if (zone->uz_count < UMA_BUCKET_SIZE - 1)
1418 		zone->uz_count++;
1419 
1420 	/*
1421 	 * Now lets just fill a bucket and put it on the free list.  If that
1422 	 * works we'll restart the allocation from the beginning.
1423 	 */
1424 
1425 	if (uma_zalloc_bucket(zone, flags)) {
1426 		ZONE_UNLOCK(zone);
1427 		goto zalloc_restart;
1428 	}
1429 	ZONE_UNLOCK(zone);
1430 	/*
1431 	 * We may not be able to get a bucket so return an actual item.
1432 	 */
1433 #ifdef UMA_DEBUG
1434 	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1435 #endif
1436 
1437 	return (uma_zalloc_internal(zone, udata, flags));
1438 }
1439 
1440 static uma_slab_t
1441 uma_zone_slab(uma_zone_t zone, int flags)
1442 {
1443 	uma_slab_t slab;
1444 
1445 	/*
1446 	 * This is to prevent us from recursively trying to allocate
1447 	 * buckets.  The problem is that if an allocation forces us to
1448 	 * grab a new bucket we will call page_alloc, which will go off
1449 	 * and cause the vm to allocate vm_map_entries.  If we need new
1450 	 * buckets there too we will recurse in kmem_alloc and bad
1451 	 * things happen.  So instead we return a NULL bucket, and make
1452 	 * the code that allocates buckets smart enough to deal with it
1453 	 */
1454 	if (zone == bucketzone && zone->uz_recurse != 0)
1455 		return (NULL);
1456 
1457 	slab = NULL;
1458 
1459 	for (;;) {
1460 		/*
1461 		 * Find a slab with some space.  Prefer slabs that are partially
1462 		 * used over those that are totally full.  This helps to reduce
1463 		 * fragmentation.
1464 		 */
1465 		if (zone->uz_free != 0) {
1466 			if (!LIST_EMPTY(&zone->uz_part_slab)) {
1467 				slab = LIST_FIRST(&zone->uz_part_slab);
1468 			} else {
1469 				slab = LIST_FIRST(&zone->uz_free_slab);
1470 				LIST_REMOVE(slab, us_link);
1471 				LIST_INSERT_HEAD(&zone->uz_part_slab, slab,
1472 				    us_link);
1473 			}
1474 			return (slab);
1475 		}
1476 
1477 		/*
1478 		 * M_NOVM means don't ask at all!
1479 		 */
1480 		if (flags & M_NOVM)
1481 			break;
1482 
1483 		if (zone->uz_maxpages &&
1484 		    zone->uz_pages >= zone->uz_maxpages) {
1485 			zone->uz_flags |= UMA_ZFLAG_FULL;
1486 
1487 			if (flags & M_NOWAIT)
1488 				break;
1489 			else
1490 				msleep(zone, &zone->uz_lock, PVM, "zonelimit", 0);
1491 			continue;
1492 		}
1493 		zone->uz_recurse++;
1494 		slab = slab_zalloc(zone, flags);
1495 		zone->uz_recurse--;
1496 		/*
1497 		 * If we got a slab here it's safe to mark it partially used
1498 		 * and return.  We assume that the caller is going to remove
1499 		 * at least one item.
1500 		 */
1501 		if (slab) {
1502 			LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1503 			return (slab);
1504 		}
1505 		/*
1506 		 * We might not have been able to get a slab but another cpu
1507 		 * could have while we were unlocked.  Check again before we
1508 		 * fail.
1509 		 */
1510 		if (flags & M_NOWAIT)
1511 			flags |= M_NOVM;
1512 	}
1513 	return (slab);
1514 }
1515 
1516 static __inline void *
1517 uma_slab_alloc(uma_zone_t zone, uma_slab_t slab)
1518 {
1519 	void *item;
1520 	u_int8_t freei;
1521 
1522 	freei = slab->us_firstfree;
1523 	slab->us_firstfree = slab->us_freelist[freei];
1524 	item = slab->us_data + (zone->uz_rsize * freei);
1525 
1526 	slab->us_freecount--;
1527 	zone->uz_free--;
1528 #ifdef INVARIANTS
1529 	uma_dbg_alloc(zone, slab, item);
1530 #endif
1531 	/* Move this slab to the full list */
1532 	if (slab->us_freecount == 0) {
1533 		LIST_REMOVE(slab, us_link);
1534 		LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link);
1535 	}
1536 
1537 	return (item);
1538 }
1539 
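/*
 * Illustrative trace of the free list handling above: a fresh slab with
 * ipers == 4 starts with us_firstfree == 0 and us_freelist == {1,2,3,4}
 * (set up in slab_zalloc).  The first allocation hands out item 0 and
 * advances us_firstfree to 1; freeing item 0 later pushes it back on the
 * head, so the most recently freed item is reused first.
 */
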
1540 static int
1541 uma_zalloc_bucket(uma_zone_t zone, int flags)
1542 {
1543 	uma_bucket_t bucket;
1544 	uma_slab_t slab;
1545 
1546 	/*
1547 	 * Try this zone's free list first so we don't allocate extra buckets.
1548 	 */
1549 
1550 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1551 		KASSERT(bucket->ub_ptr == -1,
1552 		    ("uma_zalloc_bucket: Bucket on free list is not empty."));
1553 		LIST_REMOVE(bucket, ub_link);
1554 	} else {
1555 		int bflags;
1556 
1557 		bflags = flags;
1558 		if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1559 			bflags |= M_NOVM;
1560 
1561 		ZONE_UNLOCK(zone);
1562 		bucket = uma_zalloc_internal(bucketzone,
1563 		    NULL, bflags);
1564 		ZONE_LOCK(zone);
1565 		if (bucket != NULL) {
1566 #ifdef INVARIANTS
1567 			bzero(bucket, bucketzone->uz_size);
1568 #endif
1569 			bucket->ub_ptr = -1;
1570 		}
1571 	}
1572 
1573 	if (bucket == NULL)
1574 		return (0);
1575 
1576 #ifdef SMP
1577 	/*
1578 	 * This code is here to limit the number of simultaneous bucket fills
1579 	 * for any given zone to the number of per cpu caches in this zone. This
1580 	 * is done so that we don't allocate more memory than we really need.
1581 	 */
1582 	if (zone->uz_fills >= mp_ncpus)
1583 		goto done;
1584 
1585 #endif
1586 	zone->uz_fills++;
1587 
1588 	/* Try to keep the buckets totally full */
1589 	while ((slab = uma_zone_slab(zone, flags)) != NULL &&
1590 	    bucket->ub_ptr < zone->uz_count) {
1591 		while (slab->us_freecount &&
1592 		    bucket->ub_ptr < zone->uz_count) {
1593 			bucket->ub_bucket[++bucket->ub_ptr] =
1594 			    uma_slab_alloc(zone, slab);
1595 		}
1596 		/* Don't block on the next fill */
1597 		flags |= M_NOWAIT;
1598 	}
1599 
1600 	zone->uz_fills--;
1601 
1602 	if (bucket->ub_ptr != -1) {
1603 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1604 		    bucket, ub_link);
1605 		return (1);
1606 	}
1607 #ifdef SMP
1608 done:
1609 #endif
1610 	uma_zfree_internal(bucketzone, bucket, NULL, 0);
1611 
1612 	return (0);
1613 }
1614 /*
1615  * Allocates an item for an internal zone
1616  *
1617  * Arguments
1618  *	zone   The zone to alloc for.
1619  *	udata  The data to be passed to the constructor.
1620  *	flags  M_WAITOK, M_NOWAIT, M_ZERO.
1621  *
1622  * Returns
1623  *	NULL if there is no memory and M_NOWAIT is set
1624  *	An item if successful
1625  */
1626 
1627 static void *
1628 uma_zalloc_internal(uma_zone_t zone, void *udata, int flags)
1629 {
1630 	uma_slab_t slab;
1631 	void *item;
1632 
1633 	item = NULL;
1634 
1635 	/*
1636 	 * This is to stop us from allocating per cpu buckets while we're
1637 	 * running out of UMA_BOOT_PAGES.  Otherwise, we would exhaust the
1638 	 * boot pages.
1639 	 */
1640 
1641 	if (bucketdisable && zone == bucketzone)
1642 		return (NULL);
1643 
1644 #ifdef UMA_DEBUG_ALLOC
1645 	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
1646 #endif
1647 	ZONE_LOCK(zone);
1648 
1649 	slab = uma_zone_slab(zone, flags);
1650 	if (slab == NULL) {
1651 		ZONE_UNLOCK(zone);
1652 		return (NULL);
1653 	}
1654 
1655 	item = uma_slab_alloc(zone, slab);
1656 
1657 	ZONE_UNLOCK(zone);
1658 
1659 	if (zone->uz_ctor != NULL)
1660 		zone->uz_ctor(item, zone->uz_size, udata);
1661 	if (flags & M_ZERO)
1662 		bzero(item, zone->uz_size);
1663 
1664 	return (item);
1665 }
1666 
1667 /* See uma.h */
1668 void
1669 uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
1670 {
1671 	uma_cache_t cache;
1672 	uma_bucket_t bucket;
1673 	int bflags;
1674 	int cpu;
1675 
1676 	/* This is the fast path free */
1677 #ifdef UMA_DEBUG_ALLOC_1
1678 	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
1679 #endif
1680 	/*
1681 	 * The race here is acceptable.  If we miss it we'll just have to wait
1682 	 * a little longer for the limits to be reset.
1683 	 */
1684 
1685 	if (zone->uz_flags & UMA_ZFLAG_FULL)
1686 		goto zfree_internal;
1687 
1688 	if (zone->uz_dtor)
1689 		zone->uz_dtor(item, zone->uz_size, udata);
1690 
1691 zfree_restart:
1692 	cpu = PCPU_GET(cpuid);
1693 	CPU_LOCK(zone, cpu);
1694 	cache = &zone->uz_cpu[cpu];
1695 
1696 zfree_start:
1697 	bucket = cache->uc_freebucket;
1698 
1699 	if (bucket) {
1700 		/*
1701 		 * Do we have room in our bucket? It is OK for this uz count
1702 		 * check to be slightly out of sync.
1703 		 */
1704 
1705 		if (bucket->ub_ptr < zone->uz_count) {
1706 			bucket->ub_ptr++;
1707 			KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL,
1708 			    ("uma_zfree: Freeing to non free bucket index."));
1709 			bucket->ub_bucket[bucket->ub_ptr] = item;
1710 #ifdef INVARIANTS
1711 			ZONE_LOCK(zone);
1712 			if (zone->uz_flags & UMA_ZFLAG_MALLOC)
1713 				uma_dbg_free(zone, udata, item);
1714 			else
1715 				uma_dbg_free(zone, NULL, item);
1716 			ZONE_UNLOCK(zone);
1717 #endif
1718 			CPU_UNLOCK(zone, cpu);
1719 			return;
1720 		} else if (cache->uc_allocbucket) {
1721 #ifdef UMA_DEBUG_ALLOC
1722 			printf("uma_zfree: Swapping buckets.\n");
1723 #endif
1724 			/*
1725 			 * We have run out of space in our freebucket.
1726 			 * See if we can switch with our alloc bucket.
1727 			 */
1728 			if (cache->uc_allocbucket->ub_ptr <
1729 			    cache->uc_freebucket->ub_ptr) {
1730 				uma_bucket_t swap;
1731 
1732 				swap = cache->uc_freebucket;
1733 				cache->uc_freebucket = cache->uc_allocbucket;
1734 				cache->uc_allocbucket = swap;
1735 
1736 				goto zfree_start;
1737 			}
1738 		}
1739 	}
1740 
1741 	/*
1742 	 * We can get here for two reasons:
1743 	 *
1744 	 * 1) The buckets are NULL
1745 	 * 2) The alloc and free buckets are both somewhat full.
1746 	 *
1747 	 */
1748 
1749 	ZONE_LOCK(zone);
1750 
1751 	bucket = cache->uc_freebucket;
1752 	cache->uc_freebucket = NULL;
1753 
1754 	/* Can we throw this on the zone full list? */
1755 	if (bucket != NULL) {
1756 #ifdef UMA_DEBUG_ALLOC
1757 		printf("uma_zfree: Putting old bucket on the free list.\n");
1758 #endif
1759 		/* ub_ptr is pointing to the last free item */
1760 		KASSERT(bucket->ub_ptr != -1,
1761 		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
1762 		LIST_INSERT_HEAD(&zone->uz_full_bucket,
1763 		    bucket, ub_link);
1764 	}
1765 	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
1766 		LIST_REMOVE(bucket, ub_link);
1767 		ZONE_UNLOCK(zone);
1768 		cache->uc_freebucket = bucket;
1769 		goto zfree_start;
1770 	}
1771 	/* We're done with this CPU now */
1772 	CPU_UNLOCK(zone, cpu);
1773 
1774 	/* And the zone.. */
1775 	ZONE_UNLOCK(zone);
1776 
1777 #ifdef UMA_DEBUG_ALLOC
1778 	printf("uma_zfree: Allocating new free bucket.\n");
1779 #endif
1780 	bflags = M_NOWAIT;
1781 
1782 	if (zone->uz_flags & UMA_ZFLAG_BUCKETCACHE)
1783 		bflags |= M_NOVM;
1784 #ifdef INVARIANTS
1785 	bflags |= M_ZERO;
1786 #endif
1787 	bucket = uma_zalloc_internal(bucketzone,
1788 	    NULL, bflags);
1789 	if (bucket) {
1790 		bucket->ub_ptr = -1;
1791 		ZONE_LOCK(zone);
1792 		LIST_INSERT_HEAD(&zone->uz_free_bucket,
1793 		    bucket, ub_link);
1794 		ZONE_UNLOCK(zone);
1795 		goto zfree_restart;
1796 	}
1797 
1798 	/*
1799 	 * If nothing else caught this, we'll just do an internal free.
1800 	 */
1801 
1802 zfree_internal:
1803 
1804 	uma_zfree_internal(zone, item, udata, 0);
1805 
1806 	return;
1807 
1808 }
1809 
1810 /*
1811  * Frees an item to an INTERNAL zone or allocates a free bucket
1812  *
1813  * Arguments:
1814  *	zone   The zone to free to
1815  *	item   The item we're freeing
1816  *	udata  User supplied data for the dtor
1817  *	skip   Skip the dtor, it was done in uma_zfree_arg
1818  */
1819 
1820 static void
1821 uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip)
1822 {
1823 	uma_slab_t slab;
1824 	u_int8_t *mem;
1825 	u_int8_t freei;
1826 
1827 	if (!skip && zone->uz_dtor)
1828 		zone->uz_dtor(item, zone->uz_size, udata);
1829 
1830 	ZONE_LOCK(zone);
1831 
1832 	if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) {
1833 		mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
1834 		if (zone->uz_flags & UMA_ZFLAG_HASH)
1835 			slab = hash_sfind(&zone->uz_hash, mem);
1836 		else {
1837 			mem += zone->uz_pgoff;
1838 			slab = (uma_slab_t)mem;
1839 		}
1840 	} else {
1841 		slab = (uma_slab_t)udata;
1842 	}
1843 
1844 	/* Do we need to remove from any lists? */
1845 	if (slab->us_freecount+1 == zone->uz_ipers) {
1846 		LIST_REMOVE(slab, us_link);
1847 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1848 	} else if (slab->us_freecount == 0) {
1849 		LIST_REMOVE(slab, us_link);
1850 		LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link);
1851 	}
1852 
1853 	/* Slab management stuff */
1854 	freei = ((unsigned long)item - (unsigned long)slab->us_data)
1855 		/ zone->uz_rsize;
1856 
1857 #ifdef INVARIANTS
1858 	if (!skip)
1859 		uma_dbg_free(zone, slab, item);
1860 #endif
1861 
1862 	slab->us_freelist[freei] = slab->us_firstfree;
1863 	slab->us_firstfree = freei;
1864 	slab->us_freecount++;
1865 
1866 	/* Zone statistics */
1867 	zone->uz_free++;
1868 
1869 	if (zone->uz_flags & UMA_ZFLAG_FULL) {
1870 		if (zone->uz_pages < zone->uz_maxpages)
1871 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
1872 
1873 		/* We can handle one more allocation */
1874 		wakeup_one(zone);
1875 	}
1876 
1877 	ZONE_UNLOCK(zone);
1878 }
1879 
1880 /* See uma.h */
1881 void
1882 uma_zone_set_max(uma_zone_t zone, int nitems)
1883 {
1884 	ZONE_LOCK(zone);
1885 	if (zone->uz_ppera > 1)
1886 		zone->uz_maxpages = nitems * zone->uz_ppera;
1887 	else
1888 		zone->uz_maxpages = nitems / zone->uz_ipers;
1889 
1890 	if (zone->uz_maxpages * zone->uz_ipers < nitems)
1891 		zone->uz_maxpages++;
1892 
1893 	ZONE_UNLOCK(zone);
1894 }
1895 
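/*
 * Worked example of the limit above (illustrative numbers): for a zone
 * with one page per slab and uz_ipers == 38, uma_zone_set_max(zone, 1000)
 * yields maxpages = 1000 / 38 = 26; since 26 * 38 = 988 < 1000 one more
 * page is added, so the effective ceiling is 27 * 38 = 1026 items.
 */
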
1896 /* See uma.h */
1897 void
1898 uma_zone_set_freef(uma_zone_t zone, uma_free freef)
1899 {
1900 	ZONE_LOCK(zone);
1901 
1902 	zone->uz_freef = freef;
1903 
1904 	ZONE_UNLOCK(zone);
1905 }
1906 
1907 /* See uma.h */
1908 void
1909 uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
1910 {
1911 	ZONE_LOCK(zone);
1912 
1913 	zone->uz_flags |= UMA_ZFLAG_PRIVALLOC;
1914 	zone->uz_allocf = allocf;
1915 
1916 	ZONE_UNLOCK(zone);
1917 }
1918 
1919 /* See uma.h */
1920 int
1921 uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
1922 {
1923 	int pages;
1924 	vm_offset_t kva;
1925 
1926 	mtx_lock(&Giant);
1927 
1928 	pages = count / zone->uz_ipers;
1929 
1930 	if (pages * zone->uz_ipers < count)
1931 		pages++;
1932 
1933 	kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE);
1934 
1935 	if (kva == 0) {
1936 		mtx_unlock(&Giant);
1937 		return (0);
1938 	}
1939 
1940 
1941 	if (obj == NULL)
1942 		obj = vm_object_allocate(OBJT_DEFAULT,
1943 		    pages);
1944 	else {
1945 		VM_OBJECT_LOCK_INIT(obj);
1946 		_vm_object_allocate(OBJT_DEFAULT,
1947 		    pages, obj);
1948 	}
1949 	ZONE_LOCK(zone);
1950 	zone->uz_kva = kva;
1951 	zone->uz_obj = obj;
1952 	zone->uz_maxpages = pages;
1953 
1954 	zone->uz_allocf = obj_alloc;
1955 	zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC;
1956 
1957 	ZONE_UNLOCK(zone);
1958 	mtx_unlock(&Giant);
1959 
1960 	return (1);
1961 }
1962 
1963 /* See uma.h */
1964 void
1965 uma_prealloc(uma_zone_t zone, int items)
1966 {
1967 	int slabs;
1968 	uma_slab_t slab;
1969 
1970 	ZONE_LOCK(zone);
1971 	slabs = items / zone->uz_ipers;
1972 	if (slabs * zone->uz_ipers < items)
1973 		slabs++;
1974 
1975 	while (slabs > 0) {
1976 		slab = slab_zalloc(zone, M_WAITOK);
1977 		LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link);
1978 		slabs--;
1979 	}
1980 	ZONE_UNLOCK(zone);
1981 }
1982 
1983 /* See uma.h */
1984 void
1985 uma_reclaim(void)
1986 {
1987 	/*
1988 	 * You might think that the delay below would improve performance since
1989 	 * the allocator will give away memory that it may ask for again immediately.
1990 	 * Really, it makes things worse, since cpu cycles are so much cheaper
1991 	 * than disk activity.
1992 	 */
1993 #if 0
1994 	static struct timeval tv = {0};
1995 	struct timeval now;
1996 	getmicrouptime(&now);
1997 	if (now.tv_sec > tv.tv_sec + 30)
1998 		tv = now;
1999 	else
2000 		return;
2001 #endif
2002 #ifdef UMA_DEBUG
2003 	printf("UMA: vm asked us to release pages!\n");
2004 #endif
2005 	bucket_enable();
2006 	zone_foreach(zone_drain);
2007 
2008 	/*
2009 	 * Some slabs may have been freed only after the slab zone was visited
2010 	 * early by the loop above; drain it again so pages that became empty
2011 	 * once the other zones were drained can be freed.  Same for buckets.
2012 	 */
2013 	zone_drain(slabzone);
2014 	zone_drain(bucketzone);
2015 }
2016 
2017 void *
2018 uma_large_malloc(int size, int wait)
2019 {
2020 	void *mem;
2021 	uma_slab_t slab;
2022 	u_int8_t flags;
2023 
2024 	slab = uma_zalloc_internal(slabzone, NULL, wait);
2025 	if (slab == NULL)
2026 		return (NULL);
2027 
2028 	/* XXX: kmem_malloc panics if Giant isn't held and sleep allowed */
2029 	if ((wait & M_NOWAIT) == 0 && !mtx_owned(&Giant)) {
2030 		mtx_lock(&Giant);
2031 		mem = page_alloc(NULL, size, &flags, wait);
2032 		mtx_unlock(&Giant);
2033 	} else
2034 		mem = page_alloc(NULL, size, &flags, wait);
2035 	if (mem) {
2036 		vsetslab((vm_offset_t)mem, slab);
2037 		slab->us_data = mem;
2038 		slab->us_flags = flags | UMA_SLAB_MALLOC;
2039 		slab->us_size = size;
2040 	} else {
2041 		uma_zfree_internal(slabzone, slab, NULL, 0);
2042 	}
2043 
2044 
2045 	return (mem);
2046 }
2047 
2048 void
2049 uma_large_free(uma_slab_t slab)
2050 {
2051 	vsetobj((vm_offset_t)slab->us_data, kmem_object);
2052 	/*
2053 	 * XXX: We get a lock order reversal if we don't have Giant:
2054 	 * vm_map_remove (locks system map) -> vm_map_delete ->
2055 	 *    vm_map_entry_unwire -> vm_fault_unwire -> mtx_lock(&Giant)
2056 	 */
2057 	if (!mtx_owned(&Giant)) {
2058 		mtx_lock(&Giant);
2059 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2060 		mtx_unlock(&Giant);
2061 	} else
2062 		page_free(slab->us_data, slab->us_size, slab->us_flags);
2063 	uma_zfree_internal(slabzone, slab, NULL, 0);
2064 }
2065 
2066 void
2067 uma_print_stats(void)
2068 {
2069 	zone_foreach(uma_print_zone);
2070 }
2071 
2072 void
2073 uma_print_zone(uma_zone_t zone)
2074 {
2075 	printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
2076 	    zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags,
2077 	    zone->uz_ipers, zone->uz_ppera,
2078 	    (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free);
2079 }
2080 
2081 /*
2082  * Sysctl handler for vm.zone
2083  *
2084  * stolen from vm_zone.c
2085  */
2086 static int
2087 sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
2088 {
2089 	int error, len, cnt;
2090 	const int linesize = 128;	/* conservative */
2091 	int totalfree;
2092 	char *tmpbuf, *offset;
2093 	uma_zone_t z;
2094 	char *p;
2095 
2096 	cnt = 0;
2097 	mtx_lock(&uma_mtx);
2098 	LIST_FOREACH(z, &uma_zones, uz_link)
2099 		cnt++;
2100 	mtx_unlock(&uma_mtx);
2101 	MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize,
2102 			M_TEMP, M_WAITOK);
2103 	len = snprintf(tmpbuf, linesize,
2104 	    "\nITEM            SIZE     LIMIT     USED    FREE  REQUESTS\n\n");
2105 	if (cnt == 0)
2106 		tmpbuf[len - 1] = '\0';
2107 	error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len);
2108 	if (error || cnt == 0)
2109 		goto out;
2110 	offset = tmpbuf;
2111 	mtx_lock(&uma_mtx);
2112 	LIST_FOREACH(z, &uma_zones, uz_link) {
2113 		if (cnt == 0)	/* list may have changed size */
2114 			break;
2115 		ZONE_LOCK(z);
2116 		totalfree = z->uz_free + z->uz_cachefree;
2117 		len = snprintf(offset, linesize,
2118 		    "%-12.12s  %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n",
2119 		    z->uz_name, z->uz_size,
2120 		    z->uz_maxpages * z->uz_ipers,
2121 		    (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree,
2122 		    totalfree,
2123 		    (unsigned long long)z->uz_allocs);
2124 		ZONE_UNLOCK(z);
2125 		for (p = offset + 12; p > offset && *p == ' '; --p)
2126 			/* nothing */ ;
2127 		p[1] = ':';
2128 		cnt--;
2129 		offset += len;
2130 	}
2131 	mtx_unlock(&uma_mtx);
2132 	*offset++ = '\0';
2133 	error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf);
2134 out:
2135 	FREE(tmpbuf, M_TEMP);
2136 	return (error);
2137 }
2138
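/*
 * Illustrative use (from userland, not part of this file): the handler
 * above backs `sysctl vm.zone`, which prints one formatted line per zone
 * under the header emitted by the first snprintf().
 */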